Java 类weka.core.AttributeStats 实例源码

项目:MLDA    文件:MeanEntropiesNominalAttributes.java   
/**
 * Computes the mean entropy over the feature attributes that expose
 * nominal counts and stores it as this metric's value.
 * <p>
 * Feature attributes whose statistics carry no nominal counts are ignored.
 * If no feature attribute has nominal counts the division is 0.0/0 and the
 * result is {@code NaN}.
 *
 * @param mlData Multi-label dataset on which the metric is calculated
 * @return Value of the metric
 */
public double calculate(MultiLabelInstances mlData){
    Instances data = mlData.getDataSet();

    double entropySum = 0.0;
    int nominalSeen = 0;

    for(int featureIndex : mlData.getFeatureIndices()){
        AttributeStats stats = data.attributeStats(featureIndex);
        if(stats.nominalCounts == null){
            // no nominal counts available: not part of the mean
            continue;
        }
        nominalSeen++;
        entropySum += Utils.entropy(stats.nominalCounts);
    }

    this.value = entropySum / nominalSeen;
    return value;
}
项目:repo.kmeanspp.silhouette_score    文件:Canopy.java   
/**
 * Heuristically derives the T2 distance from the spread of the attributes.
 * <p>
 * Every nominal attribute contributes a fixed 0.25. Every numeric attribute
 * with more than two non-missing values, a non-missing standard deviation
 * and a positive range contributes half of its standard deviation divided
 * by its range. The square root of the total is stored in {@code m_t2}
 * when it is positive; only {@code m_t2} is written by this method.
 *
 * @param trainingBatch the training instances
 * @throws Exception if a problem occurs
 */
protected void setT2T1BasedOnStdDev(Instances trainingBatch) throws Exception {
  double spreadTotal = 0;

  for (int att = 0; att < trainingBatch.numAttributes(); att++) {
    if (trainingBatch.attribute(att).isNominal()) {
      spreadTotal += 0.25;
      continue;
    }
    if (!trainingBatch.attribute(att).isNumeric()) {
      continue;
    }

    AttributeStats attStats = trainingBatch.attributeStats(att);
    int present = trainingBatch.numInstances() - attStats.missingCount;
    if (present <= 2) {
      // too few observed values for the spread to be considered
      continue;
    }

    double sd = attStats.numericStats.stdDev;
    double range = attStats.numericStats.max - attStats.numericStats.min;
    // written as !(range > 0) so a NaN range is skipped as well
    if (Utils.isMissingValue(sd) || !(range > 0)) {
      continue;
    }
    spreadTotal += 0.5 * sd / range;
  }

  double candidate = Math.sqrt(spreadTotal);
  if (candidate > 0) {
    m_t2 = candidate;
  }
}
项目:repo.kmeanspp.silhouette_score    文件:AttributeSummaryPanel.java   
/**
 * Points the panel at a new dataset: clears the cached per-attribute
 * statistics, resets all summary labels and the statistics table, and
 * records whether every instance carries the same weight.
 *
 * @param inst a set of Instances
 */
public void setInstances(Instances inst) {

  m_Instances = inst;
  m_AttributeStats = new AttributeStats[inst.numAttributes()];

  // nothing is selected yet, so every label shows the placeholder text
  m_AttributeNameLab.setText(NO_SOURCE);
  m_AttributeTypeLab.setText(NO_SOURCE);
  m_MissingLab.setText(NO_SOURCE);
  m_UniqueLab.setText(NO_SOURCE);
  m_DistinctLab.setText(NO_SOURCE);
  m_StatsTable.setModel(new DefaultTableModel());

  m_allEqualWeights = true;
  final int total = m_Instances.numInstances();
  if (total == 0) {
    return;
  }

  // compare every later weight with the first one, stopping at the first
  // mismatch
  final double reference = m_Instances.instance(0).weight();
  int row = 1;
  while (m_allEqualWeights && row < total) {
    m_allEqualWeights = (m_Instances.instance(row).weight() == reference);
    row++;
  }
}
项目:autoweka    文件:EMImputationTest.java   
/**
 * Runs the filter on the test data and checks that the attribute count is
 * preserved and that no missing values remain in any attribute, except a
 * non-numeric class attribute or an attribute with fewer than two distinct
 * values in the original data.
 */
public void testTypical() {
  Instances filtered = useFilter();

  // The filter must not add or remove attributes. The instance count is
  // deliberately not checked: it may change if an instance has all values
  // missing.
  assertEquals(m_Instances.numAttributes(), filtered.numAttributes());

  for (int att = 0; att < filtered.numAttributes(); att++) {
    boolean nonNumericClass =
      (att == m_Instances.classIndex()) && !m_Instances.attribute(att).isNumeric();
    if (nonNumericClass || m_Instances.attributeStats(att).distinctCount < 2) {
      continue;
    }
    int stillMissing = filtered.attributeStats(att).missingCount;
    assertTrue("All missing values except for those in nonnumeric class " +
                "attributes should be replaced.",
                stillMissing == 0);
  }
}
项目:autoweka    文件:AttributeSummaryPanel.java   
/**
 * Tells the panel to use a new set of instances: clears the cached
 * per-attribute statistics, resets all summary labels and the statistics
 * table, and records whether every instance carries the same weight.
 * <p>
 * An empty dataset is treated as having equal weights.
 *
 * @param inst a set of Instances
 */
public void setInstances(Instances inst) {

  m_Instances = inst;
  m_AttributeStats = new AttributeStats [inst.numAttributes()];
  m_AttributeNameLab.setText(NO_SOURCE);
  m_AttributeTypeLab.setText(NO_SOURCE);
  m_MissingLab.setText(NO_SOURCE);
  m_UniqueLab.setText(NO_SOURCE);
  m_DistinctLab.setText(NO_SOURCE);
  m_StatsTable.setModel(new DefaultTableModel());

  m_allEqualWeights = true;
  // Guard the empty dataset: instance(0) below would otherwise be asked for
  // an element that does not exist.
  if (m_Instances.numInstances() == 0) {
    return;
  }
  double w = m_Instances.instance(0).weight();
  for (int i = 1; i < m_Instances.numInstances(); i++) {
    if (m_Instances.instance(i).weight() != w) {
      m_allEqualWeights = false;
      break;
    }
  }
}
项目:autoweka    文件:SpreadSubsampleTest.java   
/**
 * Applies the filter with the given distribution spread and checks that,
 * for attribute 1, the largest nominal count divided by the spread factor
 * does not exceed the smallest nominal count.
 *
 * @param factor the distribution spread to configure on the filter
 * @throws Exception if the filter fails
 */
private void testDistributionSpread_X(double factor) throws Exception {
  AttributeStats before = m_Instances.attributeStats(1);
  assertNotNull(before.nominalCounts);

  ((SpreadSubsample)m_Filter).setDistributionSpread(factor);
  Instances filtered = useFilter();
  assertEquals(m_Instances.numAttributes(), filtered.numAttributes());
  AttributeStats after = filtered.attributeStats(1);

  // The filtered attribute must still be nominal with the same value count
  assertNotNull(after.nominalCounts);
  assertEquals(before.nominalCounts.length, after.nominalCounts.length);

  // Find the extremes of the filtered distribution
  int smallest = after.nominalCounts[0];
  int largest = after.nominalCounts[0];
  for (int k = 1; k < after.nominalCounts.length; k++) {
    smallest = Math.min(smallest, after.nominalCounts[k]);
    largest = Math.max(largest, after.nominalCounts[k]);
  }
  assertTrue(largest / factor <= smallest);
}
项目:autoweka    文件:ResampleTest.java   
/**
 * Runs the filter with attribute 1 as class and checks that each nominal
 * count in the output lies in the band {@code [orig/2 - 1, orig/2 + 2]}
 * derived from the original count.
 *
 * @throws Exception if the filter fails
 */
public void testNoBias() throws Exception {
  m_Instances.setClassIndex(1);
  AttributeStats origs = m_Instances.attributeStats(1);
  assertNotNull(origs.nominalCounts);

  Instances filtered = useFilter();
  assertEquals(m_Instances.numAttributes(), filtered.numAttributes());
  AttributeStats outs = filtered.attributeStats(1);

  // The output must still be nominal with the same number of values
  assertNotNull(outs.nominalCounts);
  assertEquals(origs.nominalCounts.length, outs.nominalCounts.length);

  for (int v = 0; v < origs.nominalCounts.length; v++) {
    int lowerBound = origs.nominalCounts[v] / 2 - 1;
    boolean withinBand = (lowerBound <= outs.nominalCounts[v])
      && (outs.nominalCounts[v] <= (lowerBound + 3));
    assertTrue("Counts for value:" + v
           + " orig:" + origs.nominalCounts[v]
           + " out50%:" + outs.nominalCounts[v],
           withinBand);
  }
}
项目:autoweka    文件:ResampleTest.java   
/**
 * Runs the filter with full bias towards a uniform class distribution and
 * checks that every nominal count of attribute 1 in the output lies within
 * a band of width 3 starting at the expected per-value count.
 *
 * @throws Exception if the filter fails
 */
public void testBiasToUniform() throws Exception {
  m_Instances.setClassIndex(1);
  AttributeStats origs = m_Instances.attributeStats(1);
  assertNotNull(origs.nominalCounts);

  ((Resample)m_Filter).setBiasToUniformClass(1.0);
  Instances filtered = useFilter();
  assertEquals(m_Instances.numAttributes(), filtered.numAttributes());
  AttributeStats outs = filtered.attributeStats(1);

  // The output must still be nominal with the same number of values
  assertNotNull(outs.nominalCounts);
  assertEquals(origs.nominalCounts.length, outs.nominalCounts.length);

  // Non-missing instances spread evenly over the distinct values, then
  // halved and lowered by one to form the bottom of the accepted band
  int evenShare = (origs.totalCount - origs.missingCount) / origs.distinctCount;
  int est = evenShare / 2 - 1;

  for (int v = 0; v < origs.nominalCounts.length; v++) {
    boolean withinBand = (est <= outs.nominalCounts[v])
      && (outs.nominalCounts[v] <= (est + 3));
    assertTrue("Counts for value:" + v
           + " orig:" + origs.nominalCounts[v]
           + " out50%:" + outs.nominalCounts[v]
           + " ~wanted:" + est,
           withinBand);
  }
}
项目:umple    文件:Canopy.java   
/**
 * Heuristically derives the T2 distance from the spread of the attributes.
 * <p>
 * Each nominal attribute adds a constant 0.25 to a running total. Each
 * numeric attribute adds {@code 0.5 * stdDev / (max - min)}, provided it has
 * more than two non-missing values, its standard deviation is not a missing
 * value and its range is positive. If the square root of the total is
 * positive it becomes the new {@code m_t2}; no other field is written here.
 *
 * @param trainingBatch the training instances
 * @throws Exception if a problem occurs
 */
protected void setT2T1BasedOnStdDev(Instances trainingBatch) throws Exception {
  double total = 0;

  for (int idx = 0; idx < trainingBatch.numAttributes(); idx++) {
    if (trainingBatch.attribute(idx).isNominal()) {
      total += 0.25;
      continue;
    }
    if (!trainingBatch.attribute(idx).isNumeric()) {
      continue;
    }

    AttributeStats numeric = trainingBatch.attributeStats(idx);
    int observed = trainingBatch.numInstances() - numeric.missingCount;
    if (observed <= 2) {
      continue;
    }

    double deviation = numeric.numericStats.stdDev;
    double width = numeric.numericStats.max - numeric.numericStats.min;
    // !(width > 0) also rejects a NaN width
    if (Utils.isMissingValue(deviation) || !(width > 0)) {
      continue;
    }
    total += 0.5 * deviation / width;
  }

  double root = Math.sqrt(total);
  if (root > 0) {
    m_t2 = root;
  }
}
项目:umple    文件:AttributeSummaryPanel.java   
/**
 * Switches the panel to a new dataset. The per-attribute statistics cache is
 * re-created empty, all summary labels and the statistics table are reset,
 * and {@code m_allEqualWeights} is recomputed from the instance weights.
 *
 * @param inst a set of Instances
 */
public void setInstances(Instances inst) {

  m_Instances = inst;
  m_AttributeStats = new AttributeStats[inst.numAttributes()];

  // placeholder text until an attribute is selected
  m_AttributeNameLab.setText(NO_SOURCE);
  m_AttributeTypeLab.setText(NO_SOURCE);
  m_MissingLab.setText(NO_SOURCE);
  m_UniqueLab.setText(NO_SOURCE);
  m_DistinctLab.setText(NO_SOURCE);
  m_StatsTable.setModel(new DefaultTableModel());

  m_allEqualWeights = true;
  final int count = m_Instances.numInstances();
  if (count == 0) {
    return;
  }

  // the flag drops to false at the first weight differing from the first
  // instance's weight, which also ends the scan
  final double first = m_Instances.instance(0).weight();
  int pos = 1;
  while (m_allEqualWeights && pos < count) {
    m_allEqualWeights = (m_Instances.instance(pos).weight() == first);
    pos++;
  }
}
项目:umple    文件:SpreadSubsampleTest.java   
/**
 * Configures the filter with the given distribution spread, runs it, and
 * verifies for attribute 1 that the maximum nominal count divided by the
 * factor is no larger than the minimum nominal count.
 *
 * @param factor the distribution spread to configure on the filter
 * @throws Exception if the filter fails
 */
private void testDistributionSpread_X(double factor) throws Exception {
  AttributeStats source = m_Instances.attributeStats(1);
  assertNotNull(source.nominalCounts);

  ((SpreadSubsample)m_Filter).setDistributionSpread(factor);
  Instances output = useFilter();
  assertEquals(m_Instances.numAttributes(), output.numAttributes());
  AttributeStats produced = output.attributeStats(1);

  // Same nominal structure before and after filtering
  assertNotNull(produced.nominalCounts);
  assertEquals(source.nominalCounts.length, produced.nominalCounts.length);

  // Track the lowest and highest count in one pass
  int low = produced.nominalCounts[0];
  int high = produced.nominalCounts[0];
  for (int v = 1; v < produced.nominalCounts.length; v++) {
    low = Math.min(low, produced.nominalCounts[v]);
    high = Math.max(high, produced.nominalCounts[v]);
  }
  assertTrue(high / factor <= low);
}
项目:umple    文件:ResampleTest.java   
/**
 * Runs the filter with attribute 1 as the class and asserts that every
 * output nominal count falls between {@code orig/2 - 1} and
 * {@code orig/2 + 2} (inclusive) of the matching original count.
 *
 * @throws Exception if the filter fails
 */
public void testNoBias() throws Exception {
  m_Instances.setClassIndex(1);
  AttributeStats origs = m_Instances.attributeStats(1);
  assertNotNull(origs.nominalCounts);

  Instances output = useFilter();
  assertEquals(m_Instances.numAttributes(), output.numAttributes());
  AttributeStats outs = output.attributeStats(1);

  // Same nominal structure before and after filtering
  assertNotNull(outs.nominalCounts);
  assertEquals(origs.nominalCounts.length, outs.nominalCounts.length);

  for (int idx = 0; idx < origs.nominalCounts.length; idx++) {
    int floor = origs.nominalCounts[idx] / 2 - 1;
    boolean inRange = (floor <= outs.nominalCounts[idx])
      && (outs.nominalCounts[idx] <= (floor + 3));
    assertTrue("Counts for value:" + idx
           + " orig:" + origs.nominalCounts[idx]
           + " out50%:" + outs.nominalCounts[idx],
           inRange);
  }
}
项目:umple    文件:ResampleTest.java   
/**
 * Sets the filter's bias towards a uniform class to 1.0, runs it, and
 * asserts that every output nominal count of attribute 1 lies between the
 * expected per-value count and that count plus 3.
 *
 * @throws Exception if the filter fails
 */
public void testBiasToUniform() throws Exception {
  m_Instances.setClassIndex(1);
  AttributeStats origs = m_Instances.attributeStats(1);
  assertNotNull(origs.nominalCounts);

  ((Resample)m_Filter).setBiasToUniformClass(1.0);
  Instances output = useFilter();
  assertEquals(m_Instances.numAttributes(), output.numAttributes());
  AttributeStats outs = output.attributeStats(1);

  // Same nominal structure before and after filtering
  assertNotNull(outs.nominalCounts);
  assertEquals(origs.nominalCounts.length, outs.nominalCounts.length);

  // Expected count per value: the non-missing total shared equally among
  // the distinct values, halved, minus one
  int perValue = (origs.totalCount - origs.missingCount) / origs.distinctCount;
  int est = perValue / 2 - 1;

  for (int idx = 0; idx < origs.nominalCounts.length; idx++) {
    boolean inRange = (est <= outs.nominalCounts[idx])
      && (outs.nominalCounts[idx] <= (est + 3));
    assertTrue("Counts for value:" + idx
           + " orig:" + origs.nominalCounts[idx]
           + " out50%:" + outs.nominalCounts[idx]
           + " ~wanted:" + est,
           inRange);
  }
}
项目:MLDA    文件:MinEntropy.java   
/**
 * Computes the smallest entropy among the label attributes and stores it as
 * this metric's value.
 * <p>
 * A label whose statistics carry no nominal counts counts as entropy 0.0.
 * With no labels at all the result stays at {@code Double.MAX_VALUE}.
 *
 * @param mlData Multi-label dataset on which the metric is calculated
 * @return Value of the metric
 */
public double calculate(MultiLabelInstances mlData){
       Instances data = mlData.getDataSet();

       int labelCount = mlData.getNumLabels();
       int [] labelIdx = mlData.getLabelIndices();

       double lowest = Double.MAX_VALUE;

       for(int pos = 0; pos < labelCount; pos++){
           AttributeStats stats = data.attributeStats(labelIdx[pos]);

           // labels without nominal counts take part with entropy 0.0
           double entropy = 0.0;
           if(stats.nominalCounts != null){
               entropy = Utils.entropy(stats.nominalCounts);
           }

           if(entropy < lowest){
               lowest = entropy;
           }
       }

       this.value = lowest;

       return value;
}
项目:MLDA    文件:MaxEntropy.java   
/**
 * Computes the largest entropy among the label attributes and stores it as
 * this metric's value.
 * <p>
 * A label whose statistics carry no nominal counts counts as entropy 0.0.
 * The result is never below 0.0; with no labels, or when every label has
 * entropy 0, the result is exactly 0.0.
 *
 * @param mlData Multi-label dataset on which the metric is calculated
 * @return Value of the metric
 */
public double calculate(MultiLabelInstances mlData){        
    Instances instances = mlData.getDataSet();

    int nLabels = mlData.getNumLabels();
       int [] labels = mlData.getLabelIndices();

       double [] entropies = new double[nLabels];

       for(int i=0; i<nLabels; i++){
           AttributeStats attStats = instances.attributeStats(labels[i]);

           if(attStats.nominalCounts != null){
               entropies[i] = Utils.entropy(attStats.nominalCounts);
           }
       }

       // Start from 0.0 rather than Double.MIN_VALUE: MIN_VALUE is the
       // smallest POSITIVE double (4.9E-324), so it would be returned
       // instead of 0 whenever every entropy is 0 or there are no labels.
       double maxEntropy = 0.0;
       for(double e : entropies){
           if(e > maxEntropy){
               maxEntropy = e;
           }
       }

       this.value = maxEntropy;

       return value;
}
项目:MLDA    文件:MeanEntropy.java   
/**
 * Computes the mean entropy over the label attributes and stores it as this
 * metric's value.
 * <p>
 * A label whose statistics carry no nominal counts contributes 0.0 to the
 * sum but still counts in the divisor. With no labels the division is
 * 0.0/0 and the result is {@code NaN}.
 *
 * @param mlData Multi-label dataset on which the metric is calculated
 * @return Value of the metric
 */
public double calculate(MultiLabelInstances mlData){        
    Instances data = mlData.getDataSet();

    int labelCount = mlData.getNumLabels();
    int [] labelIdx = mlData.getLabelIndices();

    double entropySum = 0;

    for(int pos = 0; pos < labelCount; pos++){
        AttributeStats stats = data.attributeStats(labelIdx[pos]);

        // labels without nominal counts add 0.0
        double entropy = 0.0;
        if(stats.nominalCounts != null){
            entropy = Utils.entropy(stats.nominalCounts);
        }
        entropySum += entropy;
    }

    this.value = entropySum / labelCount;

    return value;
}
项目:repo.kmeanspp.silhouette_score    文件:IncrementalQuantileEstimator.java   
/**
 * Command-line check of the estimator against a dataset file.
 * <p>
 * Feeds every non-missing value of one attribute into an
 * {@code IncrementalQuantileEstimator}, prints the estimated quantile, then
 * sorts the data on that attribute and prints the quantile read directly
 * from the sorted values together with the attribute mean. All output goes
 * to standard error; any exception is printed as a stack trace.
 *
 * @param args {@code args[0]} path of the dataset file, {@code args[1]} the
 *          quantile to estimate, {@code args[2]} the 1-based index of the
 *          attribute to use
 */
public static void main(String[] args) {
  try {
    // Load the dataset and release the file handle straight away; the
    // reader is not needed again after the constructor returns.
    java.io.Reader reader = new java.io.FileReader(args[0]);
    weka.core.Instances inst;
    try {
      inst = new weka.core.Instances(reader);
    } finally {
      reader.close();
    }

    double quantile = Double.parseDouble(args[1]);
    IncrementalQuantileEstimator ps =
      new IncrementalQuantileEstimator(quantile);

    // the attribute index is given 1-based on the command line
    int attIndex = Integer.parseInt(args[2]) - 1;

    for (int i = 0; i < inst.numInstances(); i++) {
      if (!inst.instance(i).isMissing(attIndex)) {
        ps.add(inst.instance(i).value(attIndex));
      }
    }

    System.err.println("Estimated quantile (" + quantile + ") "
      + ps.getQuantile());

    // Reference value taken from the sorted data.
    // NOTE(review): indexing by the non-missing count presumably relies on
    // sort() placing missing values last - confirm against Instances.sort.
    inst.sort(attIndex);
    double actualQuant = 0;
    AttributeStats as = inst.attributeStats(attIndex);
    double pIndex = quantile * (inst.numInstances() - as.missingCount);
    double mean = as.numericStats.mean;
    if (pIndex - (int) pIndex > 0) {
      // fractional position: take the value at the truncated index
      pIndex = (int) pIndex;
      actualQuant = inst.instance((int) pIndex).value(attIndex);
    } else {
      // whole-number position: average the two neighbouring values
      double f = inst.instance((int) pIndex - 1).value(attIndex);
      double s = inst.instance((int) pIndex).value(attIndex);
      actualQuant = (f + s) / 2.0;
    }

    System.err.println("Actual quantile (" + quantile + ") " + actualQuant);
    System.err.println("Mean: " + mean);
  } catch (Exception ex) {
    ex.printStackTrace();
  }
}
项目:repo.kmeanspp.silhouette_score    文件:Cobweb.java   
/**
 * Adds the supplied instance to, or removes it from, the per-attribute
 * statistics, weighted by the instance's weight.
 * <p>
 * The statistics array is created on first use: nominal attributes get a
 * count array with one slot per value, all others a {@code Stats} object.
 * Missing values are skipped. The total instance weight is adjusted as well.
 *
 * @param updateInstance the instance for updating
 * @param delete true if the values of the supplied instance are to be
 *          removed from the statistics
 */
protected void updateStats(Instance updateInstance, boolean delete) {

  // lazily create one statistics holder per attribute
  if (m_attStats == null) {
    m_attStats = new AttributeStats[m_numAttributes];
    for (int att = 0; att < m_numAttributes; att++) {
      m_attStats[att] = new AttributeStats();
      AttributeStats created = m_attStats[att];
      if (m_clusterInstances.attribute(att).isNominal()) {
        created.nominalCounts =
          new int[m_clusterInstances.attribute(att).numValues()];
      } else {
        created.numericStats = new Stats();
      }
    }
  }

  for (int att = 0; att < m_numAttributes; att++) {
    if (updateInstance.isMissing(att)) {
      continue;
    }
    double value = updateInstance.value(att);
    AttributeStats target = m_attStats[att];

    if (m_clusterInstances.attribute(att).isNominal()) {
      // signed weight; the compound assignments narrow it to the int fields
      double signedWeight = (delete) ? (-1.0 * updateInstance.weight())
        : updateInstance.weight();
      target.nominalCounts[(int) value] += signedWeight;
      target.totalCount += signedWeight;
    } else if (delete) {
      target.numericStats.subtract(value, updateInstance.weight());
    } else {
      target.numericStats.add(value, updateInstance.weight());
    }
  }

  m_totalInstances += (delete) ? (-1.0 * updateInstance.weight())
    : (updateInstance.weight());
}
项目:repo.kmeanspp.silhouette_score    文件:AttributeVisualizationPanel.java   
/**
 * Installs a copy of the given dataset in the panel, rebuilds the colouring
 * combo box (when present), chooses the class index and resets the
 * per-attribute statistics cache.
 *
 * @param newins a set of Instances
 */
public void setInstances(Instances newins) {

  m_attribIndex = 0;
  m_as = null;
  m_data = new Instances(newins);

  if (m_colorAttrib != null) {
    // entry 0 is "No class"; attribute i is listed at position i + 1
    m_colorAttrib.removeAllItems();
    m_colorAttrib.addItem("No class");
    for (int att = 0; att < m_data.numAttributes(); att++) {
      String shortType = "(" + Attribute.typeToStringShort(m_data.attribute(att))
        + ")";
      String entry = new String("Class: " + m_data.attribute(att).name()
        + " " + shortType);
      m_colorAttrib.addItem(entry);
    }
    // select the class attribute's entry, or the last entry if no class is
    // set
    int selection = (m_data.classIndex() >= 0)
      ? m_data.classIndex() + 1
      : m_data.numAttributes();
    m_colorAttrib.setSelectedIndex(selection);
  }

  // fall back to the last attribute when the data has no class index
  m_classIndex = (m_data.classIndex() >= 0)
    ? m_data.classIndex()
    : m_data.numAttributes() - 1;

  m_asCache = new AttributeStats[m_data.numAttributes()];
}
项目:repo.kmeanspp.silhouette_score    文件:AttributeSummaryPanel.java   
/**
 * Fills the missing, unique and distinct labels from the cached
 * AttributeStats of one attribute and refreshes the statistics table.
 * Missing and unique counts are shown with their rounded percentage of the
 * total count.
 *
 * @param index the index of the attribute
 */
protected void setDerived(int index) {

  AttributeStats stats = m_AttributeStats[index];

  // percentages are rounded to whole numbers
  long missingPct = Math.round(100.0 * stats.missingCount / stats.totalCount);
  long uniquePct = Math.round(100.0 * stats.uniqueCount / stats.totalCount);

  m_MissingLab.setText("" + stats.missingCount + " (" + missingPct + "%)");
  m_UniqueLab.setText("" + stats.uniqueCount + " (" + uniquePct + "%)");
  m_DistinctLab.setText("" + stats.distinctCount);
  setTable(stats, index);
}
项目:collective-classification-weka-package    文件:Chopper.java   
/**
 * Builds the classifier: records the proportions of the first two class
 * values in the training set, derives how many test instances are added per
 * iteration, then builds the training set and the classifier once per fold,
 * stopping early when the cut-off fold is reached.
 * 
 * @throws Exception    if something goes wrong
 */
@Override
protected void build() throws Exception {
  AttributeStats        stats;
  int                   i;

  // determine class distribution; the casts force floating-point division -
  // both operands are ints, so the plain quotient would truncate every
  // proportion to 0 or 1
  m_ClassDistribution = new double[2];
  stats = m_Trainset.attributeStats(m_Trainset.classIndex());
  for (i = 0; i < 2; i++)
    m_ClassDistribution[i] =   (double) stats.nominalCounts[i] 
                             / (double) stats.totalCount;

  // the number of instances added to the training set in each iteration
  m_InstancesPerIteration =   (double) m_Testset.numInstances() 
                            / getFolds();
  if (getDebug())
    System.out.println("InstancesPerIteration: " + m_InstancesPerIteration);

  // build classifier; the bound is inclusive, so the loop runs for folds
  // 0 through getFolds()
  m_Random = new Random(getSeed());
  for (i = 0; i <= getFolds(); i++) {
    if (getVerbose() || getDebug()) {
      if (getCutOff() > 0)
        System.out.println(   "\nFold " + i + "/" + getFolds() 
                            + " (CutOff at " + getCutOff() + ")");
      else
        System.out.println("\nFold " + i + "/" + getFolds());
    }
    buildTrainSet(i);
    buildClassifier();

    // cutoff of folds reached?
    if ( (i > 0) && (i == getCutOff()) )
      break;
  }
}
项目:collective-classification-weka-package    文件:DecisionTreeNode.java   
/**
 * Sets the class probabilities to the relative frequency of each class
 * value in the given data: the nominal count of the value divided by the
 * sum of all nominal counts of the class attribute.
 * 
 * @param data  the data to get the class probabilities from
 */
public void setClassProbabilities(Instances data) {
  AttributeStats classStats = data.attributeStats(data.classIndex());
  int countSum = Utils.sum(classStats.nominalCounts);

  m_ClassProbs = new double[data.classAttribute().numValues()];
  for (int c = 0; c < m_ClassProbs.length; c++) {
    // both operands are cast so the quotient is not truncated
    m_ClassProbs[c] = (double) classStats.nominalCounts[c] / (double) countSum;
  }
}
项目:collective-classification-weka-package    文件:CollectiveInstances.java   
/**
 * Randomly assigns one of the first two class values to a range of
 * instances, following the share of the first class value in the training
 * set. The flip counter is reset and the affected class values are set to
 * missing before the new labels are drawn.
 *
 * @param train       the training instances to retrieve the class
 *                    distribution from
 * @param instances   the instances to initialize
 * @param from        the first instance to initialize
 * @param count       the number of instances to initialize
 * @return            the initialized instances (the same object as passed in)
 * @throws Exception  if something goes wrong
 */
public Instances initializeLabels( Instances train, Instances instances, 
                                   int from, int count )
  throws Exception {

  final int end = from + count;

  // reset flip count
  m_FlippedLabels = 0;

  // explicitly set labels to "missing"
  for (int row = from; row < end; row++) {
    instances.instance(row).setClassMissing();
  }

  // share of the first class value in the training set
  AttributeStats classStats = train.attributeStats(train.classIndex());
  double firstShare =
    (double) classStats.nominalCounts[0] / (double) classStats.totalCount;

  // set labels: value 0 with probability firstShare, value 1 otherwise
  Attribute classAttr = instances.attribute(instances.classIndex());
  for (int row = from; row < end; row++) {
    int picked = (m_Random.nextDouble() < firstShare) ? 0 : 1;
    instances.instance(row).setClassValue(classAttr.value(picked));
  }

  return instances;
}
项目:autoweka    文件:Cobweb.java   
/**
 * Adds the supplied instance to, or removes it from, the per-attribute
 * statistics, weighted by the instance's weight.
 * <p>
 * The statistics array is created on first use: nominal attributes get a
 * count array with one slot per value, all others a {@code Stats} object.
 * Missing values are skipped. The total instance weight is adjusted as well.
 *
 * @param updateInstance the instance for updating
 * @param delete true if the values of the supplied instance are
 * to be removed from the statistics
 */
protected void updateStats(Instance updateInstance,
                           boolean delete) {

  // lazily create one statistics holder per attribute
  if (m_attStats == null) {
    m_attStats = new AttributeStats[m_numAttributes];
    for (int att = 0; att < m_numAttributes; att++) {
      m_attStats[att] = new AttributeStats();
      AttributeStats created = m_attStats[att];
      if (m_clusterInstances.attribute(att).isNominal()) {
        created.nominalCounts =
          new int [m_clusterInstances.attribute(att).numValues()];
      } else {
        created.numericStats = new Stats();
      }
    }
  }

  for (int att = 0; att < m_numAttributes; att++) {
    if (updateInstance.isMissing(att)) {
      continue;
    }
    double value = updateInstance.value(att);
    AttributeStats target = m_attStats[att];

    if (m_clusterInstances.attribute(att).isNominal()) {
      // signed weight; the compound assignments narrow it to the int fields
      double signedWeight = (delete)
        ? (-1.0 * updateInstance.weight())
        : updateInstance.weight();
      target.nominalCounts[(int)value] += signedWeight;
      target.totalCount += signedWeight;
    } else if (delete) {
      target.numericStats.subtract(value, updateInstance.weight());
    } else {
      target.numericStats.add(value, updateInstance.weight());
    }
  }

  m_totalInstances += (delete)
    ? (-1.0 * updateInstance.weight())
    : (updateInstance.weight());
}
项目:autoweka    文件:AttributeVisualizationPanel.java   
/**
 * Installs a copy of the given dataset in the panel, rebuilds the colouring
 * combo box (when present), chooses the class index and resets the
 * per-attribute statistics cache.
 *
 * @param newins a set of Instances
 */
public void setInstances(Instances newins) {

  m_attribIndex = 0;
  m_as = null;
  m_data = new Instances(newins);

  if (m_colorAttrib != null) {
    // entry 0 is "No class"; attribute i is listed at position i + 1
    m_colorAttrib.removeAllItems();
    m_colorAttrib.addItem("No class");
    for (int att = 0; att < m_data.numAttributes(); att++) {
      String shortType = "(" + Attribute.typeToStringShort(m_data.attribute(att)) + ")";
      String entry = new String("Class: " + m_data.attribute(att).name() + " " + shortType);
      m_colorAttrib.addItem(entry);
    }
    // select the class attribute's entry, or the last entry if no class is
    // set
    int selection = (m_data.classIndex() >= 0)
      ? m_data.classIndex() + 1
      : m_data.numAttributes();
    m_colorAttrib.setSelectedIndex(selection);
  }

  // fall back to the last attribute when the data has no class index
  m_classIndex = (m_data.classIndex() >= 0)
    ? m_data.classIndex()
    : m_data.numAttributes()-1;

  m_asCache = new AttributeStats[m_data.numAttributes()];
}
项目:autoweka    文件:AttributeSummaryPanel.java   
/**
 * Updates the missing, unique and distinct labels from the cached
 * AttributeStats of the given attribute, then refreshes the statistics
 * table. Missing and unique counts are followed by their share of the total
 * count, rounded to a whole percentage.
 * 
 * @param index the index of the attribute
 */
protected void setDerived(int index) {

  AttributeStats cached = m_AttributeStats[index];

  long pctMissing = Math.round(100.0 * cached.missingCount / cached.totalCount);
  long pctUnique = Math.round(100.0 * cached.uniqueCount / cached.totalCount);

  m_MissingLab.setText("" + cached.missingCount + " (" + pctMissing + "%)");
  m_UniqueLab.setText("" + cached.uniqueCount + " (" + pctUnique + "%)");
  m_DistinctLab.setText("" + cached.distinctCount);
  setTable(cached, index);
}
项目:autoweka    文件:MathExpression.java   
/**
 * Signals that the current batch of input is complete.
 * <p>
 * On the first batch (no statistics collected yet) the statistics of every
 * numeric non-class attribute are computed from the buffered input and all
 * buffered instances are converted. The input buffer is then flushed and
 * the filter is marked as ready for a new batch.
 *
 * @return true if there are instances pending output
 * @throws IllegalStateException if no input structure has been defined
 */
public boolean batchFinished() throws Exception {

  if (getInputFormat() == null) {
    throw new IllegalStateException("No input instance format defined");
  }

  if (m_attStats == null) {
    Instances buffered = getInputFormat();

    // entries stay null for non-numeric attributes and for the class
    m_attStats = new AttributeStats [buffered.numAttributes()];
    for (int att = 0; att < buffered.numAttributes(); att++) {
      if (!buffered.attribute(att).isNumeric()
          || (buffered.classIndex() == att)) {
        continue;
      }
      m_attStats[att] = buffered.attributeStats(att);
    }

    // Convert pending input instances
    int row = 0;
    while (row < buffered.numInstances()) {
      convertInstance(buffered.instance(row));
      row++;
    }
  }

  // Free memory
  flushInput();

  m_NewBatch = true;
  boolean outputWaiting = (numPendingOutput() != 0);
  return outputWaiting;
}
项目:umple    文件:Cobweb.java   
/**
 * Folds the supplied instance into the per-attribute statistics, or takes
 * it out again, using the instance's weight.
 * <p>
 * On the first call the statistics array is allocated: a count array sized
 * by the number of values for nominal attributes, a {@code Stats} object
 * for everything else. Attributes missing in the instance are left
 * untouched. The running total of instance weight is updated last.
 * 
 * @param updateInstance the instance for updating
 * @param delete true if the values of the supplied instance are to be
 *          removed from the statistics
 */
protected void updateStats(Instance updateInstance, boolean delete) {

  // first use: allocate a statistics holder for each attribute
  if (m_attStats == null) {
    m_attStats = new AttributeStats[m_numAttributes];
    for (int a = 0; a < m_numAttributes; a++) {
      m_attStats[a] = new AttributeStats();
      AttributeStats holder = m_attStats[a];
      if (m_clusterInstances.attribute(a).isNominal()) {
        holder.nominalCounts =
          new int[m_clusterInstances.attribute(a).numValues()];
      } else {
        holder.numericStats = new Stats();
      }
    }
  }

  for (int a = 0; a < m_numAttributes; a++) {
    if (updateInstance.isMissing(a)) {
      continue;
    }
    double value = updateInstance.value(a);
    AttributeStats holder = m_attStats[a];

    if (m_clusterInstances.attribute(a).isNominal()) {
      // negative when deleting; += narrows the double to the int fields
      double delta = (delete) ? (-1.0 * updateInstance.weight())
        : updateInstance.weight();
      holder.nominalCounts[(int) value] += delta;
      holder.totalCount += delta;
    } else if (delete) {
      holder.numericStats.subtract(value, updateInstance.weight());
    } else {
      holder.numericStats.add(value, updateInstance.weight());
    }
  }

  m_totalInstances += (delete) ? (-1.0 * updateInstance.weight())
    : (updateInstance.weight());
}
项目:umple    文件:AttributeVisualizationPanel.java   
/**
 * Replaces the panel's data with a copy of the given dataset. The colouring
 * combo box, if one exists, is repopulated with one entry per attribute; the
 * class index is chosen and the statistics cache is emptied.
 * 
 * @param newins a set of Instances
 */
public void setInstances(Instances newins) {

  m_attribIndex = 0;
  m_as = null;
  m_data = new Instances(newins);

  if (m_colorAttrib != null) {
    // "No class" occupies position 0, so attribute i sits at position i + 1
    m_colorAttrib.removeAllItems();
    m_colorAttrib.addItem("No class");
    for (int a = 0; a < m_data.numAttributes(); a++) {
      String typeTag = "(" + Attribute.typeToStringShort(m_data.attribute(a))
        + ")";
      String item = new String("Class: " + m_data.attribute(a).name()
        + " " + typeTag);
      m_colorAttrib.addItem(item);
    }
    // with a class set, pick its entry; otherwise pick the final entry
    int chosen = (m_data.classIndex() >= 0)
      ? m_data.classIndex() + 1
      : m_data.numAttributes();
    m_colorAttrib.setSelectedIndex(chosen);
  }

  // without a class index the last attribute is used
  m_classIndex = (m_data.classIndex() >= 0)
    ? m_data.classIndex()
    : m_data.numAttributes() - 1;

  m_asCache = new AttributeStats[m_data.numAttributes()];
}
项目:umple    文件:AttributeSummaryPanel.java   
/**
 * Writes the missing, unique and distinct figures of one attribute into
 * their labels, taken from the cached AttributeStats, and then updates the
 * statistics table. The missing and unique figures carry a rounded
 * percentage of the total count.
 * 
 * @param index the index of the attribute
 */
protected void setDerived(int index) {

  AttributeStats entry = m_AttributeStats[index];

  long missingShare = Math.round(100.0 * entry.missingCount / entry.totalCount);
  long uniqueShare = Math.round(100.0 * entry.uniqueCount / entry.totalCount);

  m_MissingLab.setText("" + entry.missingCount + " (" + missingShare + "%)");
  m_UniqueLab.setText("" + entry.uniqueCount + " (" + uniqueShare + "%)");
  m_DistinctLab.setText("" + entry.distinctCount);
  setTable(entry, index);
}
项目:umple    文件:MathExpression.java   
/**
 * Signals that the current batch of input is complete.
 * <p>
 * If no attribute statistics exist yet, they are computed from the buffered
 * input for every numeric attribute other than the class, and each buffered
 * instance is converted. Afterwards the input buffer is flushed and the
 * filter is flagged as starting a new batch.
 * 
 * @return true if there are instances pending output
 * @throws IllegalStateException if no input structure has been defined
 */
@Override
public boolean batchFinished() throws Exception {

  if (getInputFormat() == null) {
    throw new IllegalStateException("No input instance format defined");
  }

  if (m_attStats == null) {
    Instances pending = getInputFormat();

    // slots of non-numeric attributes and of the class remain null
    m_attStats = new AttributeStats[pending.numAttributes()];
    for (int a = 0; a < pending.numAttributes(); a++) {
      if (!pending.attribute(a).isNumeric() || (pending.classIndex() == a)) {
        continue;
      }
      m_attStats[a] = pending.attributeStats(a);
    }

    // Convert pending input instances
    int row = 0;
    while (row < pending.numInstances()) {
      convertInstance(pending.instance(row));
      row++;
    }
  }

  // Free memory
  flushInput();

  m_NewBatch = true;
  boolean hasOutput = (numPendingOutput() != 0);
  return hasOutput;
}
项目:jbossBA    文件:Cobweb.java   
/**
 * Applies the supplied instance to the per-attribute statistics, either
 * adding its values or removing them, scaled by the instance's weight.
 * <p>
 * The statistics array is built the first time it is needed: nominal
 * attributes receive a count array with one entry per value, the rest a
 * {@code Stats} object. Values missing in the instance are ignored. The
 * total instance weight is updated at the end.
 *
 * @param updateInstance the instance for updating
 * @param delete true if the values of the supplied instance are
 * to be removed from the statistics
 */
protected void updateStats(Instance updateInstance,
                           boolean delete) {

  // build the statistics holders on demand
  if (m_attStats == null) {
    m_attStats = new AttributeStats[m_numAttributes];
    for (int col = 0; col < m_numAttributes; col++) {
      m_attStats[col] = new AttributeStats();
      AttributeStats made = m_attStats[col];
      if (m_clusterInstances.attribute(col).isNominal()) {
        made.nominalCounts =
          new int [m_clusterInstances.attribute(col).numValues()];
      } else {
        made.numericStats = new Stats();
      }
    }
  }

  for (int col = 0; col < m_numAttributes; col++) {
    if (updateInstance.isMissing(col)) {
      continue;
    }
    double value = updateInstance.value(col);
    AttributeStats current = m_attStats[col];

    if (m_clusterInstances.attribute(col).isNominal()) {
      // weight with its sign; += narrows the double to the int fields
      double change = (delete)
        ? (-1.0 * updateInstance.weight())
        : updateInstance.weight();
      current.nominalCounts[(int)value] += change;
      current.totalCount += change;
    } else if (delete) {
      current.numericStats.subtract(value, updateInstance.weight());
    } else {
      current.numericStats.add(value, updateInstance.weight());
    }
  }

  m_totalInstances += (delete)
    ? (-1.0 * updateInstance.weight())
    : (updateInstance.weight());
}
项目:jbossBA    文件:AttributeSummaryPanel.java   
/**
 * Points the panel at a new dataset: stores the instances, re-creates the
 * per-attribute statistics cache with one empty slot per attribute, and
 * resets every summary label and the statistics table.
 *
 * @param inst a set of Instances
 */
public void setInstances(Instances inst) {

  m_Instances = inst;

  // one slot per attribute, all empty
  final int attributeCount = inst.numAttributes();
  m_AttributeStats = new AttributeStats [attributeCount];

  // placeholder text until an attribute is selected
  m_AttributeNameLab.setText(NO_SOURCE);
  m_AttributeTypeLab.setText(NO_SOURCE);
  m_MissingLab.setText(NO_SOURCE);
  m_UniqueLab.setText(NO_SOURCE);
  m_DistinctLab.setText(NO_SOURCE);

  // empty table model clears any previously shown statistics
  DefaultTableModel emptyModel = new DefaultTableModel();
  m_StatsTable.setModel(emptyModel);
}
项目:jbossBA    文件:AttributeSummaryPanel.java   
/**
 * Fills in the labels whose text is derived from the AttributeStats
 * structure of one attribute (missing, unique and distinct counts), then
 * refreshes the statistics table for that attribute.
 *
 * @param index the index of the attribute
 */
protected void setDerived(int index) {

  final AttributeStats stats = m_AttributeStats[index];

  // percentages are relative to the attribute's total count
  final long missingPercent =
    Math.round(100.0 * stats.missingCount / stats.totalCount);
  final long uniquePercent =
    Math.round(100.0 * stats.uniqueCount / stats.totalCount);

  m_MissingLab.setText(stats.missingCount + " (" + missingPercent + "%)");
  m_UniqueLab.setText(stats.uniqueCount + " (" + uniquePercent + "%)");
  m_DistinctLab.setText(String.valueOf(stats.distinctCount));

  setTable(stats, index);
}
项目:jbossBA    文件:MathExpression.java   
/**
 * Signals that the current batch of input is complete. While no attribute
 * statistics exist yet, statistics are collected for every numeric
 * non-class attribute of the input format and the buffered input
 * instances are converted.
 *
 * @return true if there are instances pending output
 * @throws IllegalStateException if no input structure has been defined
 */
public boolean batchFinished() throws Exception {

  final Instances input = getInputFormat();
  if (input == null) {
    throw new IllegalStateException("No input instance format defined");
  }

  if (m_attStats == null) {
    final int numAtts = input.numAttributes();
    m_attStats = new AttributeStats[numAtts];

    for (int att = 0; att < numAtts; att++) {
      // entries for non-numeric attributes and the class attribute stay null
      if (!input.attribute(att).isNumeric() || input.classIndex() == att) {
        continue;
      }
      m_attStats[att] = input.attributeStats(att);
    }

    // Convert pending input instances
    for (int row = 0; row < input.numInstances(); row++) {
      convertInstance(input.instance(row));
    }
  }

  // Free memory
  flushInput();
  m_NewBatch = true;

  final boolean outputPending = numPendingOutput() != 0;
  return outputPending;
}
项目:moa    文件:CobWeb.java   
/**
 * Applies one instance to the per-attribute statistics, either adding its
 * values or taking them back out, and adjusts the total instance weight
 * accordingly.
 *
 * @param updateInstance the instance for updating
 * @param delete true if the values of the supplied instance are
 * to be removed from the statistics
 */
protected void updateStats(Instance updateInstance,
        boolean delete) {

    // Build the statistics array on first use.
    if (m_attStats == null) {
        m_attStats = new AttributeStats[m_numAttributes];
        for (int idx = 0; idx < m_numAttributes; idx++) {
            AttributeStats created = new AttributeStats();
            m_attStats[idx] = created;
            if (m_clusterInstances.attribute(idx).isNominal()) {
                int numValues = m_clusterInstances.attribute(idx).numValues();
                created.nominalCounts = new int[numValues];
            } else {
                created.numericStats = new Stats();
            }
        }
    }

    for (int idx = 0; idx < m_numAttributes; idx++) {
        if (updateInstance.isMissing(idx)) {
            // nothing to record for a missing value
            continue;
        }
        double value = updateInstance.value(idx);
        if (m_clusterInstances.attribute(idx).isNominal()) {
            double signedWeight;
            if (delete) {
                signedWeight = -1.0 * updateInstance.weight();
            } else {
                signedWeight = updateInstance.weight();
            }
            // "+=" narrows the double result back into the int counter
            m_attStats[idx].nominalCounts[(int) value] += signedWeight;
            m_attStats[idx].totalCount += signedWeight;
        } else if (delete) {
            m_attStats[idx].numericStats.subtract(value,
                    updateInstance.weight());
        } else {
            m_attStats[idx].numericStats.add(value, updateInstance.weight());
        }
    }

    // overall weight follows the same add/remove direction
    if (delete) {
        m_totalInstances += (-1.0 * updateInstance.weight());
    } else {
        m_totalInstances += (updateInstance.weight());
    }
}
项目:repo.kmeanspp.silhouette_score    文件:Apriori.java   
/**
 * Removes columns that are all missing from the data. As a side effect,
 * while the upper bound on minimum support is still 1.0, it is lowered to
 * the relative frequency of the most frequent single attribute value
 * (unless some value occurs in every instance).
 * 
 * @param instances the instances
 * @return a new set of instances with all missing columns removed, or the
 *         supplied instances unchanged if no column was entirely missing
 * @throws Exception if something goes wrong
 */
protected Instances removeMissingColumns(Instances instances)
  throws Exception {

  final int numInstances = instances.numInstances();
  // comma-separated, 1-based indices of the columns to delete
  final StringBuilder deleteString = new StringBuilder();
  int removeCount = 0;
  int maxCount = 0;

  for (int i = 0; i < instances.numAttributes(); i++) {
    AttributeStats as = instances.attributeStats(i);
    if (m_upperBoundMinSupport == 1.0 && maxCount != numInstances) {
      // see if we can decrease this by looking for the most frequent value
      int[] counts = as.nominalCounts;
      // counts are only available for nominal attributes; skip anything
      // else (and value-less attributes) instead of failing on them
      if (counts != null && counts.length > 0) {
        int mostFrequent = counts[Utils.maxIndex(counts)];
        if (mostFrequent > maxCount) {
          maxCount = mostFrequent;
        }
      }
    }
    if (as.missingCount == numInstances) {
      if (deleteString.length() > 0) {
        deleteString.append(',');
      }
      deleteString.append(i + 1);
      removeCount++;
    }
  }
  if (m_verbose) {
    System.err.println("Removed : " + removeCount
      + " columns with all missing " + "values.");
  }
  if (m_upperBoundMinSupport == 1.0 && maxCount != numInstances) {
    m_upperBoundMinSupport = (double) maxCount / (double) numInstances;
    if (m_verbose) {
      System.err.println("Setting upper bound min support to : "
        + m_upperBoundMinSupport);
    }
  }

  if (deleteString.length() > 0) {
    Remove af = new Remove();
    af.setAttributeIndices(deleteString.toString());
    af.setInvertSelection(false);
    af.setInputFormat(instances);
    return Filter.useFilter(instances, af);
  }
  return instances;
}
项目:repo.kmeanspp.silhouette_score    文件:RemoveUseless.java   
/**
 * Signals that the current batch of input is complete. While no removal
 * filter exists yet, the attributes to drop are determined from the
 * buffered data, a Remove filter is configured for them, and the buffered
 * instances are passed through it into the output queue.
 * 
 * @return true if there are instances pending output
 * @throws Exception if no input format defined
 */
@Override
public boolean batchFinished() throws Exception {

  final Instances toFilter = getInputFormat();
  if (toFilter == null) {
    throw new IllegalStateException("No input instance format defined");
  }

  if (m_removeFilter == null) {

    // establish attributes to remove from first batch
    final int[] candidates = new int[toFilter.numAttributes()];
    int numToDelete = 0;

    for (int att = 0; att < toFilter.numAttributes(); att++) {
      if (att == toFilter.classIndex()) {
        continue; // skip class
      }

      final AttributeStats stats = toFilter.attributeStats(att);
      boolean drop = false;

      if (stats.missingCount == toFilter.numInstances()
        || stats.distinctCount < 2) {
        // entirely missing, or constant
        drop = true;
      } else if (toFilter.attribute(att).isNominal()) {
        // nominal attributes that vary too much are dropped as well
        double variancePercent = (double) stats.distinctCount
          / (double) (stats.totalCount - stats.missingCount) * 100.0;
        drop = variancePercent > m_maxVariancePercentage;
      }

      if (drop) {
        candidates[numToDelete] = att;
        numToDelete++;
      }
    }

    // trim the candidate buffer to the entries actually used
    final int[] finalAttsToDelete = new int[numToDelete];
    System.arraycopy(candidates, 0, finalAttsToDelete, 0, numToDelete);

    m_removeFilter = new Remove();
    m_removeFilter.setAttributeIndicesArray(finalAttsToDelete);
    m_removeFilter.setInvertSelection(false);
    m_removeFilter.setInputFormat(toFilter);

    for (int row = 0; row < toFilter.numInstances(); row++) {
      m_removeFilter.input(toFilter.instance(row));
    }
    m_removeFilter.batchFinished();

    final Instances outputDataset = m_removeFilter.getOutputFormat();

    // restore old relation name to hide attribute filter stamp
    outputDataset.setRelationName(toFilter.relationName());
    setOutputFormat(outputDataset);

    // drain the inner filter into this filter's output queue
    for (Instance processed = m_removeFilter.output(); processed != null;
      processed = m_removeFilter.output()) {
      processed.setDataset(outputDataset);
      push(processed);
    }
  }

  flushInput();
  m_NewBatch = true;

  return numPendingOutput() != 0;
}
项目:repo.kmeanspp.silhouette_score    文件:RemoveFrequentValues.java   
/**
 * determines the values to retain, it is always at least 1 and up to the
 * maximum number of distinct values
 * <p>
 * The per-value counts are sorted on a copy, so the attribute statistics
 * only have to be computed once and the original value order stays
 * available for the selection loop.
 * 
 * @param inst the Instances to determine the values from which are kept
 */
public void determineValues(Instances inst) {
  m_AttIndex.setUpper(inst.numAttributes() - 1);
  final int attIdx = m_AttIndex.getIndex();

  // init names
  m_Values = new HashSet<String>();

  // occurrence count per value, in the attribute's value order
  final int[] counts = inst.attributeStats(attIdx).nominalCounts;
  final int numValues = counts.length;

  // number of values to retain
  int count = m_Invert ? numValues - m_NumValues : m_NumValues;
  // out of bounds? -> fix
  if (count < 1) {
    count = 1; // at least one value!
  }
  if (count > numValues) {
    count = numValues; // at max the existing values
  }

  // determine min/max occurences on a sorted copy; "counts" must keep
  // its value order because index i below maps to attribute value i
  final int[] sorted = counts.clone();
  Arrays.sort(sorted);
  final int min;
  final int max;
  if (m_LeastValues) {
    min = sorted[0];
    max = sorted[count - 1];
  } else {
    min = sorted[numValues - count];
    max = sorted[numValues - 1];
  }

  // add values if they are inside min/max (incl. borders) and not more than
  // count
  for (int i = 0; i < numValues; i++) {
    if ((counts[i] >= min) && (counts[i] <= max)
      && (m_Values.size() < count)) {
      m_Values.add(inst.attribute(attIdx).value(i));
    }
  }
}
项目:collective-classification-weka-package    文件:CollectiveForest.java   
/**
 * here initialization and building, possible iterations will happen:
 * determines the number of features to select, computes the class
 * distribution of the training set, ranks the test instances and then
 * builds the classifier fold by fold until all folds are processed or
 * the cut-off fold is reached.
 * 
 * @throws Exception    if something goes wrong
 */
@Override
protected void build() throws Exception {
  AttributeStats        stats;
  int                   i;

  // determine number of features to be selected
  m_KValue = getNumFeatures();
  if (m_KValue < 1) {
    m_KValue = (int) Utils.log2(m_Trainset.numAttributes()) + 1;
  }

  // determine class distribution
  m_ClassDistribution = new double[2];
  stats = m_Trainset.attributeStats(m_Trainset.classIndex());
  for (i = 0; i < 2; i++) {
    if (stats.totalCount > 0) {
      // both operands are ints: force floating-point division, otherwise
      // every proportion below 1 is truncated to 0
      m_ClassDistribution[i] =
        (double) stats.nominalCounts[i] / (double) stats.totalCount;
    } else {
      m_ClassDistribution[i] = 0;
    }
  }

  // the number of instances added to the training set in each iteration
  m_InstancesPerIteration =   (double) m_Testset.numInstances() 
                            / getFolds();
  if (getDebug()) {
    System.out.println("InstancesPerIteration: " + m_InstancesPerIteration);
  }

  // build list of sorted test instances
  m_List = new RankedList(m_Testset, m_ClassDistribution);

  // build classifier
  m_Random = new Random(getSeed());
  for (i = 0; i <= getFolds(); i++) {
    if (getVerbose()) {
      if (getCutOff() > 0) {
        System.out.println(   "\nFold " + i + "/" + getFolds() 
                            + " (CutOff at " + getCutOff() + ")");
      } else {
        System.out.println("\nFold " + i + "/" + getFolds());
      }
    }
    buildTrainSet(i);
    buildClassifier();

    // cutoff of folds reached?
    if ( (i > 0) && (i == getCutOff()) ) {
      break;
    }
  }
}