Java 类weka.core.ContingencyTables 实例源码

项目:repo.kmeanspp.silhouette_score    文件:GainRatioSplitCrit.java   
/**
 * Help method for computing the split entropy, treating instances with
 * unknown (missing) attribute values as one additional bag.
 *
 * @param bags the distribution over the bags after the split
 * @param totalnoInst the total number of instances, including unknowns
 * @return the split entropy in bits
 */
private final double splitEnt(Distribution bags, double totalnoInst) {

  // Weight of instances not assigned to any bag (missing values).
  final double noUnknown = totalnoInst - bags.total();
  double sum = 0;

  if (Utils.gr(bags.total(), 0)) {
    for (int k = 0; k < bags.numBags(); k++) {
      sum -= lnFunc(bags.perBag(k));
    }
    sum -= lnFunc(noUnknown);
    sum += lnFunc(totalnoInst);
  }

  // Convert from natural log to log base 2.
  return sum / ContingencyTables.log2;
}
项目:repo.kmeanspp.silhouette_score    文件:EntropySplitCrit.java   
/**
 * Computes entropy of the test distribution with respect to the training
 * distribution (with Laplace-style corrections), in bits.
 *
 * @param train the distribution derived from the training data
 * @param test the distribution derived from the test data
 * @return the entropy-based criterion value
 */
public final double splitCritValue(Distribution train, Distribution test) {

  // Count classes occurring in either the training or the test data.
  int numClasses = 0;
  for (int j = 0; j < test.numClasses(); j++) {
    if (Utils.gr(train.perClass(j), 0) || Utils.gr(test.perClass(j), 0)) {
      numClasses++;
    }
  }

  // Entropy of the test data measured against the training counts,
  // using +1 per class and +numClasses per bag as corrections.
  double result = 0;
  for (int i = 0; i < test.numBags(); i++) {
    if (Utils.gr(test.perBag(i), 0)) {
      for (int j = 0; j < test.numClasses(); j++) {
        if (Utils.gr(test.perClassPerBag(i, j), 0)) {
          result -= test.perClassPerBag(i, j)
            * Math.log(train.perClassPerBag(i, j) + 1);
        }
      }
      result += test.perBag(i) * Math.log(train.perBag(i) + numClasses);
    }
  }

  // Convert from natural log to log base 2.
  return result / ContingencyTables.log2;
}
项目:umple    文件:GainRatioSplitCrit.java   
/**
 * Help method for computing the split entropy, treating instances with
 * unknown (missing) attribute values as one additional bag.
 *
 * @param bags the distribution over the bags after the split
 * @param totalnoInst the total number of instances, including unknowns
 * @return the split entropy in bits
 */
private final double splitEnt(Distribution bags, double totalnoInst) {

  // Weight of instances not assigned to any bag (missing values).
  final double noUnknown = totalnoInst - bags.total();
  double sum = 0;

  if (Utils.gr(bags.total(), 0)) {
    for (int k = 0; k < bags.numBags(); k++) {
      sum -= lnFunc(bags.perBag(k));
    }
    sum -= lnFunc(noUnknown);
    sum += lnFunc(totalnoInst);
  }

  // Convert from natural log to log base 2.
  return sum / ContingencyTables.log2;
}
项目:umple    文件:EntropySplitCrit.java   
/**
 * Computes entropy of the test distribution with respect to the training
 * distribution (with Laplace-style corrections), in bits.
 *
 * @param train the distribution derived from the training data
 * @param test the distribution derived from the test data
 * @return the entropy-based criterion value
 */
public final double splitCritValue(Distribution train, Distribution test) {

  // Count classes occurring in either the training or the test data.
  int numClasses = 0;
  for (int j = 0; j < test.numClasses(); j++) {
    if (Utils.gr(train.perClass(j), 0) || Utils.gr(test.perClass(j), 0)) {
      numClasses++;
    }
  }

  // Entropy of the test data measured against the training counts,
  // using +1 per class and +numClasses per bag as corrections.
  double result = 0;
  for (int i = 0; i < test.numBags(); i++) {
    if (Utils.gr(test.perBag(i), 0)) {
      for (int j = 0; j < test.numClasses(); j++) {
        if (Utils.gr(test.perClassPerBag(i, j), 0)) {
          result -= test.perClassPerBag(i, j)
            * Math.log(train.perClassPerBag(i, j) + 1);
        }
      }
      result += test.perBag(i) * Math.log(train.perBag(i) + numClasses);
    }
  }

  // Convert from natural log to log base 2.
  return result / ContingencyTables.log2;
}
项目:repo.kmeanspp.silhouette_score    文件:EntropyBasedSplitCrit.java   
/**
 * Help method for computing entropy: delegates to
 * {@code ContingencyTables.lnFunc}, short-circuiting tiny values to 0.
 *
 * @param num the (non-negative) count
 * @return the lnFunc value of num, or 0 if num is below 1e-6
 */
public final double lnFunc(double num) {

  // Threshold hard coded for efficiency; avoids work on (near-)zero counts.
  return (num < 1e-6) ? 0 : ContingencyTables.lnFunc(num);
}
项目:repo.kmeanspp.silhouette_score    文件:EntropyBasedSplitCrit.java   
/**
 * Computes entropy (in bits, un-normalized) of the class distribution
 * before splitting.
 *
 * @param bags the distribution to evaluate
 * @return the entropy of the class distribution
 */
public final double oldEnt(Distribution bags) {

  double classTerms = 0;
  for (int c = 0; c < bags.numClasses(); c++) {
    classTerms += lnFunc(bags.perClass(c));
  }
  // Division by log2 converts from natural log to log base 2.
  return (lnFunc(bags.total()) - classTerms) / ContingencyTables.log2;
}
项目:repo.kmeanspp.silhouette_score    文件:EntropyBasedSplitCrit.java   
/**
 * Computes entropy (in bits, un-normalized) of the class distribution
 * after splitting, summed over all bags.
 *
 * @param bags the distribution after the split
 * @return the entropy after the split
 */
public final double newEnt(Distribution bags) {

  double acc = 0;
  for (int b = 0; b < bags.numBags(); b++) {
    for (int c = 0; c < bags.numClasses(); c++) {
      acc += lnFunc(bags.perClassPerBag(b, c));
    }
    acc -= lnFunc(bags.perBag(b));
  }
  // Negate the accumulated terms and convert to log base 2.
  return -(acc / ContingencyTables.log2);
}
项目:repo.kmeanspp.silhouette_score    文件:EntropyBasedSplitCrit.java   
/**
 * Computes entropy (in bits) after splitting without considering the
 * class values, i.e. the entropy of the bag sizes alone.
 *
 * @param bags the distribution after the split
 * @return the split entropy
 */
public final double splitEnt(Distribution bags) {

  double bagTerms = 0;
  for (int b = 0; b < bags.numBags(); b++) {
    bagTerms += lnFunc(bags.perBag(b));
  }
  // Division by log2 converts from natural log to log base 2.
  return (lnFunc(bags.total()) - bagTerms) / ContingencyTables.log2;
}
项目:repo.kmeanspp.silhouette_score    文件:ClassifierDecList.java   
/**
 * Method for choosing a subset to expand: returns the index of the first
 * unexpanded son whose estimated per-instance entropy is (near-)zero,
 * otherwise the unexpanded son with the smallest estimated entropy, or -1
 * if no son qualifies.
 */
public final int chooseIndex() {

  int minIndex = -1;
  double estimated, min = Double.MAX_VALUE;
  int i, j;

  for (i = 0; i < m_sons.length; i++) {
    // Only consider sons that have not been expanded yet.
    if (son(i) == null) {
      // Too few instances in this bag: worst possible candidate.
      if (Utils.sm(localModel().distribution().perBag(i), m_minNumObj)) {
        estimated = Double.MAX_VALUE;
      } else {
        // Per-instance class entropy of the bag, converted to bits
        // by dividing by ContingencyTables.log2.
        estimated = 0;
        for (j = 0; j < localModel().distribution().numClasses(); j++) {
          estimated -= m_splitCrit.lnFunc(localModel().distribution()
            .perClassPerBag(i, j));
        }
        estimated += m_splitCrit
          .lnFunc(localModel().distribution().perBag(i));
        estimated /= (localModel().distribution().perBag(i) * ContingencyTables.log2);
      }
      // A pure (zero-entropy) bag is returned immediately.
      if (Utils.smOrEq(estimated, 0)) {
        return i;
      }
      // Otherwise keep track of the smallest estimated entropy seen.
      if (Utils.sm(estimated, min)) {
        min = estimated;
        minIndex = i;
      }
    }
  }

  return minIndex;
}
项目:umple    文件:EntropyBasedSplitCrit.java   
/**
 * Help method for computing entropy: delegates to
 * {@code ContingencyTables.lnFunc}, short-circuiting tiny values to 0.
 *
 * @param num the (non-negative) count
 * @return the lnFunc value of num, or 0 if num is below 1e-6
 */
public final double lnFunc(double num) {

  // Threshold hard coded for efficiency; avoids work on (near-)zero counts.
  return (num < 1e-6) ? 0 : ContingencyTables.lnFunc(num);
}
项目:umple    文件:EntropyBasedSplitCrit.java   
/**
 * Computes entropy (in bits, un-normalized) of the class distribution
 * before splitting.
 *
 * @param bags the distribution to evaluate
 * @return the entropy of the class distribution
 */
public final double oldEnt(Distribution bags) {

  double classTerms = 0;
  for (int c = 0; c < bags.numClasses(); c++) {
    classTerms += lnFunc(bags.perClass(c));
  }
  // Division by log2 converts from natural log to log base 2.
  return (lnFunc(bags.total()) - classTerms) / ContingencyTables.log2;
}
项目:umple    文件:EntropyBasedSplitCrit.java   
/**
 * Computes entropy (in bits, un-normalized) of the class distribution
 * after splitting, summed over all bags.
 *
 * @param bags the distribution after the split
 * @return the entropy after the split
 */
public final double newEnt(Distribution bags) {

  double acc = 0;
  for (int b = 0; b < bags.numBags(); b++) {
    for (int c = 0; c < bags.numClasses(); c++) {
      acc += lnFunc(bags.perClassPerBag(b, c));
    }
    acc -= lnFunc(bags.perBag(b));
  }
  // Negate the accumulated terms and convert to log base 2.
  return -(acc / ContingencyTables.log2);
}
项目:umple    文件:EntropyBasedSplitCrit.java   
/**
 * Computes entropy (in bits) after splitting without considering the
 * class values, i.e. the entropy of the bag sizes alone.
 *
 * @param bags the distribution after the split
 * @return the split entropy
 */
public final double splitEnt(Distribution bags) {

  double bagTerms = 0;
  for (int b = 0; b < bags.numBags(); b++) {
    bagTerms += lnFunc(bags.perBag(b));
  }
  // Division by log2 converts from natural log to log base 2.
  return (lnFunc(bags.total()) - bagTerms) / ContingencyTables.log2;
}
项目:umple    文件:ClassifierDecList.java   
/**
 * Method for choosing a subset to expand: returns the index of the first
 * unexpanded son whose estimated per-instance entropy is (near-)zero,
 * otherwise the unexpanded son with the smallest estimated entropy, or -1
 * if no son qualifies.
 */
public final int chooseIndex() {

  int minIndex = -1;
  double estimated, min = Double.MAX_VALUE;
  int i, j;

  for (i = 0; i < m_sons.length; i++) {
    // Only consider sons that have not been expanded yet.
    if (son(i) == null) {
      // Too few instances in this bag: worst possible candidate.
      if (Utils.sm(localModel().distribution().perBag(i), m_minNumObj)) {
        estimated = Double.MAX_VALUE;
      } else {
        // Per-instance class entropy of the bag, converted to bits
        // by dividing by ContingencyTables.log2.
        estimated = 0;
        for (j = 0; j < localModel().distribution().numClasses(); j++) {
          estimated -= m_splitCrit.lnFunc(localModel().distribution()
            .perClassPerBag(i, j));
        }
        estimated += m_splitCrit
          .lnFunc(localModel().distribution().perBag(i));
        estimated /= (localModel().distribution().perBag(i) * ContingencyTables.log2);
      }
      // A pure (zero-entropy) bag is returned immediately.
      if (Utils.smOrEq(estimated, 0)) {
        return i;
      }
      // Otherwise keep track of the smallest estimated entropy seen.
      if (Utils.sm(estimated, min)) {
        min = estimated;
        minIndex = i;
      }
    }
  }

  return minIndex;
}
项目:repo.kmeanspp.silhouette_score    文件:DecisionStump.java   
/**
  * Finds best split for nominal attribute and nominal class
  * and returns value.
  *
  * @param index attribute index
  * @return value of criterion for the best split
  * @throws Exception if something goes wrong
  */
 protected double findSplitNominalNominal(int index) throws Exception {

   double bestVal = Double.MAX_VALUE, currVal;
   // The extra row counts[numValues()] collects class counts of
   // instances whose value for this attribute is missing.
   double[][] counts = new double[m_Instances.attribute(index).numValues() 
              + 1][m_Instances.numClasses()];
   double[] sumCounts = new double[m_Instances.numClasses()];
   double[][] bestDist = new double[3][m_Instances.numClasses()];
   int numMissing = 0;

   // Compute counts for all the values
   for (int i = 0; i < m_Instances.numInstances(); i++) {
     Instance inst = m_Instances.instance(i);
     if (inst.isMissing(index)) {
numMissing++;
counts[m_Instances.attribute(index).numValues()]
  [(int)inst.classValue()] += inst.weight();
     } else {
counts[(int)inst.value(index)][(int)inst.classValue()] += inst
  .weight();
     }
   }

   // Compute sum of counts
   for (int i = 0; i < m_Instances.attribute(index).numValues(); i++) {
     for (int j = 0; j < m_Instances.numClasses(); j++) {
sumCounts[j] += counts[i][j];
     }
   }

   // Make split counts for each possible split and evaluate.
   // m_Distribution rows: [0]=current value, [1]=all other values,
   // [2]=missing values.
   System.arraycopy(counts[m_Instances.attribute(index).numValues()], 0,
         m_Distribution[2], 0, m_Instances.numClasses());
   for (int i = 0; i < m_Instances.attribute(index).numValues(); i++) {
     for (int j = 0; j < m_Instances.numClasses(); j++) {
m_Distribution[0][j] = counts[i][j];
m_Distribution[1][j] = sumCounts[j] - counts[i][j];
     }
     // Lower conditional entropy means a better split on this value.
     currVal = ContingencyTables.entropyConditionedOnRows(m_Distribution);
     if (currVal < bestVal) {
bestVal = currVal;
m_SplitPoint = (double)i;
for (int j = 0; j < 3; j++) {
  System.arraycopy(m_Distribution[j], 0, bestDist[j], 0, 
           m_Instances.numClasses());
}
     }
   }

   // No missing values in training data.
   if (numMissing == 0) {
     System.arraycopy(sumCounts, 0, bestDist[2], 0, 
           m_Instances.numClasses());
   }

   // Keep the class distribution of the best split found.
   m_Distribution = bestDist;
   return bestVal;
 }
项目:repo.kmeanspp.silhouette_score    文件:Discretize.java   
/**
 * Test using Fayyad and Irani's MDL criterion.
 * 
 * @param priorCounts class counts before the split
 * @param bestCounts class counts of the two subsets of the best split
 * @param numInstances number of instances in the set
 * @param numCutPoints number of candidate cut points
 * @return true if the splits is acceptable
 */
private boolean FayyadAndIranisMDL(double[] priorCounts,
  double[][] bestCounts, double numInstances, int numCutPoints) {

  // Entropy before and after the split, and the resulting gain.
  final double priorEntropy = ContingencyTables.entropy(priorCounts);
  final double entropy = ContingencyTables.entropyConditionedOnRows(bestCounts);
  final double gain = priorEntropy - entropy;

  // Count the classes occurring in the whole set and in each subset.
  int numClassesTotal = 0;
  for (double count : priorCounts) {
    if (count > 0) {
      numClassesTotal++;
    }
  }
  int numClassesLeft = 0;
  for (double count : bestCounts[0]) {
    if (count > 0) {
      numClassesLeft++;
    }
  }
  int numClassesRight = 0;
  for (double count : bestCounts[1]) {
    if (count > 0) {
      numClassesRight++;
    }
  }

  // Entropy of the left and the right subsets.
  final double entropyLeft = ContingencyTables.entropy(bestCounts[0]);
  final double entropyRight = ContingencyTables.entropy(bestCounts[1]);

  // Delta term of the MDL formula.
  final double delta = Utils.log2(Math.pow(3, numClassesTotal) - 2)
    - ((numClassesTotal * priorEntropy)
       - (numClassesRight * entropyRight)
       - (numClassesLeft * entropyLeft));

  // Accept the split only if the gain pays for the coding cost.
  return (gain > (Utils.log2(numCutPoints) + delta) / numInstances);
}
项目:autoweka    文件:DecisionStump.java   
/**
  * Finds best split for nominal attribute and nominal class
  * and returns value.
  *
  * @param index attribute index
  * @return value of criterion for the best split
  * @throws Exception if something goes wrong
  */
 private double findSplitNominalNominal(int index) throws Exception {

   double bestVal = Double.MAX_VALUE, currVal;
   // The extra row counts[numValues()] collects class counts of
   // instances whose value for this attribute is missing.
   double[][] counts = new double[m_Instances.attribute(index).numValues() 
              + 1][m_Instances.numClasses()];
   double[] sumCounts = new double[m_Instances.numClasses()];
   double[][] bestDist = new double[3][m_Instances.numClasses()];
   int numMissing = 0;

   // Compute counts for all the values
   for (int i = 0; i < m_Instances.numInstances(); i++) {
     Instance inst = m_Instances.instance(i);
     if (inst.isMissing(index)) {
numMissing++;
counts[m_Instances.attribute(index).numValues()]
  [(int)inst.classValue()] += inst.weight();
     } else {
counts[(int)inst.value(index)][(int)inst.classValue()] += inst
  .weight();
     }
   }

   // Compute sum of counts
   for (int i = 0; i < m_Instances.attribute(index).numValues(); i++) {
     for (int j = 0; j < m_Instances.numClasses(); j++) {
sumCounts[j] += counts[i][j];
     }
   }

   // Make split counts for each possible split and evaluate.
   // m_Distribution rows: [0]=current value, [1]=all other values,
   // [2]=missing values.
   System.arraycopy(counts[m_Instances.attribute(index).numValues()], 0,
         m_Distribution[2], 0, m_Instances.numClasses());
   for (int i = 0; i < m_Instances.attribute(index).numValues(); i++) {
     for (int j = 0; j < m_Instances.numClasses(); j++) {
m_Distribution[0][j] = counts[i][j];
m_Distribution[1][j] = sumCounts[j] - counts[i][j];
     }
     // Lower conditional entropy means a better split on this value.
     currVal = ContingencyTables.entropyConditionedOnRows(m_Distribution);
     if (currVal < bestVal) {
bestVal = currVal;
m_SplitPoint = (double)i;
for (int j = 0; j < 3; j++) {
  System.arraycopy(m_Distribution[j], 0, bestDist[j], 0, 
           m_Instances.numClasses());
}
     }
   }

   // No missing values in training data.
   if (numMissing == 0) {
     System.arraycopy(sumCounts, 0, bestDist[2], 0, 
           m_Instances.numClasses());
   }

   // Keep the class distribution of the best split found.
   m_Distribution = bestDist;
   return bestVal;
 }
项目:autoweka    文件:Discretize.java   
/**
 * Test using Fayyad and Irani's MDL criterion.
 *
 * @param priorCounts class counts before the split
 * @param bestCounts class counts of the two subsets of the best split
 * @param numInstances number of instances in the set
 * @param numCutPoints number of candidate cut points
 * @return true if the splits is acceptable
 */
 private boolean FayyadAndIranisMDL(double[] priorCounts,
                 double[][] bestCounts,
                 double numInstances,
                 int numCutPoints) {

   // Entropy before and after the split, and the resulting gain.
   final double priorEntropy = ContingencyTables.entropy(priorCounts);
   final double entropy = ContingencyTables.entropyConditionedOnRows(bestCounts);
   final double gain = priorEntropy - entropy;

   // Count the classes occurring in the whole set and in each subset.
   int numClassesTotal = 0;
   for (double count : priorCounts) {
     if (count > 0) {
       numClassesTotal++;
     }
   }
   int numClassesLeft = 0;
   for (double count : bestCounts[0]) {
     if (count > 0) {
       numClassesLeft++;
     }
   }
   int numClassesRight = 0;
   for (double count : bestCounts[1]) {
     if (count > 0) {
       numClassesRight++;
     }
   }

   // Entropy of the left and the right subsets.
   final double entropyLeft = ContingencyTables.entropy(bestCounts[0]);
   final double entropyRight = ContingencyTables.entropy(bestCounts[1]);

   // Delta term of the MDL formula.
   final double delta = Utils.log2(Math.pow(3, numClassesTotal) - 2)
     - (((double) numClassesTotal * priorEntropy)
        - (numClassesRight * entropyRight)
        - (numClassesLeft * entropyLeft));

   // Accept the split only if the gain pays for the coding cost.
   return (gain > (Utils.log2(numCutPoints) + delta) / (double) numInstances);
 }
项目:umple    文件:DecisionStump.java   
/**
  * Finds best split for nominal attribute and nominal class
  * and returns value.
  *
  * @param index attribute index
  * @return value of criterion for the best split
  * @throws Exception if something goes wrong
  */
 protected double findSplitNominalNominal(int index) throws Exception {

   double bestVal = Double.MAX_VALUE, currVal;
   // The extra row counts[numValues()] collects class counts of
   // instances whose value for this attribute is missing.
   double[][] counts = new double[m_Instances.attribute(index).numValues() 
              + 1][m_Instances.numClasses()];
   double[] sumCounts = new double[m_Instances.numClasses()];
   double[][] bestDist = new double[3][m_Instances.numClasses()];
   int numMissing = 0;

   // Compute counts for all the values
   for (int i = 0; i < m_Instances.numInstances(); i++) {
     Instance inst = m_Instances.instance(i);
     if (inst.isMissing(index)) {
numMissing++;
counts[m_Instances.attribute(index).numValues()]
  [(int)inst.classValue()] += inst.weight();
     } else {
counts[(int)inst.value(index)][(int)inst.classValue()] += inst
  .weight();
     }
   }

   // Compute sum of counts
   for (int i = 0; i < m_Instances.attribute(index).numValues(); i++) {
     for (int j = 0; j < m_Instances.numClasses(); j++) {
sumCounts[j] += counts[i][j];
     }
   }

   // Make split counts for each possible split and evaluate.
   // m_Distribution rows: [0]=current value, [1]=all other values,
   // [2]=missing values.
   System.arraycopy(counts[m_Instances.attribute(index).numValues()], 0,
         m_Distribution[2], 0, m_Instances.numClasses());
   for (int i = 0; i < m_Instances.attribute(index).numValues(); i++) {
     for (int j = 0; j < m_Instances.numClasses(); j++) {
m_Distribution[0][j] = counts[i][j];
m_Distribution[1][j] = sumCounts[j] - counts[i][j];
     }
     // Lower conditional entropy means a better split on this value.
     currVal = ContingencyTables.entropyConditionedOnRows(m_Distribution);
     if (currVal < bestVal) {
bestVal = currVal;
m_SplitPoint = (double)i;
for (int j = 0; j < 3; j++) {
  System.arraycopy(m_Distribution[j], 0, bestDist[j], 0, 
           m_Instances.numClasses());
}
     }
   }

   // No missing values in training data.
   if (numMissing == 0) {
     System.arraycopy(sumCounts, 0, bestDist[2], 0, 
           m_Instances.numClasses());
   }

   // Keep the class distribution of the best split found.
   m_Distribution = bestDist;
   return bestVal;
 }
项目:umple    文件:Discretize.java   
/**
 * Test using Fayyad and Irani's MDL criterion.
 * 
 * @param priorCounts class counts before the split
 * @param bestCounts class counts of the two subsets of the best split
 * @param numInstances number of instances in the set
 * @param numCutPoints number of candidate cut points
 * @return true if the splits is acceptable
 */
private boolean FayyadAndIranisMDL(double[] priorCounts,
  double[][] bestCounts, double numInstances, int numCutPoints) {

  // Entropy before and after the split, and the resulting gain.
  final double priorEntropy = ContingencyTables.entropy(priorCounts);
  final double entropy = ContingencyTables.entropyConditionedOnRows(bestCounts);
  final double gain = priorEntropy - entropy;

  // Count the classes occurring in the whole set and in each subset.
  int numClassesTotal = 0;
  for (double count : priorCounts) {
    if (count > 0) {
      numClassesTotal++;
    }
  }
  int numClassesLeft = 0;
  for (double count : bestCounts[0]) {
    if (count > 0) {
      numClassesLeft++;
    }
  }
  int numClassesRight = 0;
  for (double count : bestCounts[1]) {
    if (count > 0) {
      numClassesRight++;
    }
  }

  // Entropy of the left and the right subsets.
  final double entropyLeft = ContingencyTables.entropy(bestCounts[0]);
  final double entropyRight = ContingencyTables.entropy(bestCounts[1]);

  // Delta term of the MDL formula.
  final double delta = Utils.log2(Math.pow(3, numClassesTotal) - 2)
    - ((numClassesTotal * priorEntropy)
       - (numClassesRight * entropyRight)
       - (numClassesLeft * entropyLeft));

  // Accept the split only if the gain pays for the coding cost.
  return (gain > (Utils.log2(numCutPoints) + delta) / numInstances);
}
项目:jbossBA    文件:DecisionStump.java   
/**
  * Finds best split for nominal attribute and nominal class
  * and returns value.
  *
  * @param index attribute index
  * @return value of criterion for the best split
  * @throws Exception if something goes wrong
  */
 private double findSplitNominalNominal(int index) throws Exception {

   double bestVal = Double.MAX_VALUE, currVal;
   // The extra row counts[numValues()] collects class counts of
   // instances whose value for this attribute is missing.
   double[][] counts = new double[m_Instances.attribute(index).numValues() 
              + 1][m_Instances.numClasses()];
   double[] sumCounts = new double[m_Instances.numClasses()];
   double[][] bestDist = new double[3][m_Instances.numClasses()];
   int numMissing = 0;

   // Compute counts for all the values
   for (int i = 0; i < m_Instances.numInstances(); i++) {
     Instance inst = m_Instances.instance(i);
     if (inst.isMissing(index)) {
numMissing++;
counts[m_Instances.attribute(index).numValues()]
  [(int)inst.classValue()] += inst.weight();
     } else {
counts[(int)inst.value(index)][(int)inst.classValue()] += inst
  .weight();
     }
   }

   // Compute sum of counts
   for (int i = 0; i < m_Instances.attribute(index).numValues(); i++) {
     for (int j = 0; j < m_Instances.numClasses(); j++) {
sumCounts[j] += counts[i][j];
     }
   }

   // Make split counts for each possible split and evaluate.
   // m_Distribution rows: [0]=current value, [1]=all other values,
   // [2]=missing values.
   System.arraycopy(counts[m_Instances.attribute(index).numValues()], 0,
         m_Distribution[2], 0, m_Instances.numClasses());
   for (int i = 0; i < m_Instances.attribute(index).numValues(); i++) {
     for (int j = 0; j < m_Instances.numClasses(); j++) {
m_Distribution[0][j] = counts[i][j];
m_Distribution[1][j] = sumCounts[j] - counts[i][j];
     }
     // Lower conditional entropy means a better split on this value.
     currVal = ContingencyTables.entropyConditionedOnRows(m_Distribution);
     if (currVal < bestVal) {
bestVal = currVal;
m_SplitPoint = (double)i;
for (int j = 0; j < 3; j++) {
  System.arraycopy(m_Distribution[j], 0, bestDist[j], 0, 
           m_Instances.numClasses());
}
     }
   }

   // No missing values in training data.
   if (numMissing == 0) {
     System.arraycopy(sumCounts, 0, bestDist[2], 0, 
           m_Instances.numClasses());
   }

   // Keep the class distribution of the best split found.
   m_Distribution = bestDist;
   return bestVal;
 }
项目:jbossBA    文件:Discretize.java   
/** 
 * Test using Fayyad and Irani's MDL criterion.
 * 
 * @param priorCounts class counts before the split
 * @param bestCounts class counts of the two subsets of the best split
 * @param numInstances number of instances in the set
 * @param numCutPoints number of candidate cut points
 * @return true if the splits is acceptable
 */
 private boolean FayyadAndIranisMDL(double[] priorCounts,
                 double[][] bestCounts,
                 double numInstances,
                 int numCutPoints) {

   // Entropy before and after the split, and the resulting gain.
   final double priorEntropy = ContingencyTables.entropy(priorCounts);
   final double entropy = ContingencyTables.entropyConditionedOnRows(bestCounts);
   final double gain = priorEntropy - entropy;

   // Count the classes occurring in the whole set and in each subset.
   int numClassesTotal = 0;
   for (double count : priorCounts) {
     if (count > 0) {
       numClassesTotal++;
     }
   }
   int numClassesLeft = 0;
   for (double count : bestCounts[0]) {
     if (count > 0) {
       numClassesLeft++;
     }
   }
   int numClassesRight = 0;
   for (double count : bestCounts[1]) {
     if (count > 0) {
       numClassesRight++;
     }
   }

   // Entropy of the left and the right subsets.
   final double entropyLeft = ContingencyTables.entropy(bestCounts[0]);
   final double entropyRight = ContingencyTables.entropy(bestCounts[1]);

   // Delta term of the MDL formula.
   final double delta = Utils.log2(Math.pow(3, numClassesTotal) - 2)
     - (((double) numClassesTotal * priorEntropy)
        - (numClassesRight * entropyRight)
        - (numClassesLeft * entropyLeft));

   // Accept the split only if the gain pays for the coding cost.
   return (gain > (Utils.log2(numCutPoints) + delta) / (double) numInstances);
 }
项目:seqcode-core    文件:AttributeRandomTree.java   
/**
 * Computes value of splitting criterion before split.
 *
 * @param dist the distributions
 * @return the splitting criterion (entropy over the class columns)
 */
protected double priorVal(double[][] dist) {

  final double entropy = ContingencyTables.entropyOverColumns(dist);
  return entropy;
}
项目:seqcode-core    文件:AttributeRandomTree.java   
/**
 * Computes value of splitting criterion after split.
 *
 * @param dist the distributions
 * @param priorVal the splitting criterion before the split
 * @return the gain achieved by the split
 */
protected double gain(double[][] dist, double priorVal) {

  final double posterior = ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - posterior;
}
项目:repo.kmeanspp.silhouette_score    文件:RandomTree.java   
/**
 * Computes value of splitting criterion before split.
 *
 * @param dist the distributions
 * @return the splitting criterion (entropy over the class columns)
 */
protected double priorVal(double[][] dist) {

  final double entropy = ContingencyTables.entropyOverColumns(dist);
  return entropy;
}
项目:repo.kmeanspp.silhouette_score    文件:RandomTree.java   
/**
 * Computes value of splitting criterion after split.
 *
 * @param dist the distributions
 * @param priorVal the splitting criterion before the split
 * @return the gain achieved by the split
 */
protected double gain(double[][] dist, double priorVal) {

  final double posterior = ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - posterior;
}
项目:repo.kmeanspp.silhouette_score    文件:REPTree.java   
/**
 * Computes value of splitting criterion before split.
 *
 * @param dist the distributions
 * @return the splitting criterion (entropy over the class columns)
 */
protected double priorVal(double[][] dist) {

  final double entropy = ContingencyTables.entropyOverColumns(dist);
  return entropy;
}
项目:repo.kmeanspp.silhouette_score    文件:REPTree.java   
/**
 * Computes value of splitting criterion after split.
 *
 * @param dist the distributions
 * @param priorVal the splitting criterion before the split
 * @return the gain achieved by the split
 */
protected double gain(double[][] dist, double priorVal) {

  final double posterior = ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - posterior;
}
项目:Keel3.0    文件:RandomTree.java   
/**
 * Computes value of splitting criterion before split.
 *
 * @param dist the distributions
 * @return the splitting criterion (entropy over the class columns)
 */
protected double priorVal(double[][] dist) {

  final double entropy = ContingencyTables.entropyOverColumns(dist);
  return entropy;
}
项目:Keel3.0    文件:RandomTree.java   
/**
 * Computes value of splitting criterion after split.
 *
 * @param dist the distributions
 * @param priorVal the splitting criterion before the split
 * @return the gain achieved by the split
 */
protected double gain(double[][] dist, double priorVal) {

  final double posterior = ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - posterior;
}
项目:Keel3.0    文件:RandomTree.java   
/**
 * Computes value of splitting criterion before split.
 *
 * @param dist the distributions
 * @return the splitting criterion (entropy over the class columns)
 */
protected double priorVal(double[][] dist) {

  final double entropy = ContingencyTables.entropyOverColumns(dist);
  return entropy;
}
项目:Keel3.0    文件:RandomTree.java   
/**
 * Computes value of splitting criterion after split.
 *
 * @param dist the distributions
 * @param priorVal the splitting criterion before the split
 * @return the gain achieved by the split
 */
protected double gain(double[][] dist, double priorVal) {

  final double posterior = ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - posterior;
}
项目:Keel3.0    文件:RandomTree.java   
/**
 * Computes value of splitting criterion before split.
 *
 * @param dist the distributions
 * @return the splitting criterion (entropy over the class columns)
 */
protected double priorVal(double[][] dist) {

  final double entropy = ContingencyTables.entropyOverColumns(dist);
  return entropy;
}
项目:Keel3.0    文件:RandomTree.java   
/**
 * Computes value of splitting criterion after split.
 *
 * @param dist the distributions
 * @param priorVal the splitting criterion before the split
 * @return the gain achieved by the split
 */
protected double gain(double[][] dist, double priorVal) {

  final double posterior = ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - posterior;
}
项目:KEEL    文件:RandomTree.java   
/**
 * Computes value of splitting criterion before split.
 *
 * @param dist the distributions
 * @return the splitting criterion (entropy over the class columns)
 */
protected double priorVal(double[][] dist) {

  final double entropy = ContingencyTables.entropyOverColumns(dist);
  return entropy;
}
项目:KEEL    文件:RandomTree.java   
/**
 * Computes value of splitting criterion after split.
 *
 * @param dist the distributions
 * @param priorVal the splitting criterion before the split
 * @return the gain achieved by the split
 */
protected double gain(double[][] dist, double priorVal) {

  final double posterior = ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - posterior;
}
项目:KEEL    文件:RandomTree.java   
/**
 * Computes value of splitting criterion before split.
 *
 * @param dist the distributions
 * @return the splitting criterion (entropy over the class columns)
 */
protected double priorVal(double[][] dist) {

  final double entropy = ContingencyTables.entropyOverColumns(dist);
  return entropy;
}
项目:KEEL    文件:RandomTree.java   
/**
 * Computes value of splitting criterion after split.
 *
 * @param dist the distributions
 * @param priorVal the splitting criterion before the split
 * @return the gain achieved by the split
 */
protected double gain(double[][] dist, double priorVal) {

  final double posterior = ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - posterior;
}
项目:KEEL    文件:RandomTree.java   
/**
 * Computes value of splitting criterion before split.
 *
 * @param dist the distributions
 * @return the splitting criterion (entropy over the class columns)
 */
protected double priorVal(double[][] dist) {

  final double entropy = ContingencyTables.entropyOverColumns(dist);
  return entropy;
}
项目:KEEL    文件:RandomTree.java   
/**
 * Computes value of splitting criterion after split.
 *
 * @param dist the distributions
 * @param priorVal the splitting criterion before the split
 * @return the gain achieved by the split
 */
protected double gain(double[][] dist, double priorVal) {

  final double posterior = ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - posterior;
}