Java Class weka.core.Statistics Example Source Code

Project: repo.kmeanspp.silhouette_score    File: UnivariateEqualFrequencyHistogramEstimator.java
/**
 * Returns the quantile for the given percentage.
 * 
 * @param percentage the percentage
 * @return the quantile
 */
@Override
public double predictQuantile(double percentage) {

  // Update the boundaries and/or weights
  updateBoundariesAndOrWeights();

  // Compute minimum and maximum value, and delta
  double val = Statistics.normalInverse(1.0 - (1.0 - 0.95) / 2);
  double min = m_TM.firstKey() - val * m_Width;
  double max = m_TM.lastKey() + val * m_Width;
  double delta = (max - min) / m_NumIntervals;

  double sum = 0;
  double leftVal = Math.exp(logDensity(min));
  for (int i = 0; i < m_NumIntervals; i++) {
    if (sum >= percentage) {
      return min + i * delta;
    }
    double rightVal = Math.exp(logDensity(min + (i + 1) * delta));
    sum += 0.5 * (leftVal + rightVal) * delta;
    leftVal = rightVal;
  }
  return max;
}
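
A recurring pattern on this page: Statistics.normalInverse turns a coverage level into a z-value that bounds the integration range, and the quantile is then read off by accumulating trapezoidal slices of the density until the requested mass is reached. The following minimal, self-contained sketch applies the same idea to a standard normal; density() stands in for the estimator's Math.exp(logDensity(x)), and all other names are illustrative:

import weka.core.Statistics;

public class QuantileByTrapezoid {

  // Standard-normal density; a stand-in for Math.exp(logDensity(x)) above.
  static double density(double x) {
    return Math.exp(-0.5 * x * x) / Math.sqrt(2 * Math.PI);
  }

  public static void main(String[] args) {
    int numIntervals = 1000;
    double percentage = 0.975;

    // Bound the integration range via a z-value, as the estimators above do.
    double z = Statistics.normalInverse(1.0 - (1.0 - 0.95) / 2); // ~1.96
    double min = -3 * z, max = 3 * z;
    double delta = (max - min) / numIntervals;

    double sum = 0;
    double leftVal = density(min);
    double quantile = max;
    for (int i = 0; i < numIntervals; i++) {
      if (sum >= percentage) {
        quantile = min + i * delta;
        break;
      }
      double rightVal = density(min + (i + 1) * delta);
      sum += 0.5 * (leftVal + rightVal) * delta; // trapezoidal slice of the CDF
      leftVal = rightVal;
    }

    // Should agree with Statistics.normalInverse(0.975) to within one grid step.
    System.out.println(quantile + " vs " + Statistics.normalInverse(percentage));
  }
}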
Project: repo.kmeanspp.silhouette_score    File: UnivariateKernelEstimator.java
/**
 * Returns the quantile for the given percentage.
 * 
 * @param percentage the percentage
 * @return the quantile
 */
@Override
public double predictQuantile(double percentage) {

  // Update the bandwidth
  updateWidth();

  // Compute minimum and maximum value, and delta
  double val = Statistics.normalInverse(1.0 - (1.0 - 0.95) / 2);
  double min = m_TM.firstKey() - val * m_Width;
  double max = m_TM.lastKey() + val * m_Width;
  double delta = (max - min) / m_NumIntervals;

  double sum = 0;
  double leftVal = Math.exp(logDensity(min));
  for (int i = 0; i < m_NumIntervals; i++) {
    if (sum >= percentage) {
      return min + i * delta;
    }
    double rightVal = Math.exp(logDensity(min + (i + 1) * delta));
    sum += 0.5 * (leftVal + rightVal) * delta;
    leftVal = rightVal;
  }
  return max;
}
Project: repo.kmeanspp.silhouette_score    File: KKConditionalEstimator.java
/**
 * Get a probability estimator for a value
 *
 * @param given the new value that data is conditional upon 
 * @return the estimator for the supplied value given the condition
 */
public Estimator getEstimator(double given) {

  Estimator result = new KernelEstimator(m_Precision);
  if (m_NumValues == 0) {
    return result;
  }

  double delta = 0, currentProb = 0;
  double zLower, zUpper;
  for (int i = 0; i < m_NumValues; i++) {
    delta = m_CondValues[i] - given;
    zLower = (delta - (m_Precision / 2)) / m_StandardDev;
    zUpper = (delta + (m_Precision / 2)) / m_StandardDev;
    currentProb = (Statistics.normalProbability(zUpper)
     - Statistics.normalProbability(zLower));
    result.addValue(m_Values[i], currentProb * m_Weights[i]);
  }
  return result;
}
Project: autoweka    File: UnivariateEqualFrequencyHistogramEstimator.java
/**
 * Returns the quantile for the given percentage.
 * 
 * @param percentage the percentage
 * @return the quantile
 */
public double predictQuantile(double percentage) {

  // Update the boundaries and/or weights
  updateBoundariesAndOrWeights();

  // Compute minimum and maximum value, and delta
  double val = Statistics.normalInverse(1.0 - (1.0 - 0.95) / 2);
  double min = m_TM.firstKey() - val * m_Width;
  double max = m_TM.lastKey() + val * m_Width;
  double delta = (max - min) / m_NumIntervals;

  // Compute approximate quantile
  double sum = 0;
  double leftVal = Math.exp(logDensity(min));
  for (int i = 0; i < m_NumIntervals; i++) {
    if (sum >= percentage) {
      return min + i * delta;
    }
    double rightVal = Math.exp(logDensity(min + (i + 1) * delta));
    sum += 0.5 * (leftVal + rightVal) * delta;
    leftVal = rightVal;
  }
  return max;
}
Project: autoweka    File: UnivariateKernelEstimator.java
/**
 * Returns the quantile for the given percentage.
 * 
 * @param percentage the percentage
 * @return the quantile
 */
public double predictQuantile(double percentage) {

  // Update the bandwidth
  updateWidth();

  // Compute minimum and maximum value, and delta
  double val = Statistics.normalInverse(1.0 - (1.0 - 0.95) / 2);
  double min = m_TM.firstKey() - val * m_Width;
  double max = m_TM.lastKey() + val * m_Width;
  double delta = (max - min) / m_NumIntervals;

  // Compute approximate quantile
  double sum = 0;
  double leftVal = Math.exp(logDensity(min));
  for (int i = 0; i < m_NumIntervals; i++) {
    if (sum >= percentage) {
      return min + i * delta;
    }
    double rightVal = Math.exp(logDensity(min + (i + 1) * delta));
    sum += 0.5 * (leftVal + rightVal) * delta;
    leftVal = rightVal;
  }
  return max;
}
Project: autoweka    File: KKConditionalEstimator.java
/**
 * Get a probability estimator for a value
 *
 * @param given the new value that data is conditional upon 
 * @return the estimator for the supplied value given the condition
 */
public Estimator getEstimator(double given) {

  Estimator result = new KernelEstimator(m_Precision);
  if (m_NumValues == 0) {
    return result;
  }

  double delta = 0, currentProb = 0;
  double zLower, zUpper;
  for (int i = 0; i < m_NumValues; i++) {
    delta = m_CondValues[i] - given;
    zLower = (delta - (m_Precision / 2)) / m_StandardDev;
    zUpper = (delta + (m_Precision / 2)) / m_StandardDev;
    currentProb = (Statistics.normalProbability(zUpper)
     - Statistics.normalProbability(zLower));
    result.addValue(m_Values[i], currentProb * m_Weights[i]);
  }
  return result;
}
Project: umple    File: UnivariateEqualFrequencyHistogramEstimator.java
/**
 * Returns the quantile for the given percentage.
 * 
 * @param percentage the percentage
 * @return the quantile
 */
@Override
public double predictQuantile(double percentage) {

  // Update the boundaries and/or weights
  updateBoundariesAndOrWeights();

  // Compute minimum and maximum value, and delta
  double val = Statistics.normalInverse(1.0 - (1.0 - 0.95) / 2);
  double min = m_TM.firstKey() - val * m_Width;
  double max = m_TM.lastKey() + val * m_Width;
  double delta = (max - min) / m_NumIntervals;

  double sum = 0;
  double leftVal = Math.exp(logDensity(min));
  for (int i = 0; i < m_NumIntervals; i++) {
    if (sum >= percentage) {
      return min + i * delta;
    }
    double rightVal = Math.exp(logDensity(min + (i + 1) * delta));
    sum += 0.5 * (leftVal + rightVal) * delta;
    leftVal = rightVal;
  }
  return max;
}
Project: umple    File: UnivariateKernelEstimator.java
/**
 * Returns the quantile for the given percentage.
 * 
 * @param percentage the percentage
 * @return the quantile
 */
@Override
public double predictQuantile(double percentage) {

  // Update the bandwidth
  updateWidth();

  // Compute minimum and maximum value, and delta
  double val = Statistics.normalInverse(1.0 - (1.0 - 0.95) / 2);
  double min = m_TM.firstKey() - val * m_Width;
  double max = m_TM.lastKey() + val * m_Width;
  double delta = (max - min) / m_NumIntervals;

  double sum = 0;
  double leftVal = Math.exp(logDensity(min));
  for (int i = 0; i < m_NumIntervals; i++) {
    if (sum >= percentage) {
      return min + i * delta;
    }
    double rightVal = Math.exp(logDensity(min + (i + 1) * delta));
    sum += 0.5 * (leftVal + rightVal) * delta;
    leftVal = rightVal;
  }
  return max;
}
Project: umple    File: KKConditionalEstimator.java
/**
 * Get a probability estimator for a value
 *
 * @param given the new value that data is conditional upon 
 * @return the estimator for the supplied value given the condition
 */
public Estimator getEstimator(double given) {

  Estimator result = new KernelEstimator(m_Precision);
  if (m_NumValues == 0) {
    return result;
  }

  double delta = 0, currentProb = 0;
  double zLower, zUpper;
  for (int i = 0; i < m_NumValues; i++) {
    delta = m_CondValues[i] - given;
    zLower = (delta - (m_Precision / 2)) / m_StandardDev;
    zUpper = (delta + (m_Precision / 2)) / m_StandardDev;
    currentProb = (Statistics.normalProbability(zUpper)
     - Statistics.normalProbability(zLower));
    result.addValue(m_Values[i], currentProb * m_Weights[i]);
  }
  return result;
}
Project: jbossBA    File: KKConditionalEstimator.java
/**
 * Get a probability estimator for a value
 *
 * @param given the new value that data is conditional upon 
 * @return the estimator for the supplied value given the condition
 */
public Estimator getEstimator(double given) {

  Estimator result = new KernelEstimator(m_Precision);
  if (m_NumValues == 0) {
    return result;
  }

  double delta = 0, currentProb = 0;
  double zLower, zUpper;
  for (int i = 0; i < m_NumValues; i++) {
    delta = m_CondValues[i] - given;
    zLower = (delta - (m_Precision / 2)) / m_StandardDev;
    zUpper = (delta + (m_Precision / 2)) / m_StandardDev;
    currentProb = (Statistics.normalProbability(zUpper)
     - Statistics.normalProbability(zLower));
    result.addValue(m_Values[i], currentProb * m_Weights[i]);
  }
  return result;
}
Project: repo.kmeanspp.silhouette_score    File: NormalEstimator.java
/**
 * Get a probability estimate for a value
 * 
 * @param data the value to estimate the probability of
 * @return the estimated probability of the supplied value
 */
@Override
public double getProbability(double data) {

  data = round(data);
  double zLower = (data - m_Mean - (m_Precision / 2)) / m_StandardDev;
  double zUpper = (data - m_Mean + (m_Precision / 2)) / m_StandardDev;

  double pLower = Statistics.normalProbability(zLower);
  double pUpper = Statistics.normalProbability(zUpper);
  return pUpper - pLower;
}
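
NormalEstimator discretizes the observation space: each value occupies a bin of width m_Precision, and getProbability returns the normal mass falling into that bin. The same computation as a free-standing sketch (mean, standard deviation, and precision are made-up parameters):

import weka.core.Statistics;

public class BinnedNormalMass {

  /** Mass of N(mean, sd^2) inside [x - precision/2, x + precision/2]. */
  static double probability(double x, double mean, double sd, double precision) {
    double zLower = (x - mean - precision / 2) / sd;
    double zUpper = (x - mean + precision / 2) / sd;
    return Statistics.normalProbability(zUpper) - Statistics.normalProbability(zLower);
  }

  public static void main(String[] args) {
    // For a narrow bin this is roughly precision * density(x): ~0.0399 here.
    System.out.println(probability(0.0, 0.0, 1.0, 0.1));
  }
}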
Project: repo.kmeanspp.silhouette_score    File: UnivariateMixtureEstimator.java
/**
 * Returns the quantile for the given percentage.
 * 
 * @param percentage the percentage
 * @return the quantile
 */
public double predictQuantile(double percentage) {

  // Compute minimum and maximum value, and delta
  double valRight = Statistics.normalInverse(percentage);
  double valLeft = Statistics.normalInverse(0.001);
  double min = Double.MAX_VALUE;
  double max = -Double.MAX_VALUE;
  for (int i = 0; i < m_Means.length; i++) {
    double l = m_Means[i] - valLeft * m_StdDevs[i];
    if (l < min) {
      min = l;
    }
    double r = m_Means[i] + valRight * m_StdDevs[i];
    if (r > max) {
      max = r;
    }
  }
  double delta = (max - min) / m_NumIntervals;

  double sum = 0;
  double leftVal = Math.exp(logDensity(min));
  for (int i = 0; i < m_NumIntervals; i++) {
    if (sum >= percentage) {
      return min + i * delta;
    }
    double rightVal = Math.exp(logDensity(min + (i + 1) * delta));
    sum += 0.5 * (leftVal + rightVal) * delta;
    leftVal = rightVal;
  }
  return max;
}
Project: autoweka    File: NormalEstimator.java
/**
 * Get a probability estimate for a value
 *
 * @param data the value to estimate the probability of
 * @return the estimated probability of the supplied value
 */
public double getProbability(double data) {

  data = round(data);
  double zLower = (data - m_Mean - (m_Precision / 2)) / m_StandardDev;
  double zUpper = (data - m_Mean + (m_Precision / 2)) / m_StandardDev;

  double pLower = Statistics.normalProbability(zLower);
  double pUpper = Statistics.normalProbability(zUpper);
  return pUpper - pLower;
}
Project: umple    File: NormalEstimator.java
/**
 * Get a probability estimate for a value
 * 
 * @param data the value to estimate the probability of
 * @return the estimated probability of the supplied value
 */
@Override
public double getProbability(double data) {

  data = round(data);
  double zLower = (data - m_Mean - (m_Precision / 2)) / m_StandardDev;
  double zUpper = (data - m_Mean + (m_Precision / 2)) / m_StandardDev;

  double pLower = Statistics.normalProbability(zLower);
  double pUpper = Statistics.normalProbability(zUpper);
  return pUpper - pLower;
}
Project: jbossBA    File: RuleGeneration.java
/**
 * Calculates the probability using a binomial distribution.
 * If the support of the premise is too large this distribution
 * is approximated by a normal distribution.
 * @param accuracy the accuracy value
 * @param ruleCount the support of the whole rule
 * @param premiseCount the support of the premise
 * @return the probability value
 */  
public static final double binomialDistribution(double accuracy, double ruleCount,
    double premiseCount) {

  double mu, sigma;

  if (premiseCount < MAX_N) {
    return Math.pow(2, Utils.log2(Math.pow(accuracy, ruleCount))
        + Utils.log2(Math.pow(1.0 - accuracy, premiseCount - ruleCount))
        + PriorEstimation.logbinomialCoefficient((int) premiseCount, (int) ruleCount));
  } else {
    mu = premiseCount * accuracy;
    sigma = Math.sqrt((premiseCount * (1.0 - accuracy)) * accuracy);
    return Statistics.normalProbability(((ruleCount + 0.5) - mu) / (sigma * Math.sqrt(2)));
  }
}
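
The method falls back to a normal approximation only because the exact binomial term becomes numerically awkward for large premise support. Below is a sketch of the textbook continuity-corrected approximation it relies on (note that the snippet above additionally divides the z-score by Math.sqrt(2)):

import weka.core.Statistics;

public class BinomialNormalApprox {

  /** P(X <= k) for X ~ Binomial(n, p) via the continuity-corrected normal approximation. */
  static double approxCdf(int k, int n, double p) {
    double mu = n * p;
    double sigma = Math.sqrt(n * p * (1.0 - p));
    return Statistics.normalProbability((k + 0.5 - mu) / sigma);
  }

  public static void main(String[] args) {
    // For n = 100, p = 0.5 the exact value of P(X <= 50) is ~0.5398.
    System.out.println(approxCdf(50, 100, 0.5));
  }
}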
Project: jbossBA    File: RaceSearch.java
/** 
 * t-test for unequal sample sizes and equal variances. Returns the
 * probability that the observed difference in means is due to chance.
 */
private double ttest(Stats c1, Stats c2) throws Exception {
  double n1 = c1.count; double n2 = c2.count;
  double v1 = c1.stdDev * c1.stdDev;
  double v2 = c2.stdDev * c2.stdDev;
  double av1 = c1.mean;
  double av2 = c2.mean;

  double df = n1 + n2 - 2;
  double cv = (((n1 - 1) * v1) + ((n2 - 1) * v2)) / df;
  double t = (av1 - av2) / Math.sqrt(cv * ((1.0 / n1) + (1.0 / n2)));

  return Statistics.incompleteBeta(df / 2.0, 0.5,
                                   df / (df + (t * t)));
}
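
The closing call works because, for a Student t statistic with df degrees of freedom, the two-tailed p-value equals the regularized incomplete beta function I_x(df/2, 1/2) at x = df / (df + t^2), which is exactly what Statistics.incompleteBeta computes. A self-contained usage sketch with made-up summary statistics:

import weka.core.Statistics;

public class TwoSampleTTest {

  /** Two-tailed p-value of an equal-variance two-sample t-test. */
  static double pValue(double mean1, double var1, double n1,
                       double mean2, double var2, double n2) {
    double df = n1 + n2 - 2;
    double pooled = ((n1 - 1) * var1 + (n2 - 1) * var2) / df;
    double t = (mean1 - mean2) / Math.sqrt(pooled * (1.0 / n1 + 1.0 / n2));
    return Statistics.incompleteBeta(df / 2.0, 0.5, df / (df + t * t));
  }

  public static void main(String[] args) {
    // Identical samples give t = 0 and hence a p-value of 1.
    System.out.println(pValue(5.0, 1.0, 10, 5.0, 1.0, 10));
  }
}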
Project: jbossBA    File: NormalEstimator.java
/**
 * Get a probability estimate for a value
 *
 * @param data the value to estimate the probability of
 * @return the estimated probability of the supplied value
 */
public double getProbability(double data) {

  data = round(data);
  double zLower = (data - m_Mean - (m_Precision / 2)) / m_StandardDev;
  double zUpper = (data - m_Mean + (m_Precision / 2)) / m_StandardDev;

  double pLower = Statistics.normalProbability(zLower);
  double pUpper = Statistics.normalProbability(zUpper);
  return pUpper - pLower;
}
Project: repo.kmeanspp.silhouette_score    File: Stats.java
/**
 * Computes the estimated extra error for a given total number of instances
 * and observed error, using the normal approximation to the binomial
 * distribution (and a continuity correction).
 *
 * @param N number of instances
 * @param e observed error
 * @param CF confidence value
 * @return the estimated extra error
 */
public static double addErrs(double N, double e, float CF) {

  // Ignore stupid values for CF
  if (CF > 0.5) {
    System.err.println("WARNING: confidence value for pruning too high. "
        + "Error estimate not modified.");
    return 0;
  }

  // Check for extreme cases at the low end because the
  // normal approximation won't work
  if (e < 1) {

    // Base case (i.e. e == 0) from Documenta Geigy Scientific
    // Tables, 6th edition, page 185
    double base = N * (1 - Math.pow(CF, 1 / N));
    if (e == 0) {
      return base;
    }

    // Use linear interpolation between 0 and 1 like C4.5 does
    return base + e * (addErrs(N, 1, CF) - base);
  }

  // Use linear interpolation at the high end (i.e. between N - 0.5
  // and N) because of the continuity correction
  if (e + 0.5 >= N) {

    // Make sure that we never return anything smaller than zero
    return Math.max(N - e, 0);
  }

  // Get z-score corresponding to CF
  double z = Statistics.normalInverse(1 - CF);

  // Compute upper limit of confidence interval
  double f = (e + 0.5) / N;
  double r = (f + (z * z) / (2 * N)
      + z * Math.sqrt((f / N) - (f * f / N) + (z * z / (4 * N * N))))
      / (1 + (z * z) / N);

  return (r * N) - e;
}
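
addErrs is C4.5's pessimistic error estimate used during pruning: for a node covering N instances with e of them misclassified, it returns the extra errors implied by the upper confidence limit at level CF. A usage sketch, assuming the Stats class shown above (weka.classifiers.trees.j48.Stats in stock Weka); the counts are illustrative:

import weka.classifiers.trees.j48.Stats;

public class PessimisticError {
  public static void main(String[] args) {
    double n = 14, e = 2;
    float cf = 0.25f; // C4.5's default confidence factor

    // Extra errors on top of the observed ones; C4.5's pessimistic
    // error count for the node is e + extra.
    double extra = Stats.addErrs(n, e, cf);
    System.out.println("estimated error rate: " + (e + extra) / n);
  }
}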
Project: repo.kmeanspp.silhouette_score    File: DiscreteEstimatorBayes.java
/**
 * Gets the log score contribution of this distribution.
 *
 * @param nType score type
 * @param nCardinality cardinality of the parent set (used by the BDeu prior)
 * @return the score
 */
public double logScore(int nType, int nCardinality) {
  double fScore = 0.0;

  switch (nType) {

  case (Scoreable.BAYES): {
    for (int iSymbol = 0; iSymbol < m_nSymbols; iSymbol++) {
      fScore += Statistics.lnGamma(m_Counts[iSymbol]);
    }

    fScore -= Statistics.lnGamma(m_SumOfCounts);
    if (m_fPrior != 0.0) {
      fScore -= m_nSymbols * Statistics.lnGamma(m_fPrior);
      fScore += Statistics.lnGamma(m_nSymbols * m_fPrior);
    }
  }
    break;

  case (Scoreable.BDeu): {
    for (int iSymbol = 0; iSymbol < m_nSymbols; iSymbol++) {
      fScore += Statistics.lnGamma(m_Counts[iSymbol]);
    }

    fScore -= Statistics.lnGamma(m_SumOfCounts);
    fScore -= m_nSymbols * Statistics.lnGamma(1.0 / (m_nSymbols * nCardinality));
    fScore += Statistics.lnGamma(1.0 / nCardinality);
  }
    break;

  case (Scoreable.MDL):
  case (Scoreable.AIC):
  case (Scoreable.ENTROPY): {
    for (int iSymbol = 0; iSymbol < m_nSymbols; iSymbol++) {
      double fP = getProbability(iSymbol);
      fScore += m_Counts[iSymbol] * Math.log(fP);
    }
  }
    break;

  default:
    break;
  }

  return fScore;
}
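
The BAYES and BDeu branches sum log-Gamma terms because the marginal likelihood of a Dirichlet-multinomial model is a ratio of Gamma functions. Since lnGamma(n) = ln((n-1)!) for integer n, the call is easy to sanity-check; the sketch below also reproduces the Dirichlet marginal likelihood (alpha plays the role of m_fPrior, and the counts are made up):

import weka.core.Statistics;

public class LnGammaCheck {
  public static void main(String[] args) {
    // lnGamma(6) should equal ln(5!) = ln(120) ~ 4.7875.
    System.out.println(Statistics.lnGamma(6.0));
    System.out.println(Math.log(120.0));

    // Log marginal likelihood of counts under a symmetric Dirichlet(alpha) prior.
    double alpha = 1.0;
    double[] counts = {3, 1, 6};
    double total = 0, score = 0;
    for (double c : counts) {
      score += Statistics.lnGamma(c + alpha) - Statistics.lnGamma(alpha);
      total += c + alpha;
    }
    score += Statistics.lnGamma(counts.length * alpha) - Statistics.lnGamma(total);
    System.out.println(score);
  }
}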
Project: repo.kmeanspp.silhouette_score    File: PairedStats.java
/**
  * Calculates the derived statistics (significance etc).
  */
public void calculateDerived() {

  xStats.calculateDerived();
  yStats.calculateDerived();
  differencesStats.calculateDerived();

  correlation = Double.NaN;
  if (!Double.isNaN(xStats.stdDev) && !Double.isNaN(yStats.stdDev)
      && !Utils.eq(xStats.stdDev, 0)) {
    double slope = (xySum - xStats.sum * yStats.sum / count)
        / (xStats.sumSq - xStats.sum * xStats.mean);
    if (!Utils.eq(yStats.stdDev, 0)) {
      correlation = slope * xStats.stdDev / yStats.stdDev;
    } else {
      correlation = 1.0;
    }
  }

  if (Utils.gr(differencesStats.stdDev, 0)) {
    double tval = differencesStats.mean * Math.sqrt(count)
        / differencesStats.stdDev;

    if (m_degreesOfFreedom >= 1) {
      differencesProbability = Statistics.FProbability(tval * tval, 1,
          m_degreesOfFreedom);
    } else {
      if (count > 1) {
        differencesProbability = Statistics.FProbability(tval * tval, 1,
            (int) count - 1);
      } else {
        differencesProbability = 1;
      }
    }
  } else {
    if (differencesStats.sumSq == 0) {
      differencesProbability = 1.0;
    } else {
      differencesProbability = 0.0;
    }
  }
  differencesSignificance = 0;
  if (differencesProbability <= sigLevel) {
    if (xStats.mean > yStats.mean) {
      differencesSignificance = 1;
    } else {
      differencesSignificance = -1;
    }
  }
}
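
Both paired-stats classes square the t statistic and hand it to Statistics.FProbability with one numerator degree of freedom, using the identity that a squared t variable with df degrees of freedom is F(1, df) distributed. A usage sketch of the surrounding weka.experiment.PairedStats class (the accuracy figures are invented):

import weka.experiment.PairedStats;

public class PairedComparison {
  public static void main(String[] args) {
    // E.g. accuracies of two classifiers over the same cross-validation folds.
    double[] a = {0.81, 0.79, 0.84, 0.80, 0.83};
    double[] b = {0.76, 0.77, 0.80, 0.75, 0.79};

    PairedStats ps = new PairedStats(0.05); // 5% significance level
    for (int i = 0; i < a.length; i++) {
      ps.add(a[i], b[i]);
    }
    ps.calculateDerived();

    System.out.println("p-value:      " + ps.differencesProbability);
    System.out.println("significance: " + ps.differencesSignificance); // 1, 0, or -1
  }
}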
Project: repo.kmeanspp.silhouette_score    File: PairedStatsCorrected.java
/**
  * Calculates the derived statistics (significance etc).
  */
public void calculateDerived() {

  xStats.calculateDerived();
  yStats.calculateDerived();
  differencesStats.calculateDerived();

  correlation = Double.NaN;
  if (!Double.isNaN(xStats.stdDev) && !Double.isNaN(yStats.stdDev)
      && !Utils.eq(xStats.stdDev, 0)) {
    double slope = (xySum - xStats.sum * yStats.sum / count)
        / (xStats.sumSq - xStats.sum * xStats.mean);
    if (!Utils.eq(yStats.stdDev, 0)) {
      correlation = slope * xStats.stdDev / yStats.stdDev;
    } else {
      correlation = 1.0;
    }
  }

  if (Utils.gr(differencesStats.stdDev, 0)) {

    double tval = differencesStats.mean
        / Math.sqrt((1 / count + m_testTrainRatio)
            * differencesStats.stdDev * differencesStats.stdDev);

    if (count > 1) {
      differencesProbability = Statistics.FProbability(tval * tval, 1,
          (int) count - 1);
    } else {
      differencesProbability = 1;
    }
  } else {
    if (differencesStats.sumSq == 0) {
      differencesProbability = 1.0;
    } else {
      differencesProbability = 0.0;
    }
  }
  differencesSignificance = 0;
  if (differencesProbability <= sigLevel) {
    if (xStats.mean > yStats.mean) {
      differencesSignificance = 1;
    } else {
      differencesSignificance = -1;
    }
  }
}
Project: repo.kmeanspp.silhouette_score    File: Maths.java
/**
 * Returns the cumulative probability of the standard normal.
 *
 * @param x the quantile
 * @return the cumulative probability P(X <= x)
 */
public static double pnorm(double x) {
  return Statistics.normalProbability(x);
}
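
pnorm is a thin alias for the standard normal CDF, and Statistics.normalInverse is its inverse, so the two should round-trip (a quick sanity check):

import weka.core.Statistics;

public class NormalRoundTrip {
  public static void main(String[] args) {
    System.out.println(Statistics.normalProbability(0.0));  // 0.5
    System.out.println(Statistics.normalProbability(1.96)); // ~0.975

    // The inverse CDF should undo the CDF, up to numerical error.
    double x = 1.2345;
    System.out.println(Statistics.normalInverse(Statistics.normalProbability(x)));
  }
}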
Project: repo.kmeanspp.silhouette_score    File: KernelEstimator.java
/**
 * Get a probability estimate for a value.
 * 
 * @param data the value to estimate the probability of
 * @return the estimated probability of the supplied value
 */
@Override
public double getProbability(double data) {

  double delta = 0, sum = 0, currentProb = 0;
  double zLower = 0, zUpper = 0;
  if (m_NumValues == 0) {
    zLower = (data - (m_Precision / 2)) / m_StandardDev;
    zUpper = (data + (m_Precision / 2)) / m_StandardDev;
    return (Statistics.normalProbability(zUpper) - Statistics
        .normalProbability(zLower));
  }
  double weightSum = 0;
  int start = findNearestValue(data);
  for (int i = start; i < m_NumValues; i++) {
    delta = m_Values[i] - data;
    zLower = (delta - (m_Precision / 2)) / m_StandardDev;
    zUpper = (delta + (m_Precision / 2)) / m_StandardDev;
    currentProb = (Statistics.normalProbability(zUpper) - Statistics
        .normalProbability(zLower));
    sum += currentProb * m_Weights[i];
    /*
     * System.out.print("zL" + (i + 1) + ": " + zLower + " ");
     * System.out.print("zU" + (i + 1) + ": " + zUpper + " ");
     * System.out.print("P" + (i + 1) + ": " + currentProb + " ");
     * System.out.println("total: " + (currentProb * m_Weights[i]) + " ");
     */
    weightSum += m_Weights[i];
    if (currentProb * (m_SumOfWeights - weightSum) < sum * MAX_ERROR) {
      break;
    }
  }
  for (int i = start - 1; i >= 0; i--) {
    delta = m_Values[i] - data;
    zLower = (delta - (m_Precision / 2)) / m_StandardDev;
    zUpper = (delta + (m_Precision / 2)) / m_StandardDev;
    currentProb = (Statistics.normalProbability(zUpper) - Statistics
        .normalProbability(zLower));
    sum += currentProb * m_Weights[i];
    weightSum += m_Weights[i];
    if (currentProb * (m_SumOfWeights - weightSum) < sum * MAX_ERROR) {
      break;
    }
  }
  return sum / m_SumOfWeights;
}
Project: autoweka    File: Stats.java
/**
 * Computes the estimated extra error for a given total number of instances
 * and observed error, using the normal approximation to the binomial
 * distribution (and a continuity correction).
 *
 * @param N number of instances
 * @param e observed error
 * @param CF confidence value
 * @return the estimated extra error
 */
public static double addErrs(double N, double e, float CF) {

  // Ignore stupid values for CF
  if (CF > 0.5) {
    System.err.println("WARNING: confidence value for pruning too high. "
        + "Error estimate not modified.");
    return 0;
  }

  // Check for extreme cases at the low end because the
  // normal approximation won't work
  if (e < 1) {

    // Base case (i.e. e == 0) from Documenta Geigy Scientific
    // Tables, 6th edition, page 185
    double base = N * (1 - Math.pow(CF, 1 / N));
    if (e == 0) {
      return base;
    }

    // Use linear interpolation between 0 and 1 like C4.5 does
    return base + e * (addErrs(N, 1, CF) - base);
  }

  // Use linear interpolation at the high end (i.e. between N - 0.5
  // and N) because of the continuity correction
  if (e + 0.5 >= N) {

    // Make sure that we never return anything smaller than zero
    return Math.max(N - e, 0);
  }

  // Get z-score corresponding to CF
  double z = Statistics.normalInverse(1 - CF);

  // Compute upper limit of confidence interval
  double f = (e + 0.5) / N;
  double r = (f + (z * z) / (2 * N)
      + z * Math.sqrt((f / N) - (f * f / N) + (z * z / (4 * N * N))))
      / (1 + (z * z) / N);

  return (r * N) - e;
}
Project: autoweka    File: DiscreteEstimatorBayes.java
/**
 * Gets the log score contribution of this distribution.
 *
 * @param nType score type
 * @param nCardinality cardinality of the parent set (used by the BDeu prior)
 * @return the score
 */
public double logScore(int nType, int nCardinality) {
  double fScore = 0.0;

  switch (nType) {

  case (Scoreable.BAYES): {
    for (int iSymbol = 0; iSymbol < m_nSymbols; iSymbol++) {
      fScore += Statistics.lnGamma(m_Counts[iSymbol]);
    }

    fScore -= Statistics.lnGamma(m_SumOfCounts);
    if (m_fPrior != 0.0) {
      fScore -= m_nSymbols * Statistics.lnGamma(m_fPrior);
      fScore += Statistics.lnGamma(m_nSymbols * m_fPrior);
    }
  }
    break;

  case (Scoreable.BDeu): {
    for (int iSymbol = 0; iSymbol < m_nSymbols; iSymbol++) {
      fScore += Statistics.lnGamma(m_Counts[iSymbol]);
    }

    fScore -= Statistics.lnGamma(m_SumOfCounts);
    fScore -= m_nSymbols * Statistics.lnGamma(1.0 / (m_nSymbols * nCardinality));
    fScore += Statistics.lnGamma(1.0 / nCardinality);
  }
    break;

  case (Scoreable.MDL):
  case (Scoreable.AIC):
  case (Scoreable.ENTROPY): {
    for (int iSymbol = 0; iSymbol < m_nSymbols; iSymbol++) {
      double fP = getProbability(iSymbol);
      fScore += m_Counts[iSymbol] * Math.log(fP);
    }
  }
    break;

  default:
    break;
  }

  return fScore;
}
Project: autoweka    File: PairedStats.java
/**
  * Calculates the derived statistics (significance etc).
  */
public void calculateDerived() {

  xStats.calculateDerived();
  yStats.calculateDerived();
  differencesStats.calculateDerived();

  correlation = Double.NaN;
  if (!Double.isNaN(xStats.stdDev) && !Double.isNaN(yStats.stdDev)
      && !Utils.eq(xStats.stdDev, 0)) {
    double slope = (xySum - xStats.sum * yStats.sum / count)
        / (xStats.sumSq - xStats.sum * xStats.mean);
    if (!Utils.eq(yStats.stdDev, 0)) {
      correlation = slope * xStats.stdDev / yStats.stdDev;
    } else {
      correlation = 1.0;
    }
  }

  if (Utils.gr(differencesStats.stdDev, 0)) {
    double tval = differencesStats.mean * Math.sqrt(count)
        / differencesStats.stdDev;

    if (m_degreesOfFreedom >= 1) {
      differencesProbability = Statistics.FProbability(tval * tval, 1,
          m_degreesOfFreedom);
    } else {
      if (count > 1) {
        differencesProbability = Statistics.FProbability(tval * tval, 1,
            (int) count - 1);
      } else {
        differencesProbability = 1;
      }
    }
  } else {
    if (differencesStats.sumSq == 0) {
      differencesProbability = 1.0;
    } else {
      differencesProbability = 0.0;
    }
  }
  differencesSignificance = 0;
  if (differencesProbability <= sigLevel) {
    if (xStats.mean > yStats.mean) {
      differencesSignificance = 1;
    } else {
      differencesSignificance = -1;
    }
  }
}
Project: autoweka    File: PairedStatsCorrected.java
/**
  * Calculates the derived statistics (significance etc).
  */
public void calculateDerived() {

  xStats.calculateDerived();
  yStats.calculateDerived();
  differencesStats.calculateDerived();

  correlation = Double.NaN;
  if (!Double.isNaN(xStats.stdDev) && !Double.isNaN(yStats.stdDev)
      && !Utils.eq(xStats.stdDev, 0)) {
    double slope = (xySum - xStats.sum * yStats.sum / count)
        / (xStats.sumSq - xStats.sum * xStats.mean);
    if (!Utils.eq(yStats.stdDev, 0)) {
      correlation = slope * xStats.stdDev / yStats.stdDev;
    } else {
      correlation = 1.0;
    }
  }

  if (Utils.gr(differencesStats.stdDev, 0)) {

    double tval = differencesStats.mean
        / Math.sqrt((1 / count + m_testTrainRatio)
            * differencesStats.stdDev * differencesStats.stdDev);

    if (count > 1) {
      differencesProbability = Statistics.FProbability(tval * tval, 1,
          (int) count - 1);
    } else {
      differencesProbability = 1;
    }
  } else {
    if (differencesStats.sumSq == 0) {
      differencesProbability = 1.0;
    } else {
      differencesProbability = 0.0;
    }
  }
  differencesSignificance = 0;
  if (differencesProbability <= sigLevel) {
    if (xStats.mean > yStats.mean) {
      differencesSignificance = 1;
    } else {
      differencesSignificance = -1;
    }
  }
}
Project: autoweka    File: Maths.java
/**
 * Returns the cumulative probability of the standard normal.
 *
 * @param x the quantile
 * @return the cumulative probability P(X <= x)
 */
public static double pnorm(double x) {
  return Statistics.normalProbability(x);
}
Project: autoweka    File: KernelEstimator.java
/**
  * Get a probability estimate for a value.
  *
  * @param data the value to estimate the probability of
  * @return the estimated probability of the supplied value
  */
public double getProbability(double data) {

  double delta = 0, sum = 0, currentProb = 0;
  double zLower = 0, zUpper = 0;
  if (m_NumValues == 0) {
    zLower = (data - (m_Precision / 2)) / m_StandardDev;
    zUpper = (data + (m_Precision / 2)) / m_StandardDev;
    return (Statistics.normalProbability(zUpper)
        - Statistics.normalProbability(zLower));
  }
  double weightSum = 0;
  int start = findNearestValue(data);
  for (int i = start; i < m_NumValues; i++) {
    delta = m_Values[i] - data;
    zLower = (delta - (m_Precision / 2)) / m_StandardDev;
    zUpper = (delta + (m_Precision / 2)) / m_StandardDev;
    currentProb = (Statistics.normalProbability(zUpper)
        - Statistics.normalProbability(zLower));
    sum += currentProb * m_Weights[i];
    /*
     * System.out.print("zL" + (i + 1) + ": " + zLower + " ");
     * System.out.print("zU" + (i + 1) + ": " + zUpper + " ");
     * System.out.print("P" + (i + 1) + ": " + currentProb + " ");
     * System.out.println("total: " + (currentProb * m_Weights[i]) + " ");
     */
    weightSum += m_Weights[i];
    if (currentProb * (m_SumOfWeights - weightSum) < sum * MAX_ERROR) {
      break;
    }
  }
  for (int i = start - 1; i >= 0; i--) {
    delta = m_Values[i] - data;
    zLower = (delta - (m_Precision / 2)) / m_StandardDev;
    zUpper = (delta + (m_Precision / 2)) / m_StandardDev;
    currentProb = (Statistics.normalProbability(zUpper)
        - Statistics.normalProbability(zLower));
    sum += currentProb * m_Weights[i];
    weightSum += m_Weights[i];
    if (currentProb * (m_SumOfWeights - weightSum) < sum * MAX_ERROR) {
      break;
    }
  }
  return sum / m_SumOfWeights;
}
Project: umple    File: Stats.java
/**
 * Computes the estimated extra error for a given total number of instances
 * and observed error, using the normal approximation to the binomial
 * distribution (and a continuity correction).
 *
 * @param N number of instances
 * @param e observed error
 * @param CF confidence value
 * @return the estimated extra error
 */
public static double addErrs(double N, double e, float CF) {

  // Ignore stupid values for CF
  if (CF > 0.5) {
    System.err.println("WARNING: confidence value for pruning too high. "
        + "Error estimate not modified.");
    return 0;
  }

  // Check for extreme cases at the low end because the
  // normal approximation won't work
  if (e < 1) {

    // Base case (i.e. e == 0) from Documenta Geigy Scientific
    // Tables, 6th edition, page 185
    double base = N * (1 - Math.pow(CF, 1 / N));
    if (e == 0) {
      return base;
    }

    // Use linear interpolation between 0 and 1 like C4.5 does
    return base + e * (addErrs(N, 1, CF) - base);
  }

  // Use linear interpolation at the high end (i.e. between N - 0.5
  // and N) because of the continuity correction
  if (e + 0.5 >= N) {

    // Make sure that we never return anything smaller than zero
    return Math.max(N - e, 0);
  }

  // Get z-score corresponding to CF
  double z = Statistics.normalInverse(1 - CF);

  // Compute upper limit of confidence interval
  double f = (e + 0.5) / N;
  double r = (f + (z * z) / (2 * N)
      + z * Math.sqrt((f / N) - (f * f / N) + (z * z / (4 * N * N))))
      / (1 + (z * z) / N);

  return (r * N) - e;
}
Project: umple    File: DiscreteEstimatorBayes.java
/**
 * Gets the log score contribution of this distribution.
 *
 * @param nType score type
 * @param nCardinality cardinality of the parent set (used by the BDeu prior)
 * @return the score
 */
public double logScore(int nType, int nCardinality) {
  double fScore = 0.0;

  switch (nType) {

  case (Scoreable.BAYES): {
    for (int iSymbol = 0; iSymbol < m_nSymbols; iSymbol++) {
      fScore += Statistics.lnGamma(m_Counts[iSymbol]);
    }

    fScore -= Statistics.lnGamma(m_SumOfCounts);
    if (m_fPrior != 0.0) {
      fScore -= m_nSymbols * Statistics.lnGamma(m_fPrior);
      fScore += Statistics.lnGamma(m_nSymbols * m_fPrior);
    }
  }
    break;

  case (Scoreable.BDeu): {
    for (int iSymbol = 0; iSymbol < m_nSymbols; iSymbol++) {
      fScore += Statistics.lnGamma(m_Counts[iSymbol]);
    }

    fScore -= Statistics.lnGamma(m_SumOfCounts);
    fScore -= m_nSymbols * Statistics.lnGamma(1.0 / (m_nSymbols * nCardinality));
    fScore += Statistics.lnGamma(1.0 / nCardinality);
  }
    break;

  case (Scoreable.MDL):
  case (Scoreable.AIC):
  case (Scoreable.ENTROPY): {
    for (int iSymbol = 0; iSymbol < m_nSymbols; iSymbol++) {
      double fP = getProbability(iSymbol);
      fScore += m_Counts[iSymbol] * Math.log(fP);
    }
  }
    break;

  default:
    break;
  }

  return fScore;
}
Project: umple    File: PairedStats.java
/**
  * Calculates the derived statistics (significance etc).
  */
public void calculateDerived() {

  xStats.calculateDerived();
  yStats.calculateDerived();
  differencesStats.calculateDerived();

  correlation = Double.NaN;
  if (!Double.isNaN(xStats.stdDev) && !Double.isNaN(yStats.stdDev)
      && !Utils.eq(xStats.stdDev, 0)) {
    double slope = (xySum - xStats.sum * yStats.sum / count)
        / (xStats.sumSq - xStats.sum * xStats.mean);
    if (!Utils.eq(yStats.stdDev, 0)) {
      correlation = slope * xStats.stdDev / yStats.stdDev;
    } else {
      correlation = 1.0;
    }
  }

  if (Utils.gr(differencesStats.stdDev, 0)) {
    double tval = differencesStats.mean * Math.sqrt(count)
        / differencesStats.stdDev;

    if (m_degreesOfFreedom >= 1) {
      differencesProbability = Statistics.FProbability(tval * tval, 1,
          m_degreesOfFreedom);
    } else {
      if (count > 1) {
        differencesProbability = Statistics.FProbability(tval * tval, 1,
            (int) count - 1);
      } else {
        differencesProbability = 1;
      }
    }
  } else {
    if (differencesStats.sumSq == 0) {
      differencesProbability = 1.0;
    } else {
      differencesProbability = 0.0;
    }
  }
  differencesSignificance = 0;
  if (differencesProbability <= sigLevel) {
    if (xStats.mean > yStats.mean) {
      differencesSignificance = 1;
    } else {
      differencesSignificance = -1;
    }
  }
}
Project: umple    File: PairedStatsCorrected.java
/**
  * Calculates the derived statistics (significance etc).
  */
public void calculateDerived() {

  xStats.calculateDerived();
  yStats.calculateDerived();
  differencesStats.calculateDerived();

  correlation = Double.NaN;
  if (!Double.isNaN(xStats.stdDev) && !Double.isNaN(yStats.stdDev)
      && !Utils.eq(xStats.stdDev, 0)) {
    double slope = (xySum - xStats.sum * yStats.sum / count)
        / (xStats.sumSq - xStats.sum * xStats.mean);
    if (!Utils.eq(yStats.stdDev, 0)) {
      correlation = slope * xStats.stdDev / yStats.stdDev;
    } else {
      correlation = 1.0;
    }
  }

  if (Utils.gr(differencesStats.stdDev, 0)) {

    double tval = differencesStats.mean
        / Math.sqrt((1 / count + m_testTrainRatio)
            * differencesStats.stdDev * differencesStats.stdDev);

    if (count > 1) {
      differencesProbability = Statistics.FProbability(tval * tval, 1,
          (int) count - 1);
    } else {
      differencesProbability = 1;
    }
  } else {
    if (differencesStats.sumSq == 0) {
      differencesProbability = 1.0;
    } else {
      differencesProbability = 0.0;
    }
  }
  differencesSignificance = 0;
  if (differencesProbability <= sigLevel) {
    if (xStats.mean > yStats.mean) {
      differencesSignificance = 1;
    } else {
      differencesSignificance = -1;
    }
  }
}
Project: umple    File: Maths.java
/**
 * Returns the cumulative probability of the standard normal.
 *
 * @param x the quantile
 * @return the cumulative probability P(X <= x)
 */
public static double pnorm(double x) {
  return Statistics.normalProbability(x);
}
Project: umple    File: KernelEstimator.java
/**
 * Get a probability estimate for a value.
 * 
 * @param data the value to estimate the probability of
 * @return the estimated probability of the supplied value
 */
@Override
public double getProbability(double data) {

  double delta = 0, sum = 0, currentProb = 0;
  double zLower = 0, zUpper = 0;
  if (m_NumValues == 0) {
    zLower = (data - (m_Precision / 2)) / m_StandardDev;
    zUpper = (data + (m_Precision / 2)) / m_StandardDev;
    return (Statistics.normalProbability(zUpper) - Statistics
        .normalProbability(zLower));
  }
  double weightSum = 0;
  int start = findNearestValue(data);
  for (int i = start; i < m_NumValues; i++) {
    delta = m_Values[i] - data;
    zLower = (delta - (m_Precision / 2)) / m_StandardDev;
    zUpper = (delta + (m_Precision / 2)) / m_StandardDev;
    currentProb = (Statistics.normalProbability(zUpper) - Statistics
        .normalProbability(zLower));
    sum += currentProb * m_Weights[i];
    /*
     * System.out.print("zL" + (i + 1) + ": " + zLower + " ");
     * System.out.print("zU" + (i + 1) + ": " + zUpper + " ");
     * System.out.print("P" + (i + 1) + ": " + currentProb + " ");
     * System.out.println("total: " + (currentProb * m_Weights[i]) + " ");
     */
    weightSum += m_Weights[i];
    if (currentProb * (m_SumOfWeights - weightSum) < sum * MAX_ERROR) {
      break;
    }
  }
  for (int i = start - 1; i >= 0; i--) {
    delta = m_Values[i] - data;
    zLower = (delta - (m_Precision / 2)) / m_StandardDev;
    zUpper = (delta + (m_Precision / 2)) / m_StandardDev;
    currentProb = (Statistics.normalProbability(zUpper) - Statistics
        .normalProbability(zLower));
    sum += currentProb * m_Weights[i];
    weightSum += m_Weights[i];
    if (currentProb * (m_SumOfWeights - weightSum) < sum * MAX_ERROR) {
      break;
    }
  }
  return sum / m_SumOfWeights;
}
Project: jbossBA    File: Stats.java
/**
 * Computes the estimated extra error for a given total number of instances
 * and observed error, using the normal approximation to the binomial
 * distribution (and a continuity correction).
 *
 * @param N number of instances
 * @param e observed error
 * @param CF confidence value
 * @return the estimated extra error
 */
public static double addErrs(double N, double e, float CF) {

  // Ignore stupid values for CF
  if (CF > 0.5) {
    System.err.println("WARNING: confidence value for pruning too high. "
        + "Error estimate not modified.");
    return 0;
  }

  // Check for extreme cases at the low end because the
  // normal approximation won't work
  if (e < 1) {

    // Base case (i.e. e == 0) from Documenta Geigy Scientific
    // Tables, 6th edition, page 185
    double base = N * (1 - Math.pow(CF, 1 / N));
    if (e == 0) {
      return base;
    }

    // Use linear interpolation between 0 and 1 like C4.5 does
    return base + e * (addErrs(N, 1, CF) - base);
  }

  // Use linear interpolation at the high end (i.e. between N - 0.5
  // and N) because of the continuity correction
  if (e + 0.5 >= N) {

    // Make sure that we never return anything smaller than zero
    return Math.max(N - e, 0);
  }

  // Get z-score corresponding to CF
  double z = Statistics.normalInverse(1 - CF);

  // Compute upper limit of confidence interval
  double f = (e + 0.5) / N;
  double r = (f + (z * z) / (2 * N)
      + z * Math.sqrt((f / N) - (f * f / N) + (z * z / (4 * N * N))))
      / (1 + (z * z) / N);

  return (r * N) - e;
}
Project: jbossBA    File: DiscreteEstimatorBayes.java
/**
 * Gets the log score contribution of this distribution.
 *
 * @param nType score type
 * @param nCardinality cardinality of the parent set (used by the BDeu prior)
 * @return the score
 */
public double logScore(int nType, int nCardinality) {
  double fScore = 0.0;

  switch (nType) {

  case (Scoreable.BAYES): {
    for (int iSymbol = 0; iSymbol < m_nSymbols; iSymbol++) {
      fScore += Statistics.lnGamma(m_Counts[iSymbol]);
    }

    fScore -= Statistics.lnGamma(m_SumOfCounts);
    if (m_fPrior != 0.0) {
      fScore -= m_nSymbols * Statistics.lnGamma(m_fPrior);
      fScore += Statistics.lnGamma(m_nSymbols * m_fPrior);
    }
  }
    break;

  case (Scoreable.BDeu): {
    for (int iSymbol = 0; iSymbol < m_nSymbols; iSymbol++) {
      fScore += Statistics.lnGamma(m_Counts[iSymbol]);
    }

    fScore -= Statistics.lnGamma(m_SumOfCounts);
    fScore -= m_nSymbols * Statistics.lnGamma(1.0 / (m_nSymbols * nCardinality));
    fScore += Statistics.lnGamma(1.0 / nCardinality);
  }
    break;

  case (Scoreable.MDL):
  case (Scoreable.AIC):
  case (Scoreable.ENTROPY): {
    for (int iSymbol = 0; iSymbol < m_nSymbols; iSymbol++) {
      double fP = getProbability(iSymbol);
      fScore += m_Counts[iSymbol] * Math.log(fP);
    }
  }
    break;

  default:
    break;
  }

  return fScore;
}
Project: jbossBA    File: GaussianProcesses.java
/**
 * Predicts a confidence interval for the given instance and confidence level.
 *
 * @param inst the instance to make the prediction for
 * @param confidenceLevel the percentage of cases the interval should cover
 * @return a 1*2 array that contains the boundaries of the interval
 * @throws Exception if interval could not be estimated
 * successfully
 */
public double[][] predictInterval(Instance inst, double confidenceLevel) throws Exception {

  // Filter instance
  if (!m_checksTurnedOff) {
    m_Missing.input(inst);
    m_Missing.batchFinished();
    inst = m_Missing.output();
  }

  if (m_NominalToBinary != null) {
    m_NominalToBinary.input(inst);
    m_NominalToBinary.batchFinished();
    inst = m_NominalToBinary.output();
  }

  if (m_Filter != null) {
    m_Filter.input(inst);
    m_Filter.batchFinished();
    inst = m_Filter.output();
  }

  // Build K vector (and Kappa)

  weka.core.matrix.Matrix k = new weka.core.matrix.Matrix(m_NumTrain, 1);
  for (int i = 0; i < m_NumTrain; i++) {
    k.set(i, 0, m_kernel.eval(-1, i, inst));
  }

  double kappa = m_kernel.eval(-1, -1, inst) + m_delta * m_delta;

  double estimate = k.transpose().times(m_t).get(0, 0) + m_avg_target;

  double sigma = Math.sqrt(kappa - k.transpose().times(m_C).times(k).get(0, 0));

  confidenceLevel = 1.0 - ((1.0 - confidenceLevel) / 2.0);

  double z = Statistics.normalInverse(confidenceLevel);

  double[][] interval = new double[1][2];

  interval[0][0] = estimate - z * sigma;
  interval[0][1] = estimate + z * sigma;

  return interval;
}
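
The interval construction at the end is generic: given any Gaussian predictive mean and standard deviation, Statistics.normalInverse converts the coverage level into a z multiplier. Extracted as a standalone helper sketch (names and numbers are illustrative):

import weka.core.Statistics;

public class GaussianInterval {

  /** Central interval covering confidenceLevel of N(estimate, sigma^2). */
  static double[] interval(double estimate, double sigma, double confidenceLevel) {
    double upperTail = 1.0 - (1.0 - confidenceLevel) / 2.0;
    double z = Statistics.normalInverse(upperTail);
    return new double[] {estimate - z * sigma, estimate + z * sigma};
  }

  public static void main(String[] args) {
    // 95% interval around a predictive mean of 3.2 with sd 0.5: ~[2.22, 4.18].
    double[] iv = interval(3.2, 0.5, 0.95);
    System.out.println(iv[0] + " .. " + iv[1]);
  }
}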
Project: jbossBA    File: PairedStats.java
/**
  * Calculates the derived statistics (significance etc).
  */
public void calculateDerived() {

  xStats.calculateDerived();
  yStats.calculateDerived();
  differencesStats.calculateDerived();

  correlation = Double.NaN;
  if (!Double.isNaN(xStats.stdDev) && !Double.isNaN(yStats.stdDev)
      && !Utils.eq(xStats.stdDev, 0)) {
    double slope = (xySum - xStats.sum * yStats.sum / count)
        / (xStats.sumSq - xStats.sum * xStats.mean);
    if (!Utils.eq(yStats.stdDev, 0)) {
      correlation = slope * xStats.stdDev / yStats.stdDev;
    } else {
      correlation = 1.0;
    }
  }

  if (Utils.gr(differencesStats.stdDev, 0)) {
    double tval = differencesStats.mean * Math.sqrt(count)
        / differencesStats.stdDev;

    if (m_degreesOfFreedom >= 1) {
      differencesProbability = Statistics.FProbability(tval * tval, 1,
          m_degreesOfFreedom);
    } else {
      if (count > 1) {
        differencesProbability = Statistics.FProbability(tval * tval, 1,
            (int) count - 1);
      } else {
        differencesProbability = 1;
      }
    }
  } else {
    if (differencesStats.sumSq == 0) {
      differencesProbability = 1.0;
    } else {
      differencesProbability = 0.0;
    }
  }
  differencesSignificance = 0;
  if (differencesProbability <= sigLevel) {
    if (xStats.mean > yStats.mean) {
      differencesSignificance = 1;
    } else {
      differencesSignificance = -1;
    }
  }
}
Project: jbossBA    File: PairedStatsCorrected.java
/**
  * Calculates the derived statistics (significance etc).
  */
public void calculateDerived() {

  xStats.calculateDerived();
  yStats.calculateDerived();
  differencesStats.calculateDerived();

  correlation = Double.NaN;
  if (!Double.isNaN(xStats.stdDev) && !Double.isNaN(yStats.stdDev)
      && !Utils.eq(xStats.stdDev, 0)) {
    double slope = (xySum - xStats.sum * yStats.sum / count)
        / (xStats.sumSq - xStats.sum * xStats.mean);
    if (!Utils.eq(yStats.stdDev, 0)) {
      correlation = slope * xStats.stdDev / yStats.stdDev;
    } else {
      correlation = 1.0;
    }
  }

  if (Utils.gr(differencesStats.stdDev, 0)) {

    double tval = differencesStats.mean
        / Math.sqrt((1 / count + m_testTrainRatio)
            * differencesStats.stdDev * differencesStats.stdDev);

    if (count > 1) {
      differencesProbability = Statistics.FProbability(tval * tval, 1,
          (int) count - 1);
    } else {
      differencesProbability = 1;
    }
  } else {
    if (differencesStats.sumSq == 0) {
      differencesProbability = 1.0;
    } else {
      differencesProbability = 0.0;
    }
  }
  differencesSignificance = 0;
  if (differencesProbability <= sigLevel) {
    if (xStats.mean > yStats.mean) {
      differencesSignificance = 1;
    } else {
      differencesSignificance = -1;
    }
  }
}