/**
 * Returns the quantile for the given percentage.
 *
 * @param percentage the percentage
 * @return the quantile
 */
@Override
public double predictQuantile(double percentage) {

  // Update the boundaries and/or weights
  updateBoundariesAndOrWeights();

  // Compute minimum and maximum value, and delta
  double val = Statistics.normalInverse(1.0 - (1.0 - 0.95) / 2);
  double min = m_TM.firstKey() - val * m_Width;
  double max = m_TM.lastKey() + val * m_Width;
  double delta = (max - min) / m_NumIntervals;

  // Integrate the density with the trapezoid rule until the
  // accumulated probability mass reaches the target percentage
  double sum = 0;
  double leftVal = Math.exp(logDensity(min));
  for (int i = 0; i < m_NumIntervals; i++) {
    if (sum >= percentage) {
      return min + i * delta;
    }
    double rightVal = Math.exp(logDensity(min + (i + 1) * delta));
    sum += 0.5 * (leftVal + rightVal) * delta;
    leftVal = rightVal;
  }
  return max;
}
/**
 * Returns the quantile for the given percentage.
 *
 * @param percentage the percentage
 * @return the quantile
 */
@Override
public double predictQuantile(double percentage) {

  // Update the bandwidth
  updateWidth();

  // Compute minimum and maximum value, and delta
  double val = Statistics.normalInverse(1.0 - (1.0 - 0.95) / 2);
  double min = m_TM.firstKey() - val * m_Width;
  double max = m_TM.lastKey() + val * m_Width;
  double delta = (max - min) / m_NumIntervals;

  double sum = 0;
  double leftVal = Math.exp(logDensity(min));
  for (int i = 0; i < m_NumIntervals; i++) {
    if (sum >= percentage) {
      return min + i * delta;
    }
    double rightVal = Math.exp(logDensity(min + (i + 1) * delta));
    sum += 0.5 * (leftVal + rightVal) * delta;
    leftVal = rightVal;
  }
  return max;
}
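Both variants above invert the estimator's CDF numerically: the density is integrated with the trapezoid rule over m_NumIntervals equal-width bins, and the left edge of the first bin at which the accumulated mass reaches the target percentage is returned. A minimal usage sketch, assuming the updateWidth() variant belongs to Weka's weka.estimators.UnivariateKernelEstimator (an assumption; any of the univariate quantile estimators is used the same way):

import weka.estimators.UnivariateKernelEstimator;

public class QuantileDemo {
  public static void main(String[] args) {
    UnivariateKernelEstimator est = new UnivariateKernelEstimator();
    double[] sample = {1.2, 0.8, 1.5, 2.1, 0.9, 1.7};
    for (double v : sample) {
      est.addValue(v, 1.0); // unit weight per observation
    }
    // Approximate quartiles and median of the fitted density
    System.out.println("25%: " + est.predictQuantile(0.25));
    System.out.println("50%: " + est.predictQuantile(0.50));
    System.out.println("75%: " + est.predictQuantile(0.75));
  }
}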
/**
 * Get a probability estimator for a value.
 *
 * @param given the new value that data is conditional upon
 * @return the estimator for the supplied value given the condition
 */
public Estimator getEstimator(double given) {

  Estimator result = new KernelEstimator(m_Precision);
  if (m_NumValues == 0) {
    return result;
  }

  double delta = 0, currentProb = 0;
  double zLower, zUpper;
  for (int i = 0; i < m_NumValues; i++) {
    delta = m_CondValues[i] - given;
    zLower = (delta - (m_Precision / 2)) / m_StandardDev;
    zUpper = (delta + (m_Precision / 2)) / m_StandardDev;
    currentProb = (Statistics.normalProbability(zUpper)
      - Statistics.normalProbability(zLower));
    result.addValue(m_Values[i], currentProb * m_Weights[i]);
  }
  return result;
}
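The estimator returned here weights each training value by the probability of the conditioning value under a Gaussian kernel centred at that value's own condition. A standalone sketch of the same re-weighting idea; the arrays condValues, values, and weights are hypothetical stand-ins for the member fields m_CondValues, m_Values, and m_Weights:

import weka.core.Statistics;
import weka.estimators.KernelEstimator;

public class ConditionalSketch {
  public static void main(String[] args) {
    // Hypothetical training data: (condition, value) pairs with weights
    double[] condValues = {0.5, 1.0, 1.5};
    double[] values = {10.0, 12.0, 14.0};
    double[] weights = {1.0, 1.0, 1.0};
    double precision = 0.01, stdDev = 0.3, given = 1.1;

    KernelEstimator conditional = new KernelEstimator(precision);
    for (int i = 0; i < values.length; i++) {
      double delta = condValues[i] - given;
      double zLower = (delta - precision / 2) / stdDev;
      double zUpper = (delta + precision / 2) / stdDev;
      double prob = Statistics.normalProbability(zUpper)
        - Statistics.normalProbability(zLower);
      // Values whose condition lies near 'given' get most of the weight
      conditional.addValue(values[i], prob * weights[i]);
    }
    System.out.println(conditional.getProbability(12.0));
  }
}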
/**
 * Returns the quantile for the given percentage.
 *
 * @param percentage the percentage
 * @return the quantile
 */
public double predictQuantile(double percentage) {

  // Update the boundaries and/or weights
  updateBoundariesAndOrWeights();

  // Compute minimum and maximum value, and delta
  double val = Statistics.normalInverse(1.0 - (1.0 - 0.95) / 2);
  double min = m_TM.firstKey() - val * m_Width;
  double max = m_TM.lastKey() + val * m_Width;
  double delta = (max - min) / m_NumIntervals;

  // Compute approximate quantile
  double sum = 0;
  double leftVal = Math.exp(logDensity(min));
  for (int i = 0; i < m_NumIntervals; i++) {
    if (sum >= percentage) {
      return min + i * delta;
    }
    double rightVal = Math.exp(logDensity(min + (i + 1) * delta));
    sum += 0.5 * (leftVal + rightVal) * delta;
    leftVal = rightVal;
  }
  return max;
}
/**
 * Returns the quantile for the given percentage.
 *
 * @param percentage the percentage
 * @return the quantile
 */
public double predictQuantile(double percentage) {

  // Update the bandwidth
  updateWidth();

  // Compute minimum and maximum value, and delta
  double val = Statistics.normalInverse(1.0 - (1.0 - 0.95) / 2);
  double min = m_TM.firstKey() - val * m_Width;
  double max = m_TM.lastKey() + val * m_Width;
  double delta = (max - min) / m_NumIntervals;

  // Compute approximate quantile
  double sum = 0;
  double leftVal = Math.exp(logDensity(min));
  for (int i = 0; i < m_NumIntervals; i++) {
    if (sum >= percentage) {
      return min + i * delta;
    }
    double rightVal = Math.exp(logDensity(min + (i + 1) * delta));
    sum += 0.5 * (leftVal + rightVal) * delta;
    leftVal = rightVal;
  }
  return max;
}
/**
 * Get a probability estimate for a value.
 *
 * @param data the value to estimate the probability of
 * @return the estimated probability of the supplied value
 */
@Override
public double getProbability(double data) {

  data = round(data);
  double zLower = (data - m_Mean - (m_Precision / 2)) / m_StandardDev;
  double zUpper = (data - m_Mean + (m_Precision / 2)) / m_StandardDev;

  double pLower = Statistics.normalProbability(zLower);
  double pUpper = Statistics.normalProbability(zUpper);
  return pUpper - pLower;
}
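The returned value is the normal mass of a slice one precision unit wide around the rounded query, so it behaves like a discretised density. A usage sketch, assuming this is weka.estimators.NormalEstimator, whose constructor takes the value precision:

import weka.estimators.NormalEstimator;

public class NormalDemo {
  public static void main(String[] args) {
    NormalEstimator est = new NormalEstimator(0.01); // precision of values
    est.addValue(5.0, 1.0);
    est.addValue(6.0, 1.0);
    est.addValue(7.0, 1.0);
    // Mass of the 0.01-wide slice centred at 6.0 under the fitted normal
    System.out.println(est.getProbability(6.0));
  }
}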
/**
 * Returns the quantile for the given percentage.
 *
 * @param percentage the percentage
 * @return the quantile
 */
public double predictQuantile(double percentage) {

  // Compute minimum and maximum value, and delta
  double valRight = Statistics.normalInverse(percentage);
  double valLeft = Statistics.normalInverse(0.001);
  double min = Double.MAX_VALUE;
  double max = -Double.MAX_VALUE;
  for (int i = 0; i < m_Means.length; i++) {
    // valLeft is negative, so adding it extends the range below the mean
    double l = m_Means[i] + valLeft * m_StdDevs[i];
    if (l < min) {
      min = l;
    }
    double r = m_Means[i] + valRight * m_StdDevs[i];
    if (r > max) {
      max = r;
    }
  }
  double delta = (max - min) / m_NumIntervals;

  double sum = 0;
  double leftVal = Math.exp(logDensity(min));
  for (int i = 0; i < m_NumIntervals; i++) {
    if (sum >= percentage) {
      return min + i * delta;
    }
    double rightVal = Math.exp(logDensity(min + (i + 1) * delta));
    sum += 0.5 * (leftVal + rightVal) * delta;
    leftVal = rightVal;
  }
  return max;
}
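All of the predictQuantile variants above share the same inversion loop; only the integration range differs. The following self-contained sketch runs that loop against a standard normal density, so the result can be checked against Statistics.normalInverse:

import weka.core.Statistics;

public class TrapezoidQuantile {

  // Standard normal density
  static double density(double x) {
    return Math.exp(-0.5 * x * x) / Math.sqrt(2 * Math.PI);
  }

  public static void main(String[] args) {
    double percentage = 0.95;
    double min = -6, max = 6; // covers essentially all of the mass
    int numIntervals = 10000;
    double delta = (max - min) / numIntervals;

    double sum = 0, leftVal = density(min), quantile = max;
    for (int i = 0; i < numIntervals; i++) {
      if (sum >= percentage) {
        quantile = min + i * delta;
        break;
      }
      double rightVal = density(min + (i + 1) * delta);
      sum += 0.5 * (leftVal + rightVal) * delta;
      leftVal = rightVal;
    }
    // Should agree to within one bin width (~0.0012)
    System.out.println(quantile + " vs " + Statistics.normalInverse(percentage));
  }
}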
/**
 * Get a probability estimate for a value.
 *
 * @param data the value to estimate the probability of
 * @return the estimated probability of the supplied value
 */
public double getProbability(double data) {

  data = round(data);
  double zLower = (data - m_Mean - (m_Precision / 2)) / m_StandardDev;
  double zUpper = (data - m_Mean + (m_Precision / 2)) / m_StandardDev;

  double pLower = Statistics.normalProbability(zLower);
  double pUpper = Statistics.normalProbability(zUpper);
  return pUpper - pLower;
}
/**
 * Calculates the probability using a binomial distribution. If the support
 * of the premise is too large, the binomial is approximated by a normal
 * distribution.
 *
 * @param accuracy the accuracy value
 * @param ruleCount the support of the whole rule
 * @param premiseCount the support of the premise
 * @return the probability value
 */
public static final double binomialDistribution(double accuracy,
    double ruleCount, double premiseCount) {

  double mu, sigma;
  if (premiseCount < MAX_N) {
    // Exact binomial pmf, evaluated in log space to avoid underflow
    return Math.pow(2, Utils.log2(Math.pow(accuracy, ruleCount))
      + Utils.log2(Math.pow(1.0 - accuracy, premiseCount - ruleCount))
      + PriorEstimation.logbinomialCoefficient((int) premiseCount,
        (int) ruleCount));
  } else {
    // Normal approximation (with continuity correction) for large support
    mu = premiseCount * accuracy;
    sigma = Math.sqrt((premiseCount * (1.0 - accuracy)) * accuracy);
    return Statistics.normalProbability(((ruleCount + 0.5) - mu)
      / (sigma * Math.sqrt(2)));
  }
}
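For premiseCount below MAX_N the method evaluates the exact binomial pmf C(n, k) * p^k * (1 - p)^(n - k) in log space. A quick pure-Java check of that branch; logBinomialCoefficient below is a hypothetical stand-in for PriorEstimation.logbinomialCoefficient and works in natural logs rather than log base 2:

public class BinomialCheck {

  // Hypothetical helper: ln C(n, k) computed incrementally
  static double logBinomialCoefficient(int n, int k) {
    double result = 0;
    for (int i = 0; i < k; i++) {
      result += Math.log(n - i) - Math.log(k - i);
    }
    return result;
  }

  public static void main(String[] args) {
    double p = 0.8;
    int n = 10, k = 7;
    double logPmf = k * Math.log(p) + (n - k) * Math.log(1 - p)
      + logBinomialCoefficient(n, k);
    System.out.println(Math.exp(logPmf)); // ~0.2013 for these values
  }
}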
/**
 * t-test for unequal sample sizes and same variance. Returns the probability
 * that the observed difference in means is due to chance.
 */
private double ttest(Stats c1, Stats c2) throws Exception {

  double n1 = c1.count;
  double n2 = c2.count;
  double v1 = c1.stdDev * c1.stdDev;
  double v2 = c2.stdDev * c2.stdDev;
  double av1 = c1.mean;
  double av2 = c2.mean;

  double df = n1 + n2 - 2;
  double cv = (((n1 - 1) * v1) + ((n2 - 1) * v2)) / df; // pooled variance
  double t = (av1 - av2) / Math.sqrt(cv * ((1.0 / n1) + (1.0 / n2)));
  return Statistics.incompleteBeta(df / 2.0, 0.5, df / (df + (t * t)));
}
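The returned value, incompleteBeta(df/2, 1/2, df/(df + t^2)), is the standard closed form for the two-tailed p-value of a t statistic with df degrees of freedom. A sketch of the same computation on plain summary statistics, since the method above is private; the numbers are hypothetical:

import weka.core.Statistics;

public class TTestDemo {
  public static void main(String[] args) {
    // Hypothetical summary statistics for two samples
    double n1 = 12, mean1 = 5.1, var1 = 1.4;
    double n2 = 15, mean2 = 4.3, var2 = 1.1;

    double df = n1 + n2 - 2;
    double pooled = ((n1 - 1) * var1 + (n2 - 1) * var2) / df;
    double t = (mean1 - mean2) / Math.sqrt(pooled * (1.0 / n1 + 1.0 / n2));

    // Two-tailed p-value via the regularized incomplete beta function
    double p = Statistics.incompleteBeta(df / 2.0, 0.5, df / (df + t * t));
    System.out.println("t = " + t + ", p = " + p);
  }
}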
/**
 * Computes the estimated extra error for a given total number of instances
 * and observed error, using the normal approximation to the binomial
 * distribution (with continuity correction).
 *
 * @param N number of instances
 * @param e observed error
 * @param CF confidence value
 */
public static double addErrs(double N, double e, float CF) {

  // Ignore out-of-range values for CF
  if (CF > 0.5) {
    System.err.println("WARNING: confidence value for pruning "
      + "too high. Error estimate not modified.");
    return 0;
  }

  // Check for extreme cases at the low end because the
  // normal approximation won't work there
  if (e < 1) {
    // Base case (i.e. e == 0) from Documenta Geigy Scientific
    // Tables, 6th edition, page 185
    double base = N * (1 - Math.pow(CF, 1 / N));
    if (e == 0) {
      return base;
    }
    // Use linear interpolation between 0 and 1 like C4.5 does
    return base + e * (addErrs(N, 1, CF) - base);
  }

  // Use linear interpolation at the high end (i.e. between N - 0.5
  // and N) because of the continuity correction
  if (e + 0.5 >= N) {
    // Make sure that we never return anything smaller than zero
    return Math.max(N - e, 0);
  }

  // Get z-score corresponding to CF
  double z = Statistics.normalInverse(1 - CF);

  // Compute upper limit of confidence interval
  double f = (e + 0.5) / N;
  double r = (f + (z * z) / (2 * N)
    + z * Math.sqrt((f / N) - (f * f / N) + (z * z / (4 * N * N))))
    / (1 + (z * z) / N);

  return (r * N) - e;
}
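This is C4.5's pessimistic error estimate: the e errors among N instances are treated as a binomial proportion, the upper limit of its confidence interval at level CF is computed, and the expected extra errors r*N - e are returned. A usage sketch, assuming the method sits in weka.classifiers.trees.j48.Stats as in the Weka source tree:

import weka.classifiers.trees.j48.Stats;

public class AddErrsDemo {
  public static void main(String[] args) {
    // Pessimistic extra-error estimate for a node covering 14 instances
    // with 2 misclassified, at C4.5's default confidence factor of 0.25
    System.out.println(Stats.addErrs(14, 2, 0.25f));
  }
}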
/**
 * Gets the log-score contribution of this distribution.
 *
 * @param nType score type
 * @param nCardinality cardinality used by the BDeu prior
 * @return the score
 */
public double logScore(int nType, int nCardinality) {
  double fScore = 0.0;

  switch (nType) {
  case (Scoreable.BAYES): {
    for (int iSymbol = 0; iSymbol < m_nSymbols; iSymbol++) {
      fScore += Statistics.lnGamma(m_Counts[iSymbol]);
    }
    fScore -= Statistics.lnGamma(m_SumOfCounts);
    if (m_fPrior != 0.0) {
      fScore -= m_nSymbols * Statistics.lnGamma(m_fPrior);
      fScore += Statistics.lnGamma(m_nSymbols * m_fPrior);
    }
  }
    break;

  case (Scoreable.BDeu): {
    for (int iSymbol = 0; iSymbol < m_nSymbols; iSymbol++) {
      fScore += Statistics.lnGamma(m_Counts[iSymbol]);
    }
    fScore -= Statistics.lnGamma(m_SumOfCounts);
    // BDeu prior of 1/(r * q) per cell instead of a uniform prior of 1.0
    fScore -= m_nSymbols * Statistics.lnGamma(1.0 / (m_nSymbols * nCardinality));
    fScore += Statistics.lnGamma(1.0 / nCardinality);
  }
    break;

  case (Scoreable.MDL):
  case (Scoreable.AIC):
  case (Scoreable.ENTROPY): {
    for (int iSymbol = 0; iSymbol < m_nSymbols; iSymbol++) {
      double fP = getProbability(iSymbol);
      fScore += m_Counts[iSymbol] * Math.log(fP);
    }
  }
    break;

  default: {
  }
  }
  return fScore;
}
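For the BAYES case the score is the log Dirichlet marginal likelihood, sum_i lnGamma(N_i) - lnGamma(sum_i N_i) - r*lnGamma(f) + lnGamma(r*f), where the counts N_i are assumed to already include the prior f = m_fPrior and r is the number of symbols. A standalone sketch with hypothetical counts:

import weka.core.Statistics;

public class BayesScoreDemo {
  public static void main(String[] args) {
    // Hypothetical prior-augmented counts for a 3-symbol distribution:
    // raw counts 4, 2, 1 plus a prior of 0.5 per symbol
    double prior = 0.5;
    double[] counts = {4.5, 2.5, 1.5};

    double sum = 0;
    for (double c : counts) {
      sum += c;
    }
    double score = 0;
    for (double c : counts) {
      score += Statistics.lnGamma(c);
    }
    score -= Statistics.lnGamma(sum);
    score -= counts.length * Statistics.lnGamma(prior);
    score += Statistics.lnGamma(counts.length * prior);
    System.out.println("log Bayes score: " + score);
  }
}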
/**
 * Calculates the derived statistics (significance etc).
 */
public void calculateDerived() {

  xStats.calculateDerived();
  yStats.calculateDerived();
  differencesStats.calculateDerived();

  correlation = Double.NaN;
  if (!Double.isNaN(xStats.stdDev) && !Double.isNaN(yStats.stdDev)
    && !Utils.eq(xStats.stdDev, 0)) {
    double slope = (xySum - xStats.sum * yStats.sum / count)
      / (xStats.sumSq - xStats.sum * xStats.mean);
    if (!Utils.eq(yStats.stdDev, 0)) {
      correlation = slope * xStats.stdDev / yStats.stdDev;
    } else {
      correlation = 1.0;
    }
  }

  if (Utils.gr(differencesStats.stdDev, 0)) {
    double tval = differencesStats.mean * Math.sqrt(count)
      / differencesStats.stdDev;
    if (m_degreesOfFreedom >= 1) {
      differencesProbability = Statistics.FProbability(tval * tval, 1,
        m_degreesOfFreedom);
    } else {
      if (count > 1) {
        differencesProbability = Statistics.FProbability(tval * tval, 1,
          (int) count - 1);
      } else {
        differencesProbability = 1;
      }
    }
  } else {
    if (differencesStats.sumSq == 0) {
      differencesProbability = 1.0;
    } else {
      differencesProbability = 0.0;
    }
  }

  differencesSignificance = 0;
  if (differencesProbability <= sigLevel) {
    if (xStats.mean > yStats.mean) {
      differencesSignificance = 1;
    } else {
      differencesSignificance = -1;
    }
  }
}
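A usage sketch for the paired test above, assuming the class is weka.experiment.PairedStats: pairs are added, calculateDerived() is called once, and the probability and significance are read from public fields. The data values are hypothetical:

import weka.experiment.PairedStats;

public class PairedDemo {
  public static void main(String[] args) {
    PairedStats ps = new PairedStats(0.05); // 5% significance level
    double[] x = {70.1, 68.4, 71.2, 69.9, 70.6};
    double[] y = {69.0, 67.8, 70.1, 69.5, 69.7};
    for (int i = 0; i < x.length; i++) {
      ps.add(x[i], y[i]);
    }
    ps.calculateDerived();
    System.out.println("p = " + ps.differencesProbability
      + ", significance = " + ps.differencesSignificance);
  }
}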
/**
 * Calculates the derived statistics (significance etc).
 */
public void calculateDerived() {

  xStats.calculateDerived();
  yStats.calculateDerived();
  differencesStats.calculateDerived();

  correlation = Double.NaN;
  if (!Double.isNaN(xStats.stdDev) && !Double.isNaN(yStats.stdDev)
    && !Utils.eq(xStats.stdDev, 0)) {
    double slope = (xySum - xStats.sum * yStats.sum / count)
      / (xStats.sumSq - xStats.sum * xStats.mean);
    if (!Utils.eq(yStats.stdDev, 0)) {
      correlation = slope * xStats.stdDev / yStats.stdDev;
    } else {
      correlation = 1.0;
    }
  }

  if (Utils.gr(differencesStats.stdDev, 0)) {
    double tval = differencesStats.mean
      / Math.sqrt((1 / count + m_testTrainRatio)
        * differencesStats.stdDev * differencesStats.stdDev);
    if (count > 1) {
      differencesProbability = Statistics.FProbability(tval * tval, 1,
        (int) count - 1);
    } else {
      differencesProbability = 1;
    }
  } else {
    if (differencesStats.sumSq == 0) {
      differencesProbability = 1.0;
    } else {
      differencesProbability = 0.0;
    }
  }

  differencesSignificance = 0;
  if (differencesProbability <= sigLevel) {
    if (xStats.mean > yStats.mean) {
      differencesSignificance = 1;
    } else {
      differencesSignificance = -1;
    }
  }
}
/**
 * Returns the cumulative probability of the standard normal.
 *
 * @param x the quantile
 * @return the cumulative probability P(X <= x)
 */
public static double pnorm(double x) {
  return Statistics.normalProbability(x);
}
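A quick sanity check for this thin wrapper, assuming it is weka.core.matrix.Maths.pnorm (an assumption based on the method name): the standard normal CDF is exactly 0.5 at 0 and about 0.975 at 1.96.

import weka.core.matrix.Maths;

public class PnormDemo {
  public static void main(String[] args) {
    System.out.println(Maths.pnorm(0.0));  // 0.5
    System.out.println(Maths.pnorm(1.96)); // ~0.975
  }
}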
/**
 * Get a probability estimate for a value.
 *
 * @param data the value to estimate the probability of
 * @return the estimated probability of the supplied value
 */
@Override
public double getProbability(double data) {

  double delta = 0, sum = 0, currentProb = 0;
  double zLower = 0, zUpper = 0;

  // No kernels yet: fall back to a single kernel centred at zero
  if (m_NumValues == 0) {
    zLower = (data - (m_Precision / 2)) / m_StandardDev;
    zUpper = (data + (m_Precision / 2)) / m_StandardDev;
    return (Statistics.normalProbability(zUpper)
      - Statistics.normalProbability(zLower));
  }

  // Scan kernels outward from the one nearest the query, stopping
  // early once the remaining weight cannot affect the result
  double weightSum = 0;
  int start = findNearestValue(data);
  for (int i = start; i < m_NumValues; i++) {
    delta = m_Values[i] - data;
    zLower = (delta - (m_Precision / 2)) / m_StandardDev;
    zUpper = (delta + (m_Precision / 2)) / m_StandardDev;
    currentProb = (Statistics.normalProbability(zUpper)
      - Statistics.normalProbability(zLower));
    sum += currentProb * m_Weights[i];
    weightSum += m_Weights[i];
    if (currentProb * (m_SumOfWeights - weightSum) < sum * MAX_ERROR) {
      break;
    }
  }
  for (int i = start - 1; i >= 0; i--) {
    delta = m_Values[i] - data;
    zLower = (delta - (m_Precision / 2)) / m_StandardDev;
    zUpper = (delta + (m_Precision / 2)) / m_StandardDev;
    currentProb = (Statistics.normalProbability(zUpper)
      - Statistics.normalProbability(zLower));
    sum += currentProb * m_Weights[i];
    weightSum += m_Weights[i];
    if (currentProb * (m_SumOfWeights - weightSum) < sum * MAX_ERROR) {
      break;
    }
  }
  return sum / m_SumOfWeights;
}
/**
 * Get a probability estimate for a value.
 *
 * @param data the value to estimate the probability of
 * @return the estimated probability of the supplied value
 */
public double getProbability(double data) {

  double delta = 0, sum = 0, currentProb = 0;
  double zLower = 0, zUpper = 0;

  // No kernels yet: fall back to a single kernel centred at zero
  if (m_NumValues == 0) {
    zLower = (data - (m_Precision / 2)) / m_StandardDev;
    zUpper = (data + (m_Precision / 2)) / m_StandardDev;
    return (Statistics.normalProbability(zUpper)
      - Statistics.normalProbability(zLower));
  }

  // Scan kernels outward from the one nearest the query, stopping
  // early once the remaining weight cannot affect the result
  double weightSum = 0;
  int start = findNearestValue(data);
  for (int i = start; i < m_NumValues; i++) {
    delta = m_Values[i] - data;
    zLower = (delta - (m_Precision / 2)) / m_StandardDev;
    zUpper = (delta + (m_Precision / 2)) / m_StandardDev;
    currentProb = (Statistics.normalProbability(zUpper)
      - Statistics.normalProbability(zLower));
    sum += currentProb * m_Weights[i];
    weightSum += m_Weights[i];
    if (currentProb * (m_SumOfWeights - weightSum) < sum * MAX_ERROR) {
      break;
    }
  }
  for (int i = start - 1; i >= 0; i--) {
    delta = m_Values[i] - data;
    zLower = (delta - (m_Precision / 2)) / m_StandardDev;
    zUpper = (delta + (m_Precision / 2)) / m_StandardDev;
    currentProb = (Statistics.normalProbability(zUpper)
      - Statistics.normalProbability(zLower));
    sum += currentProb * m_Weights[i];
    weightSum += m_Weights[i];
    if (currentProb * (m_SumOfWeights - weightSum) < sum * MAX_ERROR) {
      break;
    }
  }
  return sum / m_SumOfWeights;
}
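Both copies of this method implement the same early-exit scan: kernels are visited outward from the one nearest the query, and the loop stops once the largest possible remaining contribution falls below MAX_ERROR relative to the sum so far. A usage sketch, assuming the class is weka.estimators.KernelEstimator:

import weka.estimators.KernelEstimator;

public class KernelDemo {
  public static void main(String[] args) {
    KernelEstimator est = new KernelEstimator(0.01); // value precision
    double[] sample = {1.2, 0.8, 1.5, 2.1, 0.9};
    for (double v : sample) {
      est.addValue(v, 1.0); // one kernel per distinct value
    }
    System.out.println(est.getProbability(1.0));
  }
}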
/**
 * Predicts a confidence interval for the given instance and confidence level.
 *
 * @param inst the instance to make the prediction for
 * @param confidenceLevel the percentage of cases the interval should cover
 * @return a 1*2 array that contains the boundaries of the interval
 * @throws Exception if the interval could not be estimated successfully
 */
public double[][] predictInterval(Instance inst, double confidenceLevel)
  throws Exception {

  // Filter instance
  if (!m_checksTurnedOff) {
    m_Missing.input(inst);
    m_Missing.batchFinished();
    inst = m_Missing.output();
  }
  if (m_NominalToBinary != null) {
    m_NominalToBinary.input(inst);
    m_NominalToBinary.batchFinished();
    inst = m_NominalToBinary.output();
  }
  if (m_Filter != null) {
    m_Filter.input(inst);
    m_Filter.batchFinished();
    inst = m_Filter.output();
  }

  // Build k vector (and kappa)
  weka.core.matrix.Matrix k = new weka.core.matrix.Matrix(m_NumTrain, 1);
  for (int i = 0; i < m_NumTrain; i++) {
    k.set(i, 0, m_kernel.eval(-1, i, inst));
  }
  double kappa = m_kernel.eval(-1, -1, inst) + m_delta * m_delta;

  double estimate = k.transpose().times(m_t).get(0, 0) + m_avg_target;
  double sigma = Math.sqrt(kappa - k.transpose().times(m_C).times(k).get(0, 0));

  // Convert the two-sided confidence level to a one-sided z quantile
  confidenceLevel = 1.0 - ((1.0 - confidenceLevel) / 2.0);
  double z = Statistics.normalInverse(confidenceLevel);

  double[][] interval = new double[1][2];
  interval[0][0] = estimate - z * sigma;
  interval[0][1] = estimate + z * sigma;
  return interval;
}
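A usage sketch for the interval prediction, assuming the method above belongs to weka.classifiers.functions.GaussianProcesses; the ARFF file name is hypothetical and the dataset must have a numeric class:

import weka.classifiers.functions.GaussianProcesses;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class GPIntervalDemo {
  public static void main(String[] args) throws Exception {
    // Hypothetical regression dataset; last attribute is the target
    Instances data = DataSource.read("regression.arff");
    data.setClassIndex(data.numAttributes() - 1);

    GaussianProcesses gp = new GaussianProcesses();
    gp.buildClassifier(data);

    // 95% predictive interval for the first training instance
    Instance first = data.instance(0);
    double[][] interval = gp.predictInterval(first, 0.95);
    System.out.println("[" + interval[0][0] + ", " + interval[0][1] + "]");
  }
}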