/** * Test using Kononenko's MDL criterion. * * @param priorCounts * @param bestCounts * @param numInstances * @param numCutPoints * @return true if the split is acceptable */ private boolean KononenkosMDL(double[] priorCounts, double[][] bestCounts, double numInstances, int numCutPoints) { double distPrior, instPrior, distAfter = 0, sum, instAfter = 0; double before, after; int numClassesTotal; // Number of classes occuring in the set numClassesTotal = 0; for (double priorCount : priorCounts) { if (priorCount > 0) { numClassesTotal++; } } // Encode distribution prior to split distPrior = SpecialFunctions.log2Binomial(numInstances + numClassesTotal - 1, numClassesTotal - 1); // Encode instances prior to split. instPrior = SpecialFunctions.log2Multinomial(numInstances, priorCounts); before = instPrior + distPrior; // Encode distributions and instances after split. for (double[] bestCount : bestCounts) { sum = Utils.sum(bestCount); distAfter += SpecialFunctions.log2Binomial(sum + numClassesTotal - 1, numClassesTotal - 1); instAfter += SpecialFunctions.log2Multinomial(sum, bestCount); } // Coding cost after split after = Utils.log2(numCutPoints) + distAfter + instAfter; // Check if split is to be accepted return (before > after); }
/**
 * Compute factor for Bonferroni correction. This is based on Equation 3.2 in
 * Kass (1980).
 *
 * The result is the alternating sum, for i = 0 .. r - 1, of
 * exp(c * ln(r - i) - (lnFactorial(i) + lnFactorial(r - i))), where even i
 * contribute positively and odd i negatively.
 *
 * @param c multiplier applied to ln(r - i) in each term
 * @param r number of terms in the sum
 * @return the value of the alternating sum (0 when r is not positive)
 */
protected double BFfactor(int c, int r) {

  double total = 0;
  int i = 0;
  while (i < r) {
    // Each term is evaluated in log space before exponentiating
    final double logNumerator = c * Math.log(r - i);
    final double logDenominator = SpecialFunctions.lnFactorial(i)
      + SpecialFunctions.lnFactorial(r - i);
    final double sign = ((i & 1) == 0) ? 1.0 : -1.0;
    total += sign * Math.exp(logNumerator - logDenominator);
    i++;
  }
  return total;
}
/**
 * Calculates the base 2 logarithm of a binomial coefficient.
 *
 * When {@code lowerIndex} is 0 or equals {@code upperIndex} the binomial
 * coefficient is 1, so the result is 0 and no further computation is done.
 * All other cases are delegated to {@code SpecialFunctions.log2Binomial}.
 *
 * @param upperIndex upper index of the binomial coefficient
 * @param lowerIndex lower index of the binomial coefficient
 * @return the base 2 logarithm of the binomial coefficient
 */
public static final double logbinomialCoefficient(int upperIndex, int lowerIndex) {
  // C(n, 0) = C(n, n) = 1 and log2(1) = 0 (the previous shortcut returned 1.0)
  if (upperIndex == lowerIndex || lowerIndex == 0) {
    return 0.0;
  }
  return SpecialFunctions.log2Binomial((double) upperIndex, (double) lowerIndex);
}
/** * Test using Kononenko's MDL criterion. * * @param priorCounts * @param bestCounts * @param numInstances * @param numCutPoints * @return true if the split is acceptable */ private boolean KononenkosMDL(double[] priorCounts, double[][] bestCounts, double numInstances, int numCutPoints) { double distPrior, instPrior, distAfter = 0, sum, instAfter = 0; double before, after; int numClassesTotal; // Number of classes occuring in the set numClassesTotal = 0; for (int i = 0; i < priorCounts.length; i++) { if (priorCounts[i] > 0) { numClassesTotal++; } } // Encode distribution prior to split distPrior = SpecialFunctions.log2Binomial(numInstances + numClassesTotal - 1, numClassesTotal - 1); // Encode instances prior to split. instPrior = SpecialFunctions.log2Multinomial(numInstances, priorCounts); before = instPrior + distPrior; // Encode distributions and instances after split. for (int i = 0; i < bestCounts.length; i++) { sum = Utils.sum(bestCounts[i]); distAfter += SpecialFunctions.log2Binomial(sum + numClassesTotal - 1, numClassesTotal - 1); instAfter += SpecialFunctions.log2Multinomial(sum, bestCounts[i]); } // Coding cost after split after = Utils.log2(numCutPoints) + distAfter + instAfter; // Check if split is to be accepted return (before > after); }