public void getTopK(int testIndex, int numPred, ArrayList<String> predictions, ArrayList<Double> confidenceScores) { int size = docs[0].vector.getDimension(); SparseRealVector onesVector = new OpenMapRealVector(size); for(int i=0; i<size; i++) { onesVector.setEntry(i, 1); } List<Prediction> sortedPredictions = new ArrayList<Prediction>(); // descending order of jaccard similarity for(int i=0; i<info.noTrainingDocs; i++) { double jacSim = getJaccardSimilarity(docs[i], docs[info.noTrainingDocs + testIndex], onesVector); Prediction pred = new Prediction(new Integer(i).toString(), jacSim); if(jacSim > 0.0) { sortedPredictions.add(pred); } //System.out.println(i+" "+jacSim); } Collections.sort(sortedPredictions, new PredictionComparator()); //System.out.println("no of predictions = "+sortedPredictions.size()); for(int j=0; j<numPred && j<sortedPredictions.size(); j++) { predictions.add(sortedPredictions.get(j).predictionIndex); confidenceScores.add(sortedPredictions.get(j).confidenceScore); } }
/**
 * Computes a Jaccard-style similarity between two document vectors:
 *
 * <pre>
 *   (d1 . d2) / (d1 . ones + d2 . ones - d1 . d2)
 * </pre>
 *
 * <p>When the denominator is exactly zero (for example when both vectors contain only
 * zeros) the ratio is undefined; this method then returns {@code 0.0} instead of
 * NaN or an infinity, so callers always receive a finite, comparable score.
 *
 * @param d1         first document; its {@code vector} field is read
 * @param d2         second document; its {@code vector} field is read
 * @param onesVector vector dotted with each document vector to form the denominator;
 *                   presumably all ones with the same dimension as the document
 *                   vectors — verify against the caller
 * @return the similarity ratio, or {@code 0.0} when the denominator is zero
 */
public static double getJaccardSimilarity(DocVector d1, DocVector d2, SparseRealVector onesVector) {
    // Computed once; it is both the numerator and part of the denominator.
    double overlap = d1.vector.dotProduct(d2.vector);
    double union = d1.vector.dotProduct(onesVector) + d2.vector.dotProduct(onesVector) - overlap;

    // Guard against division by zero.
    if (union == 0.0) {
        return 0.0;
    }
    return overlap / union;
}