private void assertScoresMatch(List<PrebuiltFeature> features, float[] scores, RankerQuery ltrQuery, ScoreDoc scoreDoc) throws IOException { Document d = searcherUnderTest.doc(scoreDoc.doc); String idVal = d.get("id"); int docId = Integer.decode(idVal); float modelScore = scores[docId]; float queryScore = scoreDoc.score; assertEquals("Scores match with similarity " + similarity.getClass(), modelScore, queryScore, SCORE_NB_ULP_PREC *Math.ulp(modelScore)); if (!(similarity instanceof TFIDFSimilarity)) { // There are precision issues with these similarities when using explain // It produces 0.56103003 for feat:0 in doc1 using score() but 0.5610301 using explain Explanation expl = searcherUnderTest.explain(ltrQuery, docId); assertEquals("Explain scores match with similarity " + similarity.getClass(), expl.getValue(), queryScore, 5 * Math.ulp(modelScore)); checkFeatureNames(expl, features); } }
/**
 * Produces per-document float values obtained by decoding the norm stored for {@code field}
 * with the searcher's TF-IDF similarity.
 *
 * @param context       map holding the {@link IndexSearcher} under the key "searcher"
 * @param readerContext reader whose norm values are consulted
 * @return a constant 0.0 source when the reader has no norms for the field, otherwise a
 *         source that decodes the norm of each requested document
 * @throws UnsupportedOperationException if no {@link TFIDFSimilarity} can be resolved
 * @throws IOException propagated from reading the norm values
 */
@Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
    final IndexSearcher indexSearcher = (IndexSearcher) context.get("searcher");
    final TFIDFSimilarity tfidf = IDFValueSource.asTFIDF(indexSearcher.getSimilarity(), field);
    if (tfidf == null) {
        throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as DefaultSimilarity)");
    }

    final NumericDocValues fieldNorms = readerContext.reader().getNormValues(field);
    if (fieldNorms != null) {
        return new FloatDocValues(this) {
            @Override
            public float floatVal(int doc) {
                return tfidf.decodeNormValue(fieldNorms.get(doc));
            }
        };
    }
    // No norms for this field in this reader: every document maps to 0.0.
    return new ConstDoubleDocValues(0.0, this);
}
public void testHyperbolicSweetSpot() { SweetSpotSimilarity ss = new SweetSpotSimilarity() { @Override public float tf(float freq) { return hyperbolicTf(freq); } }; ss.setHyperbolicTfFactors(3.3f, 7.7f, Math.E, 5.0f); TFIDFSimilarity s = ss; for (int i = 1; i <=1000; i++) { assertTrue("MIN tf: i="+i+" : s="+s.tf(i), 3.3f <= s.tf(i)); assertTrue("MAX tf: i="+i+" : s="+s.tf(i), s.tf(i) <= 7.7f); } assertEquals("MID tf", 3.3f+(7.7f - 3.3f)/2.0f, s.tf(5), 0.00001f); // stupidity assertEquals("tf zero", 0.0f, s.tf(0), 0.0f); }
/**
 * Produces per-document float values obtained by decoding the norm stored for {@code field},
 * narrowed to a {@code byte}, with the searcher's TF-IDF similarity.
 *
 * @param context       map holding the {@link IndexSearcher} under the key "searcher"
 * @param readerContext reader whose norm values are consulted
 * @return a constant 0.0 source when the reader has no norms for the field, otherwise a
 *         source that decodes the norm of each requested document
 * @throws UnsupportedOperationException if no {@link TFIDFSimilarity} can be resolved
 * @throws IOException propagated from reading the norm values
 */
@Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
    Object searcherEntry = context.get("searcher");
    final TFIDFSimilarity tfidf =
            IDFValueSource.asTFIDF(((IndexSearcher) searcherEntry).getSimilarity(), field);
    if (tfidf == null) {
        throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as DefaultSimilarity)");
    }

    final NumericDocValues normValues = readerContext.reader().getNormValues(field);
    if (normValues == null) {
        // No norms for this field in this reader: every document maps to 0.0.
        return new ConstDoubleDocValues(0.0, this);
    }

    return new FloatDocValues(this) {
        @Override
        public float floatVal(int doc) {
            // The stored norm is narrowed to a byte before it is decoded.
            byte encodedNorm = (byte) normValues.get(doc);
            return tfidf.decodeNormValue(encodedNorm);
        }
    };
}
public void testHyperbolicSweetSpot() { SweetSpotSimilarity ss = new SweetSpotSimilarity() { @Override public float tf(int freq) { return hyperbolicTf(freq); } }; ss.setHyperbolicTfFactors(3.3f, 7.7f, Math.E, 5.0f); TFIDFSimilarity s = ss; for (int i = 1; i <=1000; i++) { assertTrue("MIN tf: i="+i+" : s="+s.tf(i), 3.3f <= s.tf(i)); assertTrue("MAX tf: i="+i+" : s="+s.tf(i), s.tf(i) <= 7.7f); } assertEquals("MID tf", 3.3f+(7.7f - 3.3f)/2.0f, s.tf(5), 0.00001f); // stupidity assertEquals("tf zero", 0.0f, s.tf(0), 0.0f); }
/**
 * Computes the idf of the term ({@code indexedField}, {@code indexedBytes}) from its document
 * frequency and the reader's {@code maxDoc}, and exposes it as a constant value source.
 *
 * @param context       map holding the {@link IndexSearcher} under the key "searcher"
 * @param readerContext not read by this implementation
 * @return a constant source carrying the computed idf
 * @throws UnsupportedOperationException if no {@link TFIDFSimilarity} can be resolved
 * @throws IOException propagated from the document-frequency lookup
 */
@Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
    final IndexSearcher indexSearcher = (IndexSearcher) context.get("searcher");

    final TFIDFSimilarity tfidf = asTFIDF(indexSearcher.getSimilarity(), field);
    if (tfidf == null) {
        throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as DefaultSimilarity)");
    }

    final Term term = new Term(indexedField, indexedBytes);
    final int termDocFreq = indexSearcher.getIndexReader().docFreq(term);
    final int maxDoc = indexSearcher.getIndexReader().maxDoc();

    return new ConstDoubleDocValues(tfidf.idf(termDocFreq, maxDoc), this);
}
/**
 * Resolves a similarity to a {@link TFIDFSimilarity}, looking through any number of nested
 * {@link PerFieldSimilarityWrapper}s for the given field.
 *
 * @param sim   similarity to resolve
 * @param field field name passed to each wrapper's {@code get}
 * @return the resolved similarity if it is a {@link TFIDFSimilarity}, otherwise {@code null}
 */
static TFIDFSimilarity asTFIDF(Similarity sim, String field) {
    Similarity resolved = sim;
    while (resolved instanceof PerFieldSimilarityWrapper) {
        PerFieldSimilarityWrapper wrapper = (PerFieldSimilarityWrapper) resolved;
        resolved = wrapper.get(field);
    }
    return (resolved instanceof TFIDFSimilarity) ? (TFIDFSimilarity) resolved : null;
}
/**
 * Runs two span-near queries that combine a "gender" term and a "last" term through
 * {@link FieldMaskingSpanQuery}, first masked onto "gender" and then both masked onto "id",
 * and checks that each matches documents 2 and 4. Only runs with a {@link TFIDFSimilarity}.
 */
public void testSimple2() throws Exception {
    assumeTrue("Broken scoring: LUCENE-3723", searcher.getSimilarity() instanceof TFIDFSimilarity);

    SpanQuery genderFemale = new SpanTermQuery(new Term("gender", "female"));
    SpanQuery lastSmith = new SpanTermQuery(new Term("last", "smith"));

    // Variant 1: only the "last" clause is masked, onto the "gender" field.
    SpanQuery[] maskedOntoGender = {
        genderFemale,
        new FieldMaskingSpanQuery(lastSmith, "gender")
    };
    SpanQuery q = new SpanNearQuery(maskedOntoGender, -1, false);
    check(q, new int[] { 2, 4 });

    // Variant 2: both clauses are masked onto the "id" field.
    SpanQuery[] maskedOntoId = {
        new FieldMaskingSpanQuery(genderFemale, "id"),
        new FieldMaskingSpanQuery(lastSmith, "id")
    };
    q = new SpanNearQuery(maskedOntoId, -1, false);
    check(q, new int[] { 2, 4 });
}
/**
 * Builds a span-near query over field-masked clauses (a span-or of "gender:female" and a masked
 * "first:james", near "last:jones", both masked onto "id"), checks it matches documents 0-3,
 * and then walks the spans asserting each expected position in order.
 * Only runs with a {@link TFIDFSimilarity}.
 */
public void testSpans2() throws Exception {
    assumeTrue("Broken scoring: LUCENE-3723", searcher.getSimilarity() instanceof TFIDFSimilarity);

    SpanQuery genderFemale = new SpanTermQuery(new Term("gender", "female"));
    SpanQuery firstJames = new SpanTermQuery(new Term("first", "james"));
    SpanQuery femaleOrJames = new SpanOrQuery(genderFemale, new FieldMaskingSpanQuery(firstJames, "gender"));
    SpanQuery lastJones = new SpanTermQuery(new Term("last", "jones"));

    SpanQuery[] clauses = {
        new FieldMaskingSpanQuery(femaleOrJames, "id"),
        new FieldMaskingSpanQuery(lastJones, "id")
    };
    SpanQuery q = new SpanNearQuery(clauses, -1, false);
    check(q, new int[] { 0, 1, 2, 3 });

    Spans span = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), q);

    // Expected spans in iteration order; each row holds the three arguments given to s(...).
    int[][] expectedSpans = {
        { 0, 0, 1 },
        { 1, 1, 2 },
        { 2, 0, 1 },
        { 2, 2, 3 },
        { 3, 0, 1 },
    };
    for (int[] expected : expectedSpans) {
        assertEquals(true, span.next());
        assertEquals(s(expected[0], expected[1], expected[2]), s(span));
    }

    // Nothing may remain after the last expected span.
    assertEquals(false, span.next());
}
/**
 * Stores the searcher and query, and precomputes the idf explanation and value for the
 * authority term from the searcher's collection and term statistics.
 *
 * @param searcher     searcher supplying the similarity, reader context and statistics
 * @param query        query this weight belongs to
 * @param authTermName field name of the authority term
 * @param authTermText text of the authority term
 * @throws IOException propagated from the statistics / term-context lookups
 * @throws ClassCastException if the searcher's similarity is not a {@link TFIDFSimilarity}
 */
public AbstractAuthorityQueryWeight(SolrIndexSearcher searcher, Query query, String authTermName,
                                    String authTermText) throws IOException {
    this.searcher = searcher;
    this.query = query;
    // Unchecked downcast: a non-TF-IDF similarity fails here.
    this.similarity = (TFIDFSimilarity) searcher.getSimilarity();

    final CollectionStatistics fieldStats = searcher.collectionStatistics(authTermName);

    final IndexReaderContext topContext = searcher.getTopReaderContext();
    final Term authTerm = new Term(authTermName, authTermText);
    final TermContext authTermContext = TermContext.build(topContext, authTerm);
    final TermStatistics authTermStats = searcher.termStatistics(authTerm, authTermContext);

    this.idfExp = this.similarity.idfExplain(fieldStats, authTermStats);
    this.idf = this.idfExp.getValue();
}
public void setSimilarity(Similarity similarity) { if (similarity == null || similarity instanceof TFIDFSimilarity) { //LUCENE 4 UPGRADE we need TFIDF similarity here so I only set it if it is an instance of it this.similarity = (TFIDFSimilarity) similarity; } }
/**
 * Creates an instance bound to the given reader and similarity.
 *
 * @param ir  index reader stored on this instance
 * @param sim TF-IDF similarity stored on this instance
 */
public XMoreLikeThis(IndexReader ir, TFIDFSimilarity sim) {
    this.similarity = sim;
    this.ir = ir;
}
/**
 * Returns the similarity currently held by this instance.
 *
 * @return the stored {@link TFIDFSimilarity}
 */
public TFIDFSimilarity getSimilarity() {
    return this.similarity;
}
/**
 * Replaces the similarity held by this instance.
 *
 * @param similarity the {@link TFIDFSimilarity} to store; stored as given, without checks
 */
public void setSimilarity(TFIDFSimilarity similarity) { this.similarity = similarity; }
/**
 * Creates an instance bound to the given reader and similarity.
 *
 * @param ir  index reader stored on this instance
 * @param sim TF-IDF similarity stored on this instance
 */
public RelevancyFeedback(IndexReader ir, TFIDFSimilarity sim) {
    this.similarity = sim;
    this.ir = ir;
}