Java Class org.apache.lucene.search.CollectionStatistics Code Examples

Project: elasticsearch_my    File: DfsSearchResult.java
public static ObjectObjectHashMap<String, CollectionStatistics> readFieldStats(StreamInput in, ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics) throws IOException {
    final int numFieldStatistics = in.readVInt();
    if (fieldStatistics == null) {
        fieldStatistics = HppcMaps.newNoNullKeysMap(numFieldStatistics);
    }
    for (int i = 0; i < numFieldStatistics; i++) {
        final String field = in.readString();
        assert field != null;
        final long maxDoc = in.readVLong();
        final long docCount = subOne(in.readVLong());
        final long sumTotalTermFreq = subOne(in.readVLong());
        final long sumDocFreq = subOne(in.readVLong());
        CollectionStatistics stats = new CollectionStatistics(field, maxDoc, docCount, sumTotalTermFreq, sumDocFreq);
        fieldStatistics.put(field, stats);
    }
    return fieldStatistics;
}
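The read side above pairs with a write side later on this page: optional statistics (docCount, sumTotalTermFreq, sumDocFreq) may be -1 for "not available", but variable-length longs must be non-negative, so the values are shifted by one on the wire. A minimal sketch of the addOne/subOne helpers this relies on (the assertions here are illustrative, not copied from the project):

public static long addOne(long value) {
    assert value + 1 >= 0;   // -1 is the smallest legal input
    return value + 1;
}

public static long subOne(long value) {
    assert value >= 0;       // encoded values are never negative
    return value - 1;
}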
Project: Elasticsearch    File: DfsSearchResult.java
public static ObjectObjectHashMap<String, CollectionStatistics> readFieldStats(StreamInput in, ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics) throws IOException {
    final int numFieldStatistics = in.readVInt();
    if (fieldStatistics == null) {
        fieldStatistics = HppcMaps.newNoNullKeysMap(numFieldStatistics);
    }
    for (int i = 0; i < numFieldStatistics; i++) {
        final String field = in.readString();
        assert field != null;
        final long maxDoc = in.readVLong();
        final long docCount = subOne(in.readVLong());
        final long sumTotalTermFreq = subOne(in.readVLong());
        final long sumDocFreq = subOne(in.readVLong());
        CollectionStatistics stats = new CollectionStatistics(field, maxDoc, docCount, sumTotalTermFreq, sumDocFreq);
        fieldStatistics.put(field, stats);
    }
    return fieldStatistics;
}
Project: lucene4ir    File: SMARTBNNBNNSimilarity.java
@Override
public final SimWeight computeWeight(CollectionStatistics collectionStats,
                                     TermStatistics... termStats) {
    float N, n, idf, adl;

    idf = 1.0f;
    N   = collectionStats.maxDoc();
    adl = collectionStats.sumTotalTermFreq() / N;

    if (termStats.length == 1) {
        n = termStats[0].docFreq();
        idf = log(N / n);
    } else {
        for (final TermStatistics stat : termStats) {
            n = stat.docFreq();
            idf += log(N / n);
        }
    }

    return new TFIDFWeight(collectionStats.field(), idf, adl);
}
Project: DoSeR-Disambiguation    File: FuzzyLabelSimilarity.java
/**
 * Computes a score factor for a phrase.
 * 
 * <p>
 * The default implementation sums the idf factor for each term in the
 * phrase.
 * 
 * @param collectionStats
 *            collection-level statistics
 * @param termStats
 *            term-level statistics for the terms in the phrase
 * @return an Explain object that includes both an idf score factor for the
 *         phrase and an explanation for each term.
 */
public Explanation idfExplain(final CollectionStatistics collectionStats,
        final TermStatistics termStats[]) {
    final long max = collectionStats.maxDoc();
    float idf = 0.0f;
    final Explanation exp = new Explanation();
    exp.setDescription("idf(), sum of:");
    for (final TermStatistics stat : termStats) {
        final long docFreq = stat.docFreq();
        final float termIdf = idf(docFreq, max);
        exp.addDetail(new Explanation(termIdf, "idf(docFreq=" + docFreq
                + ", maxDocs=" + max + ")"));
        idf += termIdf;
    }
    exp.setValue(idf);
    return exp;
}
Project: DoSeR    File: FuzzyLabelSimilarity.java
/**
 * Computes a score factor for a phrase.
 * 
 * <p>
 * The default implementation sums the idf factor for each term in the
 * phrase.
 * 
 * @param collectionStats
 *            collection-level statistics
 * @param termStats
 *            term-level statistics for the terms in the phrase
 * @return an Explain object that includes both an idf score factor for the
 *         phrase and an explanation for each term.
 */
public Explanation idfExplain(final CollectionStatistics collectionStats,
        final TermStatistics termStats[]) {
    final long max = collectionStats.maxDoc();
    float idf = 0.0f;
    final Explanation exp = new Explanation();
    exp.setDescription("idf(), sum of:");
    for (final TermStatistics stat : termStats) {
        final long docFreq = stat.docFreq();
        final float termIdf = idf(docFreq, max);
        exp.addDetail(new Explanation(termIdf, "idf(docFreq=" + docFreq
                + ", maxDocs=" + max + ")"));
        idf += termIdf;
    }
    exp.setValue(idf);
    return exp;
}
Project: elasticsearch_my    File: DfsSearchResult.java
public static void writeFieldStats(StreamOutput out, ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics) throws IOException {
    out.writeVInt(fieldStatistics.size());

    for (ObjectObjectCursor<String, CollectionStatistics> c : fieldStatistics) {
        out.writeString(c.key);
        CollectionStatistics statistics = c.value;
        assert statistics.maxDoc() >= 0;
        out.writeVLong(statistics.maxDoc());
        out.writeVLong(addOne(statistics.docCount()));
        out.writeVLong(addOne(statistics.sumTotalTermFreq()));
        out.writeVLong(addOne(statistics.sumDocFreq()));
    }
}
Project: elasticsearch_my    File: TermVectorsWriter.java
private void writeFieldStatistics(CollectionStatistics fieldStats) throws IOException {
    long sttf = fieldStats.sumTotalTermFreq();
    assert (sttf >= -1);
    writePotentiallyNegativeVLong(sttf);
    long sdf = fieldStats.sumDocFreq();
    assert (sdf >= -1);
    writePotentiallyNegativeVLong(sdf);
    int dc = (int) fieldStats.docCount();
    assert (dc >= -1);
    writePotentiallyNegativeVInt(dc);
}
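The writePotentiallyNegativeVLong/VInt helpers used above follow the same idea: a -1 "not available" sentinel is shifted so it can travel as an unsigned variable-length integer. A hedged sketch of what such helpers could look like (the output field and the exact bodies are assumptions, not the project's code):

private void writePotentiallyNegativeVLong(long value) throws IOException {
    // -1 becomes 0; the reader is expected to shift back by one
    output.writeVLong(Math.max(0, value + 1));
}

private void writePotentiallyNegativeVInt(int value) throws IOException {
    output.writeVInt(Math.max(0, value + 1));
}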
Project: lams    File: SimilarityBase.java
@Override
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
  BasicStats stats[] = new BasicStats[termStats.length];
  for (int i = 0; i < termStats.length; i++) {
    stats[i] = newStats(collectionStats.field(), queryBoost);
    fillBasicStats(stats[i], collectionStats, termStats[i]);
  }
  return stats.length == 1 ? stats[0] : new MultiSimilarity.MultiStats(stats);
}
Project: lams    File: SimilarityBase.java
/** Fills all member fields defined in {@code BasicStats} in {@code stats}. 
 *  Subclasses can override this method to fill additional stats. */
protected void fillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) {
  // #positions(field) must be >= #positions(term)
  assert collectionStats.sumTotalTermFreq() == -1 || collectionStats.sumTotalTermFreq() >= termStats.totalTermFreq();
  long numberOfDocuments = collectionStats.maxDoc();

  long docFreq = termStats.docFreq();
  long totalTermFreq = termStats.totalTermFreq();

  // codec does not supply totalTermFreq: substitute docFreq
  if (totalTermFreq == -1) {
    totalTermFreq = docFreq;
  }

  final long numberOfFieldTokens;
  final float avgFieldLength;

  long sumTotalTermFreq = collectionStats.sumTotalTermFreq();

  if (sumTotalTermFreq <= 0) {
    // field does not exist;
    // We have to provide something if codec doesn't supply these measures,
    // or if someone omitted frequencies for the field... negative values cause
    // NaN/Inf for some scorers.
    numberOfFieldTokens = docFreq;
    avgFieldLength = 1;
  } else {
    numberOfFieldTokens = sumTotalTermFreq;
    avgFieldLength = (float)numberOfFieldTokens / numberOfDocuments;
  }

  // TODO: add sumDocFreq for field (numberOfFieldPostings)
  stats.setNumberOfDocuments(numberOfDocuments);
  stats.setNumberOfFieldTokens(numberOfFieldTokens);
  stats.setAvgFieldLength(avgFieldLength);
  stats.setDocFreq(docFreq);
  stats.setTotalTermFreq(totalTermFreq);
}
Project: lams    File: BM25Similarity.java
/** The default implementation computes the average as <code>sumTotalTermFreq / maxDoc</code>,
 * or returns <code>1</code> if the index does not store sumTotalTermFreq (Lucene 3.x indexes
 * or any field that omits frequency information). */
protected float avgFieldLength(CollectionStatistics collectionStats) {
  final long sumTotalTermFreq = collectionStats.sumTotalTermFreq();
  if (sumTotalTermFreq <= 0) {
    return 1f;       // field does not exist, or stat is unsupported
  } else {
    return (float) (sumTotalTermFreq / (double) collectionStats.maxDoc());
  }
}
Project: lams    File: BM25Similarity.java
/**
 * Computes a score factor for a phrase.
 * 
 * <p>
 * The default implementation sums the idf factor for
 * each term in the phrase.
 * 
 * @param collectionStats collection-level statistics
 * @param termStats term-level statistics for the terms in the phrase
 * @return an Explain object that includes both an idf 
 *         score factor for the phrase and an explanation 
 *         for each term.
 */
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
  final long max = collectionStats.maxDoc();
  float idf = 0.0f;
  final Explanation exp = new Explanation();
  exp.setDescription("idf(), sum of:");
  for (final TermStatistics stat : termStats ) {
    final long df = stat.docFreq();
    final float termIdf = idf(df, max);
    exp.addDetail(new Explanation(termIdf, "idf(docFreq=" + df + ", maxDocs=" + max + ")"));
    idf += termIdf;
  }
  exp.setValue(idf);
  return exp;
}
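The explanation above just sums a per-term idf(df, max). For reference, the standard BM25 formulation Lucene's BM25Similarity uses for that hook looks like the following sketch (shown for context, not copied from the lams sources):

protected float idf(long docFreq, long numDocs) {
    return (float) Math.log(1 + (numDocs - docFreq + 0.5D) / (docFreq + 0.5D));
}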
Project: lams    File: BM25Similarity.java
@Override
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
  Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats);

  float avgdl = avgFieldLength(collectionStats);

  // compute freq-independent part of bm25 equation across all norm values
  float cache[] = new float[256];
  for (int i = 0; i < cache.length; i++) {
    cache[i] = k1 * ((1 - b) + b * decodeNormValue((byte)i) / avgdl);
  }
  return new BM25Stats(collectionStats.field(), idf, queryBoost, avgdl, cache);
}
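The 256-entry cache precomputes k1 * ((1 - b) + b * dl / avgdl) for every possible encoded norm byte, so length normalization at query time is a single array lookup. A sketch of how a scorer typically consumes it (field names such as norms and weightValue are assumptions):

float score(int doc, float freq) {
    // the encoded norm byte selects the precomputed length normalization
    float norm = norms == null ? k1 : cache[(byte) norms.get(doc) & 0xFF];
    return weightValue * freq / (freq + norm);
}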
Project: lams    File: PerFieldSimilarityWrapper.java
@Override
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
  PerFieldSimWeight weight = new PerFieldSimWeight();
  weight.delegate = get(collectionStats.field());
  weight.delegateWeight = weight.delegate.computeWeight(queryBoost, collectionStats, termStats);
  return weight;
}
Project: lams    File: MultiSimilarity.java
@Override
public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
  SimWeight subStats[] = new SimWeight[sims.length];
  for (int i = 0; i < subStats.length; i++) {
    subStats[i] = sims[i].computeWeight(queryBoost, collectionStats, termStats);
  }
  return new MultiStats(subStats);
}
Project: lams    File: LMSimilarity.java
/**
 * Computes the collection probability of the current term in addition to the
 * usual statistics.
 */
@Override
protected void fillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) {
  super.fillBasicStats(stats, collectionStats, termStats);
  LMStats lmStats = (LMStats) stats;
  lmStats.setCollectionProbability(collectionModel.computeProbability(stats));
}
Project: lams    File: TFIDFSimilarity.java
/**
 * Computes a score factor for a phrase.
 * 
 * <p>
 * The default implementation sums the idf factor for
 * each term in the phrase.
 * 
 * @param collectionStats collection-level statistics
 * @param termStats term-level statistics for the terms in the phrase
 * @return an Explain object that includes both an idf 
 *         score factor for the phrase and an explanation 
 *         for each term.
 */
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
  final long max = collectionStats.maxDoc();
  float idf = 0.0f;
  final Explanation exp = new Explanation();
  exp.setDescription("idf(), sum of:");
  for (final TermStatistics stat : termStats ) {
    final long df = stat.docFreq();
    final float termIdf = idf(df, max);
    exp.addDetail(new Explanation(termIdf, "idf(docFreq=" + df + ", maxDocs=" + max + ")"));
    idf += termIdf;
  }
  exp.setValue(idf);
  return exp;
}
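For comparison with the BM25 variant above, the classic tf-idf formulation that DefaultSimilarity plugs into this hook is roughly the following (a reference sketch, not part of the snippet):

public float idf(long docFreq, long numDocs) {
    return (float) (Math.log(numDocs / (double) (docFreq + 1)) + 1.0);
}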
Project: lams    File: TFIDFSimilarity.java
@Override
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
  final Explanation idf = termStats.length == 1
  ? idfExplain(collectionStats, termStats[0])
  : idfExplain(collectionStats, termStats);
  return new IDFStats(collectionStats.field(), idf, queryBoost);
}
Project: Elasticsearch    File: DfsSearchResult.java
public static void writeFieldStats(StreamOutput out, ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics) throws IOException {
    out.writeVInt(fieldStatistics.size());

    for (ObjectObjectCursor<String, CollectionStatistics> c : fieldStatistics) {
        out.writeString(c.key);
        CollectionStatistics statistics = c.value;
        assert statistics.maxDoc() >= 0;
        out.writeVLong(statistics.maxDoc());
        out.writeVLong(addOne(statistics.docCount()));
        out.writeVLong(addOne(statistics.sumTotalTermFreq()));
        out.writeVLong(addOne(statistics.sumDocFreq()));
    }
}
Project: Elasticsearch    File: TermVectorsWriter.java
private void writeFieldStatistics(CollectionStatistics fieldStats) throws IOException {
    long sttf = fieldStats.sumTotalTermFreq();
    assert (sttf >= -1);
    writePotentiallyNegativeVLong(sttf);
    long sdf = fieldStats.sumDocFreq();
    assert (sdf >= -1);
    writePotentiallyNegativeVLong(sdf);
    int dc = (int) fieldStats.docCount();
    assert (dc >= -1);
    writePotentiallyNegativeVInt(dc);
}
Project: ir-generalized-translation-models    File: BM25SimilarityLossless.java
/** The default implementation computes the average as <code>sumTotalTermFreq / docCount</code>,
 * or returns <code>1</code> if the index does not store sumTotalTermFreq
 * (any field that omits frequency information). */
protected float avgFieldLength(CollectionStatistics collectionStats) {
  final long sumTotalTermFreq = collectionStats.sumTotalTermFreq();
  if (sumTotalTermFreq <= 0) {
    return 1f;       // field does not exist, or stat is unsupported
  } else {
    final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
    return (float) (sumTotalTermFreq / (double) docCount);
  }
}
Project: ir-generalized-translation-models    File: BM25SimilarityLossless.java
@Override
public final SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) {
  Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats);

  float avgdl = avgFieldLength(collectionStats);

  return new BM25StatsFixed(collectionStats.field(), k1, b, idf, avgdl);
}
Project: linden    File: LindenSimilarity.java
@Override
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
  final long df = termStats.docFreq();
  final long max = collectionStats.maxDoc();
  final float idf = idfManager.getIDF(termStats.term().utf8ToString());
  return new Explanation(idf, "idf(docFreq=" + df + ", maxDocs=" + max + ")");
}
Project: lucene4ir    File: BM25Similarity.java
/** The default implementation computes the average as <code>sumTotalTermFreq / docCount</code>,
 * or returns <code>1</code> if the index does not store sumTotalTermFreq
 * (any field that omits frequency information). */
protected float avgFieldLength(CollectionStatistics collectionStats) {
  final long sumTotalTermFreq = collectionStats.sumTotalTermFreq();
  if (sumTotalTermFreq <= 0) {
    return 1f;       // field does not exist, or stat is unsupported
  } else {
    final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
    return (float) (sumTotalTermFreq / (double) docCount);
  }
}
Project: lucene4ir    File: BM25Similarity.java
@Override
public final SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) {
  Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats);

  float avgdl = avgFieldLength(collectionStats);

  // compute freq-independent part of bm25 equation across all norm values
  float cache[] = new float[256];
  for (int i = 0; i < cache.length; i++) {
    cache[i] = k1 * ((1 - b) + b * decodeNormValue((byte)i) / avgdl);
  }
  return new BM25Stats(collectionStats.field(), idf, avgdl, cache);
}
Project: lucene4ir    File: OKAPIBM25Similarity.java
@Override
public final SimWeight computeWeight(CollectionStatistics collectionStats,
                                     TermStatistics... termStats) {
    long  N, n;
    float idf_, avdl;

    idf_ = 1.0f;

    N = collectionStats.docCount();
    if (N == -1) {
        N = collectionStats.maxDoc();
    }

    avdl = collectionStats.sumTotalTermFreq() / N;

    if (termStats.length == 1) {
        n    = termStats[0].docFreq();
        idf_ = idf(n, N);
    } else { /* computation for a phrase */
        for (final TermStatistics stat : termStats) {
            n     = stat.docFreq();
            idf_ += idf(n, N);
        }
    }

    return new TFIDFWeight(collectionStats.field(), idf_, avdl);
}
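The snippet calls an idf(n, N) helper defined elsewhere in the class. The classic Okapi form such a helper typically computes is sketched below; treat it as an assumption rather than the project's actual implementation:

private float idf(long docFreq, long numDocs) {
    return (float) Math.log((numDocs - docFreq + 0.5) / (docFreq + 0.5));
}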
Project: lucene4ir    File: ExampleStatsApp.java
public void reportCollectionStatistics() throws IOException {

        IndexSearcher searcher = new IndexSearcher(reader);

        CollectionStatistics collectionStats = searcher.collectionStatistics(Lucene4IRConstants.FIELD_ALL);
        long token_count = collectionStats.sumTotalTermFreq();
        long doc_count = collectionStats.docCount();
        long sum_doc_count = collectionStats.sumDocFreq();
        long avg_doc_length = token_count / doc_count;

        System.out.println("ALL: Token count: " + token_count+ " Doc Count: " + doc_count + " sum doc: " + sum_doc_count + " avg doc len: " + avg_doc_length);

        collectionStats = searcher.collectionStatistics(Lucene4IRConstants.FIELD_TITLE);
        token_count = collectionStats.sumTotalTermFreq();
        doc_count = collectionStats.docCount();
        sum_doc_count = collectionStats.sumDocFreq();
        avg_doc_length = token_count / doc_count;

        System.out.println("TITLE: Token count: " + token_count+ " Doc Count: " + doc_count + " sum doc: " + sum_doc_count + " avg doc len: " + avg_doc_length);


        collectionStats = searcher.collectionStatistics(Lucene4IRConstants.FIELD_CONTENT);
        token_count = collectionStats.sumTotalTermFreq();
        doc_count = collectionStats.docCount();
        sum_doc_count = collectionStats.sumDocFreq();
        avg_doc_length = token_count / doc_count;

        System.out.println("CONTENT: Token count: " + token_count+ " Doc Count: " + doc_count + " sum doc: " + sum_doc_count + " avg doc len: " + avg_doc_length);

    }
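The three per-field blocks above repeat the same four reads and the same integer-division average. A hedged sketch of a helper that would factor them out (printFieldStats is not part of the lucene4ir sources):

private void printFieldStats(IndexSearcher searcher, String label, String field) throws IOException {
    CollectionStatistics stats = searcher.collectionStatistics(field);
    long tokenCount = stats.sumTotalTermFreq();
    long docCount = stats.docCount();
    long sumDocFreq = stats.sumDocFreq();
    long avgDocLength = tokenCount / docCount;  // integer division, as in the original
    System.out.println(label + ": Token count: " + tokenCount + " Doc Count: " + docCount
            + " sum doc: " + sumDocFreq + " avg doc len: " + avgDocLength);
}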
Project: DoSeR-Disambiguation    File: FuzzyLabelSimilarity.java
@Override
public final SimWeight computeWeight(final float queryBoost,
        final CollectionStatistics collectionStats,
        final TermStatistics... termStats) {
    final Explanation idf = termStats.length == 1 ? this.idfExplain(
            collectionStats, termStats[0]) : this.idfExplain(
            collectionStats, termStats);
    return new IDFStats(collectionStats.field(), idf, queryBoost);
}
Project: elasticsearch-simple-similarity    File: SimpleSimilarity.java
public final SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
    if (termStats.length == 1) {
        return new SimpleScore(boost, collectionStats, termStats[0]);
    } else {
        return new SimpleScore(boost, collectionStats, termStats);
    }
}
Project: elasticsearch-simple-similarity    File: SimpleSimilarity.java
SimpleScore(float boost, CollectionStatistics collectionStats, TermStatistics termStats[]) {
    float total = 0.0f;
    List<Explanation> scores = new ArrayList<>();
    for (final TermStatistics stat : termStats) {
        String description = String.format("simple score for (%s:%s)", collectionStats.field(), stat.term().utf8ToString());
        scores.add(Explanation.match(1.0f, description));
        total += 1.0f;
    }
    this.score = Explanation.match(total, "total score, sum of:", scores);
    this.boost = Explanation.match(boost, "boost");
}
Project: LuceneDB    File: DummySimilarity.java
@Override
public SimWeight computeWeight(float queryBoost,
        CollectionStatistics collectionStats, TermStatistics... termStats) {
    return new SimWeight() {

        @Override
        public void normalize(float queryNorm, float topLevelBoost) {
        }

        @Override
        public float getValueForNormalization() {
            return 0;
        }
    };
}
Project: search    File: SimilarityBase.java
@Override
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
  BasicStats stats[] = new BasicStats[termStats.length];
  for (int i = 0; i < termStats.length; i++) {
    stats[i] = newStats(collectionStats.field(), queryBoost);
    fillBasicStats(stats[i], collectionStats, termStats[i]);
  }
  return stats.length == 1 ? stats[0] : new MultiSimilarity.MultiStats(stats);
}
Project: search    File: SimilarityBase.java
/** Fills all member fields defined in {@code BasicStats} in {@code stats}. 
 *  Subclasses can override this method to fill additional stats. */
protected void fillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) {
  // #positions(field) must be >= #positions(term)
  assert collectionStats.sumTotalTermFreq() == -1 || collectionStats.sumTotalTermFreq() >= termStats.totalTermFreq();
  long numberOfDocuments = collectionStats.maxDoc();

  long docFreq = termStats.docFreq();
  long totalTermFreq = termStats.totalTermFreq();

  // codec does not supply totalTermFreq: substitute docFreq
  if (totalTermFreq == -1) {
    totalTermFreq = docFreq;
  }

  final long numberOfFieldTokens;
  final float avgFieldLength;

  long sumTotalTermFreq = collectionStats.sumTotalTermFreq();

  if (sumTotalTermFreq <= 0) {
    // field does not exist;
    // We have to provide something if codec doesn't supply these measures,
    // or if someone omitted frequencies for the field... negative values cause
    // NaN/Inf for some scorers.
    numberOfFieldTokens = docFreq;
    avgFieldLength = 1;
  } else {
    numberOfFieldTokens = sumTotalTermFreq;
    avgFieldLength = (float)numberOfFieldTokens / numberOfDocuments;
  }

  // TODO: add sumDocFreq for field (numberOfFieldPostings)
  stats.setNumberOfDocuments(numberOfDocuments);
  stats.setNumberOfFieldTokens(numberOfFieldTokens);
  stats.setAvgFieldLength(avgFieldLength);
  stats.setDocFreq(docFreq);
  stats.setTotalTermFreq(totalTermFreq);
}
Project: search    File: BM25Similarity.java
/** The default implementation computes the average as <code>sumTotalTermFreq / maxDoc</code>,
 * or returns <code>1</code> if the index does not store sumTotalTermFreq (Lucene 3.x indexes
 * or any field that omits frequency information). */
protected float avgFieldLength(CollectionStatistics collectionStats) {
  final long sumTotalTermFreq = collectionStats.sumTotalTermFreq();
  if (sumTotalTermFreq <= 0) {
    return 1f;       // field does not exist, or stat is unsupported
  } else {
    return (float) (sumTotalTermFreq / (double) collectionStats.maxDoc());
  }
}
Project: search    File: BM25Similarity.java
/**
 * Computes a score factor for a phrase.
 * 
 * <p>
 * The default implementation sums the idf factor for
 * each term in the phrase.
 * 
 * @param collectionStats collection-level statistics
 * @param termStats term-level statistics for the terms in the phrase
 * @return an Explain object that includes both an idf 
 *         score factor for the phrase and an explanation 
 *         for each term.
 */
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
  final long max = collectionStats.maxDoc();
  float idf = 0.0f;
  final Explanation exp = new Explanation();
  exp.setDescription("idf(), sum of:");
  for (final TermStatistics stat : termStats ) {
    final long df = stat.docFreq();
    final float termIdf = idf(df, max);
    exp.addDetail(new Explanation(termIdf, "idf(docFreq=" + df + ", maxDocs=" + max + ")"));
    idf += termIdf;
  }
  exp.setValue(idf);
  return exp;
}
Project: search    File: BM25Similarity.java
@Override
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
  Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats);

  float avgdl = avgFieldLength(collectionStats);

  // compute freq-independent part of bm25 equation across all norm values
  float cache[] = new float[256];
  for (int i = 0; i < cache.length; i++) {
    cache[i] = k1 * ((1 - b) + b * decodeNormValue((byte)i) / avgdl);
  }
  return new BM25Stats(collectionStats.field(), idf, queryBoost, avgdl, cache);
}
Project: search    File: PerFieldSimilarityWrapper.java
@Override
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
  PerFieldSimWeight weight = new PerFieldSimWeight();
  weight.delegate = get(collectionStats.field());
  weight.delegateWeight = weight.delegate.computeWeight(queryBoost, collectionStats, termStats);
  return weight;
}
Project: search    File: MultiSimilarity.java
@Override
public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
  SimWeight subStats[] = new SimWeight[sims.length];
  for (int i = 0; i < subStats.length; i++) {
    subStats[i] = sims[i].computeWeight(queryBoost, collectionStats, termStats);
  }
  return new MultiStats(subStats);
}
Project: search    File: LMSimilarity.java
/**
 * Computes the collection probability of the current term in addition to the
 * usual statistics.
 */
@Override
protected void fillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) {
  super.fillBasicStats(stats, collectionStats, termStats);
  LMStats lmStats = (LMStats) stats;
  lmStats.setCollectionProbability(collectionModel.computeProbability(stats));
}
Project: search    File: TFIDFSimilarity.java
/**
 * Computes a score factor for a phrase.
 * 
 * <p>
 * The default implementation sums the idf factor for
 * each term in the phrase.
 * 
 * @param collectionStats collection-level statistics
 * @param termStats term-level statistics for the terms in the phrase
 * @return an Explain object that includes both an idf 
 *         score factor for the phrase and an explanation 
 *         for each term.
 */
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
  final long max = collectionStats.maxDoc();
  float idf = 0.0f;
  final Explanation exp = new Explanation();
  exp.setDescription("idf(), sum of:");
  for (final TermStatistics stat : termStats ) {
    final long df = stat.docFreq();
    final float termIdf = idf(df, max);
    exp.addDetail(new Explanation(termIdf, "idf(docFreq=" + df + ", maxDocs=" + max + ")"));
    idf += termIdf;
  }
  exp.setValue(idf);
  return exp;
}
Project: search    File: TFIDFSimilarity.java
@Override
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
  final Explanation idf = termStats.length == 1
  ? idfExplain(collectionStats, termStats[0])
  : idfExplain(collectionStats, termStats);
  return new IDFStats(collectionStats.field(), idf, queryBoost);
}