Java 类org.apache.lucene.index.MultiFields 实例源码

项目:elasticsearch_my    文件:SmoothingModelTestCase.java   
/**
 * Test the WordScorer emitted by the smoothing model
 */
public void testBuildWordScorer() throws IOException {
    SmoothingModel testModel = createTestModel();
    // Index a single whitespace-analyzed document so the scorer has terms.
    Map<String, Analyzer> mapping = new HashMap<>();
    mapping.put("field", new WhitespaceAnalyzer());
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(), mapping);
    IndexWriter writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(wrapper));
    Document doc = new Document();
    doc.add(new Field("field", "someText", TextField.TYPE_NOT_STORED));
    writer.addDocument(doc);
    DirectoryReader ir = DirectoryReader.open(writer);
    try {
        WordScorer wordScorer = testModel.buildWordScorerFactory().newScorer(ir, MultiFields.getTerms(ir, "field"), "field", 0.9d,
                BytesRefs.toBytesRef(" "));
        assertWordScorer(wordScorer, testModel);
    } finally {
        // Fix: the reader and writer were previously leaked.
        ir.close();
        writer.close();
    }
}
项目:lams    文件:QueryAutoStopWordAnalyzer.java   
/**
 * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
 * given selection of fields from terms with a document frequency greater than
 * the given maxDocFreq
 *
 * @param delegate Analyzer whose TokenStream will be filtered
 * @param indexReader IndexReader to identify the stopwords from
 * @param fields Selection of fields to calculate stopwords for
 * @param maxDocFreq Document frequency terms should be above in order to be stopwords
 * @throws IOException Can be thrown while reading from the IndexReader
 */
public QueryAutoStopWordAnalyzer(
    Analyzer delegate,
    IndexReader indexReader,
    Collection<String> fields,
    int maxDocFreq) throws IOException {
  super(delegate.getReuseStrategy());
  this.delegate = delegate;

  for (String field : fields) {
    Set<String> stopWords = new HashSet<>();
    // null when the field has no postings at all; its stop-word set stays empty.
    Terms terms = MultiFields.getTerms(indexReader, field);
    CharsRefBuilder spare = new CharsRefBuilder();
    if (terms != null) {
      TermsEnum te = terms.iterator(null);
      BytesRef text;
      // Any term appearing in more than maxDocFreq docs becomes a stop word.
      while ((text = te.next()) != null) {
        if (te.docFreq() > maxDocFreq) {
          // Copy the bytes: the enum reuses its BytesRef buffer between calls.
          spare.copyUTF8Bytes(text);
          stopWords.add(spare.toString());
        }
      }
    }
    stopWordsPerField.put(field, stopWords);
  }
}
项目:lams    文件:SrndTermQuery.java   
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  // Mirror the other SimpleTerm implementations: probe the index for the
  // exact term and report it only when it is actually present.
  Terms fieldTerms = MultiFields.getTerms(reader, fieldName);
  if (fieldTerms == null) {
    return; // field has no postings at all
  }
  TermsEnum enumerator = fieldTerms.iterator(null);
  if (enumerator.seekCeil(new BytesRef(getTermText())) == TermsEnum.SeekStatus.FOUND) {
    mtv.visitMatchingTerm(getLuceneTerm(fieldName));
  }
}
项目:Elasticsearch    文件:TransportFieldStatsTransportAction.java   
@Override
protected FieldStatsShardResponse shardOperation(FieldStatsShardRequest request) {
    // Collects per-field statistics for this shard by walking the terms of
    // each requested field on an acquired searcher.
    ShardId shardId = request.shardId();
    Map<String, FieldStats> fieldStats = new HashMap<>();
    IndexService indexServices = indicesService.indexServiceSafe(shardId.getIndex());
    MapperService mapperService = indexServices.mapperService();
    IndexShard shard = indexServices.shardSafe(shardId.id());
    try (Engine.Searcher searcher = shard.acquireSearcher("fieldstats")) {
        for (String field : request.getFields()) {
            MappedFieldType fieldType = mapperService.fullName(field);
            if (fieldType != null) {
                IndexReader reader = searcher.reader();
                // terms is null when the field is mapped but has no indexed
                // values on this shard; such fields are skipped silently.
                Terms terms = MultiFields.getTerms(reader, field);
                if (terms != null) {
                    fieldStats.put(field, fieldType.stats(terms, reader.maxDoc()));
                }
            } else {
                throw new IllegalArgumentException("field [" + field + "] doesn't exist");
            }
        }
    } catch (IOException e) {
        throw ExceptionsHelper.convertToElastic(e);
    }
    return new FieldStatsShardResponse(shardId, fieldStats);
}
项目:search    文件:TestOrdsBlockTree.java   
// Verifies seekCeil lands on the next greater term (with the right ord)
// when the sought bytes are absent from the index.
public void testSeekCeilNotFound() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  // Get empty string in there!
  doc.add(newStringField("field", "", Field.Store.NO));
  w.addDocument(doc);

  // Index single-char terms starting at 'a' plus "a"+char companions.
  for(int i=0;i<36;i++) {
    doc = new Document();
    String term = "" + (char) (97+i);
    String term2 = "a" + (char) (97+i);
    doc.add(newTextField("field", term + " " + term2, Field.Store.NO));
    w.addDocument(doc);
  }

  w.forceMerge(1);
  IndexReader r = w.getReader();
  TermsEnum te = MultiFields.getTerms(r, "field").iterator(null);
  // 0x22 ('"') sorts between "" and "a", so seekCeil must report NOT_FOUND
  // and position the enum on "a", which has ordinal 1 (after "").
  assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seekCeil(new BytesRef(new byte[] {0x22})));
  assertEquals("a", te.term().utf8ToString());
  assertEquals(1L, te.ord());
  r.close();
  w.close();
  dir.close();
}
项目:search    文件:QueryAutoStopWordAnalyzer.java   
/**
 * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
 * given selection of fields from terms with a document frequency greater than
 * the given maxDocFreq
 *
 * @param delegate Analyzer whose TokenStream will be filtered
 * @param indexReader IndexReader to identify the stopwords from
 * @param fields Selection of fields to calculate stopwords for
 * @param maxDocFreq Document frequency terms should be above in order to be stopwords
 * @throws IOException Can be thrown while reading from the IndexReader
 */
public QueryAutoStopWordAnalyzer(
    Analyzer delegate,
    IndexReader indexReader,
    Collection<String> fields,
    int maxDocFreq) throws IOException {
  super(delegate.getReuseStrategy());
  this.delegate = delegate;

  for (String field : fields) {
    Set<String> stopWords = new HashSet<>();
    // null when the field has no postings at all; its stop-word set stays empty.
    Terms terms = MultiFields.getTerms(indexReader, field);
    CharsRefBuilder spare = new CharsRefBuilder();
    if (terms != null) {
      TermsEnum te = terms.iterator(null);
      BytesRef text;
      // Any term appearing in more than maxDocFreq docs becomes a stop word.
      while ((text = te.next()) != null) {
        if (te.docFreq() > maxDocFreq) {
          // Copy the bytes: the enum reuses its BytesRef buffer between calls.
          spare.copyUTF8Bytes(text);
          stopWords.add(spare.toString());
        }
      }
    }
    stopWordsPerField.put(field, stopWords);
  }
}
项目:search    文件:SimpleNaiveBayesClassifier.java   
/**
 * {@inheritDoc}
 */
@Override
public ClassificationResult<BytesRef> assignClass(String inputDocument) throws IOException {
  if (atomicReader == null) {
    throw new IOException("You must first call Classifier#train");
  }
  double max = - Double.MAX_VALUE;
  BytesRef foundClass = new BytesRef();

  // Enumerate every class label stored in the class field; pick the one with
  // the highest log prior + log likelihood for the tokenized input document.
  Terms terms = MultiFields.getTerms(atomicReader, classFieldName);
  TermsEnum termsEnum = terms.iterator(null);
  BytesRef next;
  String[] tokenizedDoc = tokenizeDoc(inputDocument);
  while ((next = termsEnum.next()) != null) {
    double clVal = calculateLogPrior(next) + calculateLogLikelihood(tokenizedDoc, next);
    if (clVal > max) {
      max = clVal;
      // Deep copy: the enum reuses its BytesRef buffer on subsequent next() calls.
      foundClass = BytesRef.deepCopyOf(next);
    }
  }
  // Map the (negative) log score to a positive value; larger means more confident.
  double score = 10 / Math.abs(max);
  return new ClassificationResult<>(foundClass, score);
}
项目:search    文件:LuceneTestCase.java   
/** 
 * checks that norms are the same across all fields 
 */
public void assertNormsEquals(String info, IndexReader leftReader, IndexReader rightReader) throws IOException {
  Fields leftFields = MultiFields.getFields(leftReader);
  Fields rightFields = MultiFields.getFields(rightReader);
  // A null Fields means "no postings"; both readers must then agree on that.
  if (leftFields == null || rightFields == null) {
    assertNull(info, leftFields);
    assertNull(info, rightFields);
    return;
  }

  for (String field : leftFields) {
    NumericDocValues leftNorms = MultiDocValues.getNormValues(leftReader, field);
    NumericDocValues rightNorms = MultiDocValues.getNormValues(rightReader, field);
    if (leftNorms == null || rightNorms == null) {
      // Norms must be absent on both sides together.
      assertNull(info, leftNorms);
      assertNull(info, rightNorms);
    } else {
      assertDocValuesEquals(info, leftReader.maxDoc(), leftNorms, rightNorms);
    }
  }
}
项目:search    文件:SrndTermQuery.java   
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  // Keep symmetry with the other SimpleTerm implementations: only visit the
  // term if it actually exists in the index.
  Terms fieldTerms = MultiFields.getTerms(reader, fieldName);
  if (fieldTerms == null) {
    return; // no postings for this field
  }
  TermsEnum enumerator = fieldTerms.iterator(null);
  BytesRef target = new BytesRef(getTermText());
  if (enumerator.seekCeil(target) == TermsEnum.SeekStatus.FOUND) {
    mtv.visitMatchingTerm(getLuceneTerm(fieldName));
  }
}
项目:search    文件:TestNumericRangeQuery32.java   
// Counts the terms produced by q's TermsEnum, asserting they arrive in
// strictly increasing byte order.
private int countTerms(MultiTermQuery q) throws Exception {
  final Terms terms = MultiFields.getTerms(reader, q.getField());
  if (terms == null)
    return 0;
  final TermsEnum termEnum = q.getTermsEnum(terms);
  assertNotNull(termEnum);
  int count = 0;
  BytesRef cur, last = null;
  while ((cur = termEnum.next()) != null) {
    count++;
    if (last != null) {
      assertTrue(last.compareTo(cur) < 0);
    }
    // Deep copy: the enum may reuse cur's byte buffer on the next call.
    last = BytesRef.deepCopyOf(cur);
  } 
  // LUCENE-3314: the results after next() already returned null are undefined,
  // assertNull(termEnum.next());
  return count;
}
项目:search    文件:TestTermRangeQuery.java   
// Open-ended TermRangeQuery over "content" matches every doc and must not
// rewrite to a TermRangeTermsEnum unless a real lower bound is supplied.
public void testAllDocs() throws Exception {
  initializeIndex(new String[]{"A", "B", "C", "D"});
  IndexReader reader = DirectoryReader.open(dir);
  IndexSearcher searcher = newSearcher(reader);
  TermRangeQuery query = new TermRangeQuery("content", null, null, true, true);
  Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "content");
  assertFalse(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
  assertEquals(4, searcher.search(query, null, 1000).scoreDocs.length);
  query = new TermRangeQuery("content", null, null, false, false);
  assertFalse(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
  assertEquals(4, searcher.search(query, null, 1000).scoreDocs.length);
  query = TermRangeQuery.newStringRange("content", "", null, true, false);
  assertFalse(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
  assertEquals(4, searcher.search(query, null, 1000).scoreDocs.length);
  // and now another one: with a real lower bound the range enum is used.
  query = TermRangeQuery.newStringRange("content", "B", null, true, false);
  assertTrue(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
  assertEquals(3, searcher.search(query, null, 1000).scoreDocs.length);
  reader.close();
}
项目:search    文件:TestWildcard.java   
/**
 * Tests if a WildcardQuery that has only a trailing * in the term is
 * rewritten to a single PrefixQuery. The boost and rewriteMethod should be
 * preserved.
 */
public void testPrefixTerm() throws IOException {
  Directory indexStore = getIndexStore("field", new String[]{"prefix", "prefixx"});
  IndexReader reader = DirectoryReader.open(indexStore);
  IndexSearcher searcher = newSearcher(reader);

  // "prefix*" should enumerate terms via PrefixTermsEnum.
  MultiTermQuery wq = new WildcardQuery(new Term("field", "prefix*"));
  assertMatches(searcher, wq, 2);
  Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "field");
  assertTrue(wq.getTermsEnum(terms) instanceof PrefixTermsEnum);

  // A bare "*" matches everything and must use neither the prefix nor the
  // automaton enum.
  wq = new WildcardQuery(new Term("field", "*"));
  assertMatches(searcher, wq, 2);
  assertFalse(wq.getTermsEnum(terms) instanceof PrefixTermsEnum);
  assertFalse(wq.getTermsEnum(terms).getClass().getSimpleName().contains("AutomatonTermsEnum"));
  reader.close();
  indexStore.close();
}
项目:search    文件:TestNumericRangeQuery64.java   
// Counts the terms produced by q's TermsEnum, asserting they arrive in
// strictly increasing byte order.
private int countTerms(MultiTermQuery q) throws Exception {
  final Terms terms = MultiFields.getTerms(reader, q.getField());
  if (terms == null)
    return 0;
  final TermsEnum termEnum = q.getTermsEnum(terms);
  assertNotNull(termEnum);
  int count = 0;
  BytesRef cur, last = null;
  while ((cur = termEnum.next()) != null) {
    count++;
    if (last != null) {
      assertTrue(last.compareTo(cur) < 0);
    }
    // Deep copy: the enum may reuse cur's byte buffer on the next call.
    last = BytesRef.deepCopyOf(cur);
  } 
  // LUCENE-3314: the results after next() already returned null are undefined,
  // assertNull(termEnum.next());
  return count;
}
项目:search    文件:TestRTGBase.java   
// Returns the id of the single live document matching term t, or -1 when no
// live doc matches; asserts the term matches at most one document.
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
  Fields fields = MultiFields.getFields(r);
  if (fields == null) return -1;
  Terms terms = fields.terms(t.field());
  if (terms == null) return -1;
  BytesRef termBytes = t.bytes();
  final TermsEnum termsEnum = terms.iterator(null);
  if (!termsEnum.seekExact(termBytes)) {
    return -1;
  }
  // Pass the live-docs bits so deleted documents are skipped.
  DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null, DocsEnum.FLAG_NONE);
  int id = docs.nextDoc();
  if (id != DocIdSetIterator.NO_MORE_DOCS) {
    int next = docs.nextDoc();
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, next);
  }
  return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
项目:neo4j-lucene5-index    文件:LuceneAllDocumentsReader.java   
private DocIdSetIterator iterateAllDocs()
{
    // Iterate every doc id in the reader, filtering out deleted documents
    // when the index has any.
    IndexReader reader = searcher.getIndexReader();
    final DocIdSetIterator everyDoc = DocIdSetIterator.all( reader.maxDoc() );
    final Bits liveDocs = MultiFields.getLiveDocs( reader );
    if ( liveDocs == null )
    {
        // No deletions: nothing to filter.
        return everyDoc;
    }
    return new FilteredDocIdSetIterator( everyDoc )
    {
        @Override
        protected boolean match( int doc )
        {
            return liveDocs.get( doc );
        }
    };
}
项目:semanticvectors    文件:LuceneUtils.java   
/**
 * @param flagConfig Contains all information necessary for configuring LuceneUtils.
 *        {@link FlagConfig#luceneindexpath()} must be non-empty. 
 */
public LuceneUtils(FlagConfig flagConfig) throws IOException {
  if (flagConfig.luceneindexpath().isEmpty()) {
    throw new IllegalArgumentException(
        "-luceneindexpath is a required argument for initializing LuceneUtils instance.");
  }

  this.compositeReader = DirectoryReader.open(
      FSDirectory.open(FileSystems.getDefault().getPath(flagConfig.luceneindexpath())));
  this.leafReader = SlowCompositeReaderWrapper.wrap(compositeReader);
  // Fix: removed a MultiFields.getFields(compositeReader) call whose return
  // value was discarded — the static method just builds a view object, so
  // the call was dead code.
  this.flagConfig = flagConfig;
  if (!flagConfig.stoplistfile().isEmpty())
    loadStopWords(flagConfig.stoplistfile());

  if (!flagConfig.startlistfile().isEmpty())
    loadStartWords(flagConfig.startlistfile());

  VerbatimLogger.info("Initialized LuceneUtils from Lucene index in directory: " + flagConfig.luceneindexpath() + "\n");
  VerbatimLogger.info("Fields in index are: " + String.join(", ", this.getFieldNames()) + "\n");
}
项目:lire    文件:TestUCID.java   
// Builds an index for the given feature class and measures the mean query
// time over all indexed documents whose file name is a registered query.
private void testSearchSpeed(ArrayList<String> images, final Class featureClass) throws IOException {
    parallelIndexer = new ParallelIndexer(8, indexPath, testExtensive, true) {
        @Override
        public void addBuilders(ChainedDocumentBuilder builder) {
            builder.addBuilder(new GenericDocumentBuilder(featureClass, "feature"));
        }
    };
    parallelIndexer.run();
    IndexReader reader = DirectoryReader.open(new RAMDirectory(FSDirectory.open(new File(indexPath)), IOContext.READONCE));
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    double queryCount = 0d;
    ImageSearcher searcher = new GenericFastImageSearcher(100, featureClass, "feature");
    long ms = System.currentTimeMillis();
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.
        String fileName = getIDfromFileName(reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
        if (queries.keySet().contains(fileName)) {
            queryCount += 1d;
            // ok, we've got a query here for a document ...
            Document queryDoc = reader.document(i);
            // Fix: the ImageSearchHits result was assigned to an unused local;
            // only the timed search call itself matters for this benchmark.
            searcher.search(queryDoc, reader);
        }
    }
    ms = System.currentTimeMillis() - ms;
    System.out.printf("%s \t %3.1f \n", featureClass.getName().substring(featureClass.getName().lastIndexOf('.')+1), (double) ms / queryCount);
}
项目:eswc-2015-semantic-typing    文件:TfIdfSearcher.java   
/**
 * Computes the inverse document frequency of every term in every field of
 * the index and caches it in {@code inverseDocFreq}.
 *
 * @param reader index reader to scan
 * @return Map of term and its inverse document frequency
 * 
 * @throws IOException if the index cannot be read
 */
public Map<String, Float> getIdfs(IndexReader reader) throws IOException
{
     Fields fields = MultiFields.getFields(reader); //get the fields of the index 

     for (String field: fields) 
     {   
         TermsEnum termEnum = MultiFields.getTerms(reader, field).iterator(null);

         BytesRef bytesRef;
         while ((bytesRef = termEnum.next()) != null) 
         {
             // Fix: dropped the former seekExact(bytesRef) call — the enum is
             // already positioned on the term that next() just returned, so
             // the extra seek only wasted work (and its guard never failed).
             String term = bytesRef.utf8ToString(); 
             float idf = tfidfSIM.idf( termEnum.docFreq(), reader.numDocs() );
             inverseDocFreq.put(term, idf);    
             System.out.println(term +" idf= "+ idf);
         }
     }

     return inverseDocFreq;
}
项目:read-open-source-code    文件:SrndTermQuery.java   
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  /* check term presence in index here for symmetry with other SimpleTerm's */
  // null Terms means the field has no postings — nothing to visit.
  Terms terms = MultiFields.getTerms(reader, fieldName);
  if (terms != null) {
    TermsEnum termsEnum = terms.iterator(null);

    // seekCeil positions at the first term >= target; FOUND means the exact
    // term exists in the index.
    TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(getTermText()));
    if (status == TermsEnum.SeekStatus.FOUND) {
      mtv.visitMatchingTerm(getLuceneTerm(fieldName));
    }
  }
}
项目:NYBC    文件:TakmiSampleFixer.java   
/**
 * Internal utility: recount for a facet result node
 * 
 * @param fresNode
 *          result node to be recounted
 * @param docIds
 *          full set of matching documents.
 * @throws IOException If there is a low-level I/O error.
 */
private void recount(FacetResultNode fresNode, ScoredDocIDs docIds) throws IOException {
  // TODO (Facet): change from void to return the new, smaller docSet, and use
  // that for the children, as this will make their intersection ops faster.
  // can do this only when the new set is "sufficiently" smaller.

  /* We need the category's path name in order to do its recounting.
   * If it is missing, because the option to label only part of the
   * facet results was exercise, we need to calculate them anyway, so
   * in essence sampling with recounting spends some extra cycles for
   * labeling results for which labels are not required. */
  if (fresNode.label == null) {
    fresNode.label = taxonomyReader.getPath(fresNode.ordinal);
  }
  CategoryPath catPath = fresNode.label;

  // Exact count = |docs containing the drill-down term ∩ matching docs|,
  // restricted to live (non-deleted) documents.
  Term drillDownTerm = DrillDownQuery.term(searchParams.indexingParams, catPath);
  // TODO (Facet): avoid Multi*?
  Bits liveDocs = MultiFields.getLiveDocs(indexReader);
  int updatedCount = countIntersection(MultiFields.getTermDocsEnum(indexReader, liveDocs,
                                                                   drillDownTerm.field(), drillDownTerm.bytes(),
                                                                   0), docIds.iterator());
  fresNode.value = updatedCount;
}
项目:NYBC    文件:SimpleNaiveBayesClassifier.java   
/**
 * {@inheritDoc}
 */
@Override
public ClassificationResult<BytesRef> assignClass(String inputDocument) throws IOException {
  if (atomicReader == null) {
    throw new RuntimeException("need to train the classifier first");
  }
  double max = 0d;
  BytesRef foundClass = new BytesRef();

  // Pick the class term whose prior * likelihood is highest for the input.
  Terms terms = MultiFields.getTerms(atomicReader, classFieldName);
  TermsEnum termsEnum = terms.iterator(null);
  BytesRef next;
  String[] tokenizedDoc = tokenizeDoc(inputDocument);
  while ((next = termsEnum.next()) != null) {
    // TODO : turn it to be in log scale
    double clVal = calculatePrior(next) * calculateLikelihood(tokenizedDoc, next);
    if (clVal > max) {
      max = clVal;
      // Fix: next.clone() is a shallow copy sharing the enum's reused byte
      // buffer, so the stored class could be clobbered by later next() calls.
      // deepCopyOf snapshots the bytes (as the other classifiers in this
      // family already do).
      foundClass = BytesRef.deepCopyOf(next);
    }
  }
  return new ClassificationResult<BytesRef>(foundClass, max);
}
项目:word2vec-lucene    文件:LuceneIndexCorpus.java   
@Override
public void learnVocab() throws IOException {
  super.learnVocab();

  // Build the vocabulary from every term of the configured index field,
  // using each term's total frequency as its count.
  final String field = ((LuceneIndexConfig)config).getField();
  // NOTE(review): assumes the field has postings — terms would be null
  // otherwise and getMax() would NPE; confirm callers guarantee this.
  final Terms terms = MultiFields.getTerms(reader, field);
  final BytesRef maxTerm = terms.getMax();
  final BytesRef minTerm = terms.getMin();
  Query q = new TermRangeQuery(field, minTerm, maxTerm, true, true);
  IndexSearcher searcher = new IndexSearcher(reader);
  topDocs = searcher.search(q, Integer.MAX_VALUE);

  TermsEnum termsEnum = null;
  termsEnum = terms.iterator(termsEnum);

  // Seek to the very first term, then walk the entire enum.
  termsEnum.seekCeil(new BytesRef());
  BytesRef term = termsEnum.term();
  while(term != null){
    int p = addWordToVocab(term.utf8ToString());
    vocab[p].setCn((int)termsEnum.totalTermFreq());
    term = termsEnum.next();
  }
}
项目:NYBC    文件:SrndTermQuery.java   
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  /* check term presence in index here for symmetry with other SimpleTerm's */
  // null Terms means the field has no postings — nothing to visit.
  Terms terms = MultiFields.getTerms(reader, fieldName);
  if (terms != null) {
    TermsEnum termsEnum = terms.iterator(null);

    // seekCeil positions at the first term >= target; FOUND means the exact
    // term exists in the index.
    TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(getTermText()));
    if (status == TermsEnum.SeekStatus.FOUND) {
      mtv.visitMatchingTerm(getLuceneTerm(fieldName));
    }
  }
}
项目:NYBC    文件:TestNumericRangeQuery32.java   
// Counts the terms produced by q's TermsEnum, asserting they arrive in
// strictly increasing byte order.
private int countTerms(MultiTermQuery q) throws Exception {
  final Terms terms = MultiFields.getTerms(reader, q.getField());
  if (terms == null)
    return 0;
  final TermsEnum termEnum = q.getTermsEnum(terms);
  assertNotNull(termEnum);
  int count = 0;
  BytesRef cur, last = null;
  while ((cur = termEnum.next()) != null) {
    count++;
    if (last != null) {
      assertTrue(last.compareTo(cur) < 0);
    }
    // Deep copy: the enum may reuse cur's byte buffer on the next call.
    last = BytesRef.deepCopyOf(cur);
  } 
  // LUCENE-3314: the results after next() already returned null are undefined,
  // assertNull(termEnum.next());
  return count;
}
项目:NYBC    文件:TestTermRangeQuery.java   
// Open-ended TermRangeQuery over "content" matches every doc and must not
// rewrite to a TermRangeTermsEnum unless a real lower bound is supplied.
public void testAllDocs() throws Exception {
  initializeIndex(new String[]{"A", "B", "C", "D"});
  IndexReader reader = DirectoryReader.open(dir);
  IndexSearcher searcher = new IndexSearcher(reader);
  TermRangeQuery query = new TermRangeQuery("content", null, null, true, true);
  Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "content");
  assertFalse(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
  assertEquals(4, searcher.search(query, null, 1000).scoreDocs.length);
  query = new TermRangeQuery("content", null, null, false, false);
  assertFalse(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
  assertEquals(4, searcher.search(query, null, 1000).scoreDocs.length);
  query = TermRangeQuery.newStringRange("content", "", null, true, false);
  assertFalse(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
  assertEquals(4, searcher.search(query, null, 1000).scoreDocs.length);
  // and now another one: with a real lower bound the range enum is used.
  query = TermRangeQuery.newStringRange("content", "B", null, true, false);
  assertTrue(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
  assertEquals(3, searcher.search(query, null, 1000).scoreDocs.length);
  reader.close();
}
项目:NYBC    文件:TestWildcard.java   
/**
 * Tests if a WildcardQuery that has only a trailing * in the term is
 * rewritten to a single PrefixQuery. The boost and rewriteMethod should be
 * preserved.
 */
public void testPrefixTerm() throws IOException {
  Directory indexStore = getIndexStore("field", new String[]{"prefix", "prefixx"});
  IndexReader reader = DirectoryReader.open(indexStore);
  IndexSearcher searcher = new IndexSearcher(reader);

  // "prefix*" should enumerate terms via PrefixTermsEnum.
  MultiTermQuery wq = new WildcardQuery(new Term("field", "prefix*"));
  assertMatches(searcher, wq, 2);
  Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "field");
  assertTrue(wq.getTermsEnum(terms) instanceof PrefixTermsEnum);

  // A bare "*" matches everything and must use neither the prefix nor the
  // automaton enum.
  wq = new WildcardQuery(new Term("field", "*"));
  assertMatches(searcher, wq, 2);
  assertFalse(wq.getTermsEnum(terms) instanceof PrefixTermsEnum);
  assertFalse(wq.getTermsEnum(terms).getClass().getSimpleName().contains("AutomatonTermsEnum"));
  reader.close();
  indexStore.close();
}
项目:NYBC    文件:TestNumericRangeQuery64.java   
// Counts the terms produced by q's TermsEnum, asserting they arrive in
// strictly increasing byte order.
private int countTerms(MultiTermQuery q) throws Exception {
  final Terms terms = MultiFields.getTerms(reader, q.getField());
  if (terms == null)
    return 0;
  final TermsEnum termEnum = q.getTermsEnum(terms);
  assertNotNull(termEnum);
  int count = 0;
  BytesRef cur, last = null;
  while ((cur = termEnum.next()) != null) {
    count++;
    if (last != null) {
      assertTrue(last.compareTo(cur) < 0);
    }
    // Deep copy: the enum may reuse cur's byte buffer on the next call.
    last = BytesRef.deepCopyOf(cur);
  } 
  // LUCENE-3314: the results after next() already returned null are undefined,
  // assertNull(termEnum.next());
  return count;
}
项目:read-open-source-code    文件:QueryAutoStopWordAnalyzer.java   
/**
 * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
 * given selection of fields from terms with a document frequency greater than
 * the given maxDocFreq
 *
 * @param delegate Analyzer whose TokenStream will be filtered
 * @param indexReader IndexReader to identify the stopwords from
 * @param fields Selection of fields to calculate stopwords for
 * @param maxDocFreq Document frequency terms should be above in order to be stopwords
 * @throws IOException Can be thrown while reading from the IndexReader
 */
public QueryAutoStopWordAnalyzer(
    Analyzer delegate,
    IndexReader indexReader,
    Collection<String> fields,
    int maxDocFreq) throws IOException {
  super(delegate.getReuseStrategy());
  this.delegate = delegate;

  for (String field : fields) {
    Set<String> stopWords = new HashSet<>();
    // null when the field has no postings at all; its stop-word set stays empty.
    Terms terms = MultiFields.getTerms(indexReader, field);
    CharsRefBuilder spare = new CharsRefBuilder();
    if (terms != null) {
      TermsEnum te = terms.iterator(null);
      BytesRef text;
      // Any term appearing in more than maxDocFreq docs becomes a stop word.
      while ((text = te.next()) != null) {
        if (te.docFreq() > maxDocFreq) {
          // Copy the bytes: the enum reuses its BytesRef buffer between calls.
          spare.copyUTF8Bytes(text);
          stopWords.add(spare.toString());
        }
      }
    }
    stopWordsPerField.put(field, stopWords);
  }
}
项目:NYBC    文件:TestRTGBase.java   
// Returns the id of the single live document matching term t, or -1 when no
// live doc matches; asserts the term matches at most one document.
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
  Fields fields = MultiFields.getFields(r);
  if (fields == null) return -1;
  Terms terms = fields.terms(t.field());
  if (terms == null) return -1;
  BytesRef termBytes = t.bytes();
  final TermsEnum termsEnum = terms.iterator(null);
  if (!termsEnum.seekExact(termBytes, false)) {
    return -1;
  }
  // Pass the live-docs bits so deleted documents are skipped.
  DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null, DocsEnum.FLAG_NONE);
  int id = docs.nextDoc();
  if (id != DocIdSetIterator.NO_MORE_DOCS) {
    int next = docs.nextDoc();
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, next);
  }
  return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
项目:para    文件:LuceneUtils.java   
/**
 * Searches the Lucene index of a particular appid.
 * @param <P> type
 * @param dao {@link DAO}
 * @param appid appid
 * @param type type
 * @param query a query
 * @param pager a {@link Pager}
 * @return a list of ParaObjects
 */
public static <P extends ParaObject> List<P> searchQuery(DAO dao, String appid, String type, String query, Pager... pager) {
    if (StringUtils.isBlank(appid)) {
        return Collections.emptyList();
    }
    DirectoryReader ireader = null;
    try {
        ireader = getIndexReader(appid);
        if (ireader != null) {
            Pager page = getPager(pager);
            // Build the query string against all indexed fields of the app's
            // index, run the raw search, then hydrate the results via the DAO.
            List<P> docs = searchQuery(dao, appid, searchQueryRaw(ireader, appid, type,
                    qs(query, MultiFields.getIndexedFields(ireader)), page), page);
            return docs;
        }
    } catch (Exception e) {
        // Best effort: any failure is logged and an empty result returned.
        logger.error(null, e);
    } finally {
        closeIndexReader(ireader);
    }
    return Collections.emptyList();
}
项目:incubator-blur    文件:TermDocIterable.java   
// Advances docsEnum to the next live (non-deleted) document.
// Returns true when such a document was found, false when exhausted.
private boolean getNext() {
  try {
    int next = docsEnum.nextDoc();
    if (next == DocIdSetIterator.NO_MORE_DOCS) {
      return false;
    }
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    if (liveDocs != null) {
      // Skip deleted docs. Fix: the previous loop never checked for
      // exhaustion, so once the enum ran out while skipping deletions it
      // would call liveDocs.get(NO_MORE_DOCS) — out of the Bits' bounds.
      while (next != DocIdSetIterator.NO_MORE_DOCS && !liveDocs.get(next)) {
        next = docsEnum.nextDoc();
      }
    }
    return next != DocIdSetIterator.NO_MORE_DOCS;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
项目:search-core    文件:TestRTGBase.java   
// Returns the id of the single live document matching term t, or -1 when no
// live doc matches; asserts the term matches at most one document.
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
  Fields fields = MultiFields.getFields(r);
  if (fields == null) return -1;
  Terms terms = fields.terms(t.field());
  if (terms == null) return -1;
  BytesRef termBytes = t.bytes();
  final TermsEnum termsEnum = terms.iterator(null);
  if (!termsEnum.seekExact(termBytes, false)) {
    return -1;
  }
  // Pass the live-docs bits so deleted documents are skipped.
  DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null, DocsEnum.FLAG_NONE);
  int id = docs.nextDoc();
  if (id != DocIdSetIterator.NO_MORE_DOCS) {
    int next = docs.nextDoc();
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, next);
  }
  return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
项目:read-open-source-code    文件:SimpleNaiveBayesClassifier.java   
/**
 * {@inheritDoc}
 */
@Override
public ClassificationResult<BytesRef> assignClass(String inputDocument) throws IOException {
  if (atomicReader == null) {
    throw new IOException("You must first call Classifier#train");
  }
  double max = - Double.MAX_VALUE;
  BytesRef foundClass = new BytesRef();

  // Enumerate every class label stored in the class field; pick the one with
  // the highest log prior + log likelihood for the tokenized input document.
  Terms terms = MultiFields.getTerms(atomicReader, classFieldName);
  TermsEnum termsEnum = terms.iterator(null);
  BytesRef next;
  String[] tokenizedDoc = tokenizeDoc(inputDocument);
  while ((next = termsEnum.next()) != null) {
    double clVal = calculateLogPrior(next) + calculateLogLikelihood(tokenizedDoc, next);
    if (clVal > max) {
      max = clVal;
      // Deep copy: the enum reuses its BytesRef buffer on subsequent next() calls.
      foundClass = BytesRef.deepCopyOf(next);
    }
  }
  // Map the (negative) log score to a positive value; larger means more confident.
  double score = 10 / Math.abs(max);
  return new ClassificationResult<BytesRef>(foundClass, score);
}
项目:read-open-source-code    文件:SrndTermQuery.java   
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  /* check term presence in index here for symmetry with other SimpleTerm's */
  // null Terms means the field has no postings — nothing to visit.
  Terms terms = MultiFields.getTerms(reader, fieldName);
  if (terms != null) {
    TermsEnum termsEnum = terms.iterator(null);

    // seekCeil positions at the first term >= target; FOUND means the exact
    // term exists in the index.
    TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(getTermText()));
    if (status == TermsEnum.SeekStatus.FOUND) {
      mtv.visitMatchingTerm(getLuceneTerm(fieldName));
    }
  }
}
项目:read-open-source-code    文件:SimpleNaiveBayesClassifier.java   
/**
 * {@inheritDoc}
 */
@Override
public ClassificationResult<BytesRef> assignClass(String inputDocument) throws IOException {
  if (atomicReader == null) {
    throw new IOException("You must first call Classifier#train");
  }
  double max = - Double.MAX_VALUE;
  BytesRef foundClass = new BytesRef();

  // Enumerate every class label stored in the class field; pick the one with
  // the highest log prior + log likelihood for the tokenized input document.
  Terms terms = MultiFields.getTerms(atomicReader, classFieldName);
  TermsEnum termsEnum = terms.iterator(null);
  BytesRef next;
  String[] tokenizedDoc = tokenizeDoc(inputDocument);
  while ((next = termsEnum.next()) != null) {
    double clVal = calculateLogPrior(next) + calculateLogLikelihood(tokenizedDoc, next);
    if (clVal > max) {
      max = clVal;
      // Deep copy: the enum reuses its BytesRef buffer on subsequent next() calls.
      foundClass = BytesRef.deepCopyOf(next);
    }
  }
  // Map the (negative) log score to a positive value; larger means more confident.
  double score = 10 / Math.abs(max);
  return new ClassificationResult<BytesRef>(foundClass, score);
}
项目:read-open-source-code    文件:SrndTermQuery.java   
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  /* check term presence in index here for symmetry with other SimpleTerm's */
  // null Terms means the field has no postings — nothing to visit.
  Terms terms = MultiFields.getTerms(reader, fieldName);
  if (terms != null) {
    TermsEnum termsEnum = terms.iterator(null);

    // seekCeil positions at the first term >= target; FOUND means the exact
    // term exists in the index.
    TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(getTermText()));
    if (status == TermsEnum.SeekStatus.FOUND) {
      mtv.visitMatchingTerm(getLuceneTerm(fieldName));
    }
  }
}
项目:elasticsearch_my    文件:XMoreLikeThis.java   
/**
 * Return a query that will return docs like the passed lucene document ID.
 *
 * @param docNum the documentID of the lucene doc to generate the 'More Like This" query for.
 * @return a query that will return docs like the passed lucene document ID.
 */
public Query like(int docNum) throws IOException {
    if (fieldNames == null) {
        // No explicit field selection yet: fall back to every indexed field.
        Collection<String> indexed = MultiFields.getIndexedFields(ir);
        fieldNames = indexed.toArray(new String[indexed.size()]);
    }
    return createQuery(retrieveTerms(docNum));
}
项目:elasticsearch_my    文件:MoreLikeThisQueryBuilderTests.java   
/**
 * Here we could go overboard and use a pre-generated indexed random document for a given Item,
 * but for now we'd prefer to simply return the id as the content of the document and that for
 * every field.
 */
private static Fields generateFields(String[] fieldNames, String text) throws IOException {
    // Index the same text under every requested field name in an in-memory
    // index, then expose the result as a Fields view.
    final MemoryIndex index = new MemoryIndex();
    final Analyzer analyzer = new WhitespaceAnalyzer();
    for (String name : fieldNames) {
        index.addField(name, text, analyzer);
    }
    return MultiFields.getFields(index.createSearcher().getIndexReader());
}
项目:lams    文件:SrndTruncQuery.java   
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  // Visits every indexed term that starts with this query's prefix and whose
  // remainder matches the truncation pattern.
  int prefixLength = prefix.length();
  Terms terms = MultiFields.getTerms(reader, fieldName);
  if (terms != null) {
    Matcher matcher = pattern.matcher("");
    try {
      TermsEnum termsEnum = terms.iterator(null);

      // Position the enum at the first term >= prefix.
      TermsEnum.SeekStatus status = termsEnum.seekCeil(prefixRef);
      BytesRef text;
      if (status == TermsEnum.SeekStatus.FOUND) {
        text = prefixRef;
      } else if (status == TermsEnum.SeekStatus.NOT_FOUND) {
        text = termsEnum.term();
      } else {
        text = null; // END: no term at or after the prefix
      }

      while(text != null) {
        // Fix: dropped a redundant "text != null" re-check — the loop
        // condition already guarantees it.
        if (StringHelper.startsWith(text, prefixRef)) {
          String textString = text.utf8ToString();
          // Match the part after the prefix against the truncation pattern.
          matcher.reset(textString.substring(prefixLength));
          if (matcher.matches()) {
            mtv.visitMatchingTerm(new Term(fieldName, textString));
          }
        } else {
          break; // terms are sorted: once the prefix no longer matches, stop
        }
        text = termsEnum.next();
      }
    } finally {
      matcher.reset();
    }
  }
}
项目:lams    文件:SrndPrefixQuery.java   
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  /* inspired by PrefixQuery.rewrite(): */
  Terms terms = MultiFields.getTerms(reader, fieldName);
  if (terms != null) {
    TermsEnum termsEnum = terms.iterator(null);

    boolean skip = false;
    // Seek to the first term >= prefix; the three seek outcomes follow.
    TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(getPrefix()));
    if (status == TermsEnum.SeekStatus.FOUND) {
      // The prefix itself is an indexed term.
      mtv.visitMatchingTerm(getLucenePrefixTerm(fieldName));
    } else if (status == TermsEnum.SeekStatus.NOT_FOUND) {
      if (StringHelper.startsWith(termsEnum.term(), prefixRef)) {
        mtv.visitMatchingTerm(new Term(fieldName, termsEnum.term().utf8ToString()));
      } else {
        // First term after the seek point does not share the prefix: done.
        skip = true;
      }
    } else {
      // EOF
      skip = true;
    }

    if (!skip) {
      // Terms are sorted, so keep visiting until one loses the prefix.
      while(true) {
        BytesRef text = termsEnum.next();
        if (text != null && StringHelper.startsWith(text, prefixRef)) {
          mtv.visitMatchingTerm(new Term(fieldName, text.utf8ToString()));
        } else {
          break;
        }
      }
    }
  }
}