private void checkIndexContent(final String elementId, final String fieldContent, final int expectedAmount) throws IOException {
    final IndexReader reader = IndexManager.getInstance().getIndex().getIndexReader();
    final IndexSearcher searcher = new IndexSearcher(reader);
    final TopDocs topDocs = searcher.search(new TermQuery(new Term(FIELDNAME, fieldContent)), expectedAmount + 10);
    assertNotNull(topDocs);
    assertEquals(expectedAmount, topDocs.totalHits);
    if (expectedAmount > 0) {
        final ScoreDoc scoreDoc = topDocs.scoreDocs[0];
        assertNotNull(scoreDoc);
        final Document doc = reader.document(scoreDoc.doc);
        assertNotNull(doc);
        assertEquals(fieldContent, doc.get(FIELDNAME));
        assertEquals(elementId, doc.get(IIndexElement.FIELD_ID));
        assertEquals(INDEX_TYPE, doc.get(IIndexElement.FIELD_INDEX_TYPE));
    }
}
@Override
protected boolean doEquals(Object obj) {
    InternalTopHits other = (InternalTopHits) obj;
    if (from != other.from) return false;
    if (size != other.size) return false;
    if (topDocs.totalHits != other.topDocs.totalHits) return false;
    if (topDocs.scoreDocs.length != other.topDocs.scoreDocs.length) return false;
    for (int d = 0; d < topDocs.scoreDocs.length; d++) {
        ScoreDoc thisDoc = topDocs.scoreDocs[d];
        ScoreDoc otherDoc = other.topDocs.scoreDocs[d];
        if (thisDoc.doc != otherDoc.doc) return false;
        if (Double.compare(thisDoc.score, otherDoc.score) != 0) return false;
        if (thisDoc.shardIndex != otherDoc.shardIndex) return false;
        if (thisDoc instanceof FieldDoc) {
            if (false == (otherDoc instanceof FieldDoc)) return false;
            FieldDoc thisFieldDoc = (FieldDoc) thisDoc;
            FieldDoc otherFieldDoc = (FieldDoc) otherDoc;
            if (thisFieldDoc.fields.length != otherFieldDoc.fields.length) return false;
            for (int f = 0; f < thisFieldDoc.fields.length; f++) {
                if (false == thisFieldDoc.fields[f].equals(otherFieldDoc.fields[f])) return false;
            }
        }
    }
    return searchHits.equals(other.searchHits);
}
@Override
protected int doHashCode() {
    int hashCode = from;
    hashCode = 31 * hashCode + size;
    hashCode = 31 * hashCode + topDocs.totalHits;
    for (int d = 0; d < topDocs.scoreDocs.length; d++) {
        ScoreDoc doc = topDocs.scoreDocs[d];
        hashCode = 31 * hashCode + doc.doc;
        hashCode = 31 * hashCode + Float.floatToIntBits(doc.score);
        hashCode = 31 * hashCode + doc.shardIndex;
        if (doc instanceof FieldDoc) {
            FieldDoc fieldDoc = (FieldDoc) doc;
            hashCode = 31 * hashCode + Arrays.hashCode(fieldDoc.fields);
        }
    }
    hashCode = 31 * hashCode + searchHits.hashCode();
    return hashCode;
}
public void replayRelatedMatches(ScoreDoc[] sd) throws IOException {
    final LeafBucketCollector leafCollector = deferred.getLeafCollector(readerContext);
    leafCollector.setScorer(this);
    currentScore = 0;
    currentDocId = -1;
    if (maxDocId < 0) {
        return;
    }
    for (ScoreDoc scoreDoc : sd) {
        // Doc ids from TopDocCollector are root-level Reader so need rebasing
        int rebased = scoreDoc.doc - readerContext.docBase;
        if ((rebased >= 0) && (rebased <= maxDocId)) {
            currentScore = scoreDoc.score;
            currentDocId = rebased;
            // We stored the bucket ID in Lucene's shardIndex property for convenience.
            leafCollector.collect(rebased, scoreDoc.shardIndex);
        }
    }
}
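// Side note (a sketch, not part of the snippet above): the rebasing follows the
// standard Lucene convention that a root-level (global) doc id equals the
// segment-local id plus the segment's docBase. Assuming an open IndexReader
// `reader` and a global id `globalDoc`, the reverse mapping via
// org.apache.lucene.index.ReaderUtil would look like:
List<LeafReaderContext> leaves = reader.leaves();
int segment = ReaderUtil.subIndex(globalDoc, leaves); // locate the segment by docBase
LeafReaderContext leaf = leaves.get(segment);
int localDoc = globalDoc - leaf.docBase;              // segment-local doc id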
public ScoreDoc[] getLastEmittedDocPerShard(ReducedQueryPhase reducedQueryPhase, ScoreDoc[] sortedScoreDocs, int numShards) {
    ScoreDoc[] lastEmittedDocPerShard = new ScoreDoc[numShards];
    if (reducedQueryPhase.isEmpty() == false) {
        // from is always zero: when scrolling, from is ignored
        long size = Math.min(reducedQueryPhase.fetchHits, reducedQueryPhase.oneResult.size());
        // with collapsing we can have more hits than sorted docs
        size = Math.min(sortedScoreDocs.length, size);
        for (int sortedDocsIndex = 0; sortedDocsIndex < size; sortedDocsIndex++) {
            ScoreDoc scoreDoc = sortedScoreDocs[sortedDocsIndex];
            lastEmittedDocPerShard[scoreDoc.shardIndex] = scoreDoc;
        }
    }
    return lastEmittedDocPerShard;
}
@SneakyThrows
@Override
public LuceneSearchHit next() {
    if (!hasNext()) {
        throw new NoSuchElementException();
    }
    if (current != null) {
        current.unlinkSearcher();
    }
    i++;
    ScoreDoc scoreDoc = topDocs.scoreDocs[i];
    current = new LuceneSearchHitImpl(index.getName(), searcher, scoreDoc.doc, scoreDoc.score);
    return current;
}
public void testSort() throws Exception {
    List<CompletionSuggestion> suggestions = new ArrayList<>();
    for (int i = 0; i < randomIntBetween(1, 5); i++) {
        suggestions.add(new CompletionSuggestion(randomAsciiOfLength(randomIntBetween(1, 5)), randomIntBetween(1, 20)));
    }
    int nShards = randomIntBetween(1, 20);
    int queryResultSize = randomBoolean() ? 0 : randomIntBetween(1, nShards * 2);
    AtomicArray<QuerySearchResultProvider> results = generateQueryResults(nShards, suggestions, queryResultSize, false);
    ScoreDoc[] sortedDocs = searchPhaseController.sortDocs(true, results);
    int accumulatedLength = Math.min(queryResultSize, getTotalQueryHits(results));
    for (Suggest.Suggestion<?> suggestion : reducedSuggest(results)) {
        int suggestionSize = suggestion.getEntries().get(0).getOptions().size();
        accumulatedLength += suggestionSize;
    }
    assertThat(sortedDocs.length, equalTo(accumulatedLength));
}
public static void main(String[] args) throws Exception {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(INDEX_DIRECTORY)));
    IndexSearcher indexSearcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer();
    QueryParser queryParser = new QueryParser(FIELD_CONTENTS, analyzer);
    String searchString = "shakespeare";
    Query query = queryParser.parse(searchString);
    TopDocs results = indexSearcher.search(query, 5);
    ScoreDoc[] hits = results.scoreDocs;
    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");
    for (int i = 0; i < hits.length; ++i) {
        int docId = hits[i].doc;
        Document d = indexSearcher.doc(docId);
        System.out.println((i + 1) + ". " + d.get("path") + " score=" + hits[i].score);
    }
}
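// A common follow-up (a sketch, not part of the snippet above): paging through
// results with IndexSearcher.searchAfter, using the last ScoreDoc of each page
// as the cursor. Assumes the same `indexSearcher` and `query` as above.
ScoreDoc last = null;
while (true) {
    TopDocs page = (last == null)
            ? indexSearcher.search(query, 10)
            : indexSearcher.searchAfter(last, query, 10);
    if (page.scoreDocs.length == 0) {
        break; // no more hits
    }
    for (ScoreDoc hit : page.scoreDocs) {
        System.out.println(indexSearcher.doc(hit.doc).get("path") + " score=" + hit.score);
    }
    last = page.scoreDocs[page.scoreDocs.length - 1]; // cursor for the next page
}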
/**
 * Get the ResultDoc at the given location.
 *
 * @param i Index
 * @return The ResultDoc
 */
public ResultDoc get(int i) {
    //prtln("get() i: " + i + " num gets(): " + (++numGets));
    if (i < 0 || i >= size())
        throw new IndexOutOfBoundsException("Index " + i + " is out of bounds. Must be greater than or equal to 0 and less than " + size());
    // First check to see if this List is backed by an array and return from there:
    if (_resultDocs != null) {
        return _resultDocs[i];
    }
    // If not backed by an array, fetch from the index:
    ScoreDoc scoreDoc = _topDocs.scoreDocs[i];
    return new ResultDoc(_resultDocConfig, scoreDoc.doc, scoreDoc.score);
}
private void synTokenQuery(String search, final int numbOfResults, final double minLuceneScore,
        Map<String, Float> result, IndexSearcher searcher) throws ParseException, IOException {
    QueryParser parser = new QueryParser(Version.LUCENE_46, "surfaceFormTokens", new StandardAnalyzer(Version.LUCENE_46));
    search = QueryParser.escape(search);
    Query q = parser.parse(search);
    /*
     * Works only in String field!!
     */
    // Query q = new FuzzyQuery(new Term("surfaceFormTokens", QueryParser.escape(search)), 2);
    TopDocs top = searcher.search(q, numbOfResults);
    for (ScoreDoc doc : top.scoreDocs) {
        if (doc.score >= minLuceneScore) {
            final String key = searcher.doc(doc.doc).get("conceptID");
            if (result.getOrDefault(key, 0f) < doc.score) {
                result.put(key, doc.score);
            }
        }
    }
}
/**
 * Query method.
 *
 * @throws IOException
 * @throws CorruptIndexException
 * @throws ParseException
 */
public List Search(String searchString, LuceneResultCollector luceneResultCollector)
        throws CorruptIndexException, IOException, ParseException {
    // Approach one: parse the query string against the "summary" field
    System.out.println(this.indexSettings.getAnalyzer().getClass() + " ---- analyzer in use");
    QueryParser q = new QueryParser(Version.LUCENE_44, "summary", this.indexSettings.getAnalyzer());
    String search = new String(searchString.getBytes("ISO-8859-1"), "UTF-8");
    System.out.println(search + " ---------- search term");
    Query query = q.parse(search);
    // Approach two: an exact term query on the title field
    /*
    Term t = new Term("title", searchString);
    TermQuery query = new TermQuery(t);
    */
    System.out.println(query.toString() + " -------- query.toString()");
    ScoreDoc[] docs = this.indexSearcher.search(query, 100).scoreDocs;
    System.out.println("Total: " + docs.length + " records");
    List result = luceneResultCollector.collect(docs, this.indexSearcher);
    return result;
}
public Map<String, Integer> search(String word, String field, int maxSearch) {
    if (indexSearcher == null) {
        initialize(index);
    }
    Map<String, Integer> verbFreqs = new HashMap<>();
    QueryParser queryParser = new QueryParser(Version.LUCENE_36, field, analyzer);
    try {
        Query query = queryParser.parse(word);
        TopDocs topDocs = indexSearcher.search(query, maxSearch);
        ScoreDoc[] doc = topDocs.scoreDocs;
        for (int i = 0; i < maxSearch && i < doc.length; ++i) {
            int documentId = doc[i].doc;
            Document document = indexSearcher.doc(documentId);
            String verb = document.get(VERB);
            String frequency = document.get(FREQ);
            verbFreqs.put(verb, Integer.parseInt(frequency));
        }
    } catch (ParseException | IOException e) {
        log.warn("Error searching Lucene index.", e);
    }
    return verbFreqs;
}
void innerExecuteFetchPhase() throws Exception {
    boolean useScroll = request.scroll() != null;
    sortedShardList = searchPhaseController.sortDocs(useScroll, queryResults);
    searchPhaseController.fillDocIdsToLoad(docIdsToLoad, sortedShardList);
    if (docIdsToLoad.asList().isEmpty()) {
        finishHim();
        return;
    }
    final ScoreDoc[] lastEmittedDocPerShard = searchPhaseController.getLastEmittedDocPerShard(request, sortedShardList, firstResults.length());
    final AtomicInteger counter = new AtomicInteger(docIdsToLoad.asList().size());
    for (final AtomicArray.Entry<IntArrayList> entry : docIdsToLoad.asList()) {
        QuerySearchResult queryResult = queryResults.get(entry.index);
        DiscoveryNode node = nodes.get(queryResult.shardTarget().nodeId());
        ShardFetchSearchRequest fetchSearchRequest = createFetchRequest(queryResult, entry, lastEmittedDocPerShard);
        executeFetch(entry.index, queryResult.shardTarget(), counter, fetchSearchRequest, node);
    }
}
@Override
protected void moveToSecondPhase() throws Exception {
    boolean useScroll = request.scroll() != null;
    sortedShardList = searchPhaseController.sortDocs(useScroll, firstResults);
    searchPhaseController.fillDocIdsToLoad(docIdsToLoad, sortedShardList);
    if (docIdsToLoad.asList().isEmpty()) {
        finishHim();
        return;
    }
    final ScoreDoc[] lastEmittedDocPerShard = searchPhaseController.getLastEmittedDocPerShard(request, sortedShardList, firstResults.length());
    final AtomicInteger counter = new AtomicInteger(docIdsToLoad.asList().size());
    for (AtomicArray.Entry<IntArrayList> entry : docIdsToLoad.asList()) {
        QuerySearchResultProvider queryResult = firstResults.get(entry.index);
        DiscoveryNode node = nodes.get(queryResult.shardTarget().nodeId());
        ShardFetchSearchRequest fetchSearchRequest = createFetchRequest(queryResult.queryResult(), entry, lastEmittedDocPerShard);
        executeFetch(entry.index, queryResult.shardTarget(), counter, fetchSearchRequest, node);
    }
}
public ArrayList<String> search(String searchQuery) throws IOException, ParseException {
    ArrayList<String> retList = new ArrayList<String>();
    searcher = new Searcher(indexDir);
    long startTime = System.currentTimeMillis();
    TopDocs hits = searcher.search(searchQuery);
    long endTime = System.currentTimeMillis();
    retList.add(hits.totalHits + " documents found. Time: " + (endTime - startTime));
    for (ScoreDoc scoreDoc : hits.scoreDocs) {
        Document doc = searcher.getDocument(scoreDoc);
        retList.add("FILE_PATH: " + doc.get(LuceneConstants.FILE_PATH));
        retList.add("FILE_NAME: " + doc.get(LuceneConstants.FILE_NAME));
    }
    searcher.close();
    return retList;
}
private List<Doc> toDocs(ScoreDoc[] hits, Searcher searcher) throws IOException {
    List<Doc> documentList = new ArrayList<>();
    for (int i = 0; i < hits.length; ++i) {
        ScoreDoc scoreDoc = hits[i];
        Document doc = searcher.doc(scoreDoc.doc);
        IndexableField idField = doc.getField("_id");
        if (idField == null) {
            // Deleted between index hit and retrieval.
            continue;
        }
        final BytesRef ref = idField.binaryValue();
        final byte[] bytes = new byte[ref.length];
        System.arraycopy(ref.bytes, ref.offset, bytes, 0, ref.length);
        Doc outputDoc = new Doc(scoreDoc, bytes, 0);
        documentList.add(outputDoc);
    }
    return documentList;
}
@Override
public ScoreDoc[] prefixSearch(String keywords) throws IOException {
    if (StringUtils.isEmpty(keywords) || keywords.length() > appConfig.getKeywordMaxLength()) {
        logger.error("empty keywords or over-length! {}", keywords);
        return null;
    }
    Sort sort = new Sort(new SortField("downloadRank", SortField.INT, true));
    Term nameFldTerm = new Term(fieldName, keywords);
    PrefixQuery nameFldQuery = new PrefixQuery(nameFldTerm);
    NumericRangeQuery<Integer> catalogQuery = NumericRangeQuery.newIntRange("catalog",
            (int) EnumCatalog.SOFT.getCatalog(), (int) EnumCatalog.GAME.getCatalog(), true, true);
    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.add(catalogQuery, Occur.MUST);
    booleanQuery.add(nameFldQuery, Occur.MUST);
    TopDocs topDocs = quickTipsSearcher.search(booleanQuery, appConfig.getQuickTipsNum() * 2, sort);
    ScoreDoc[] docs = topDocs.scoreDocs;
    return docs;
}
/**
 * Deduplicates ScoreDocs (by doc id) in an already-sorted TopDocs.
 *
 * @param srcTopDocs the sorted TopDocs to deduplicate
 * @return a TopDocs with adjacent duplicate doc ids collapsed, or null if the input is null
 * @throws IOException
 */
public static TopDocs mergeDuplicateDocId(TopDocs srcTopDocs) throws IOException {
    if (srcTopDocs == null) {
        return null;
    }
    final ScoreDoc[] scoreDocs = srcTopDocs.scoreDocs;
    int totalHits = srcTopDocs.totalHits;
    List<ScoreDoc> scoreDocList = new ArrayList<ScoreDoc>();
    ScoreDoc preScoreDoc = null;
    int scoreDocSize = 0;
    for (int i = 0; i < scoreDocs.length; i++) {
        if (i > 0) {
            preScoreDoc = scoreDocList.get(scoreDocSize - 1);
            if (preScoreDoc.doc == scoreDocs[i].doc) {
                totalHits--;
                continue;
            }
        }
        scoreDocList.add(scoreDocs[i]);
        scoreDocSize++;
    }
    final ScoreDoc[] hits = new ScoreDoc[scoreDocSize];
    scoreDocList.toArray(hits);
    return new TopDocs(totalHits, hits, srcTopDocs.getMaxScore());
}
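// Illustrative usage (hypothetical queries and searcher, assuming the same
// Lucene version as these snippets): merge per-query results, then collapse
// duplicate doc ids. Note the helper above only collapses duplicates that end
// up adjacent after sorting, which mirrors how it is combined with
// TopDocs.merge in the prefixSearch snippet below.
TopDocs[] perQuery = new TopDocs[] {
        searcher.search(queryA, 10, sort),
        searcher.search(queryB, 10, sort)
};
TopDocs merged = TopDocs.merge(sort, 20, perQuery); // may contain the same doc twice
TopDocs unique = mergeDuplicateDocId(merged);       // duplicates removed, totalHits adjusted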
@Override
public ScoreDoc[] prefixSearch(String q) throws IOException {
    if (StringUtils.isEmpty(q) || q.length() > appConfig.getKeywordMaxLength()) {
        logger.error("empty keywords or over-length! {}", q);
        return null;
    }
    final TopDocs[] rstTopDocs = new TopDocs[2];
    final Query nameFldQuery = new PrefixQuery(new Term(NAME.getName(), q));
    rstTopDocs[0] = indexSearcher.search(nameFldQuery, appConfig.getQuickTipsNum() * 2, sort);
    final Query downLoadRankQuery = NumericRangeQuery.newIntRange(DOWNOLOAD_RANK.getName(),
            MIN_DOWNLOAD_RANK, Integer.MAX_VALUE, true, false);
    // From the 1,000 records with the highest download counts, keep those matching the keyword.
    rstTopDocs[1] = indexSearcher.search(downLoadRankQuery, MAX_TOP, sort);
    TopDocs rst = TopDocsUtil.mergeDuplicateDocId(TopDocs.merge(sort, MAX_TOP + appConfig.getQuickTipsNum() * 2, rstTopDocs));
    if (rst != null) {
        return rst.scoreDocs;
    }
    return null;
}
static void search(String className, Indexer indexer, Collection<IndexingContext> contexts, List<? super ClassUsage> results) throws IOException {
    String searchString = crc32base64(className.replace('.', '/'));
    Query refClassQuery = indexer.constructQuery(ClassDependencyIndexCreator.FLD_NB_DEPENDENCY_CLASS.getOntology(), new StringSearchExpression(searchString));
    TopScoreDocCollector collector = TopScoreDocCollector.create(NexusRepositoryIndexerImpl.MAX_RESULT_COUNT, null);
    for (IndexingContext context : contexts) {
        IndexSearcher searcher = context.acquireIndexSearcher();
        try {
            searcher.search(refClassQuery, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;
            LOG.log(Level.FINER, "for {0} ~ {1} found {2} hits", new Object[] {className, searchString, hits.length});
            for (ScoreDoc hit : hits) {
                int docId = hit.doc;
                Document d = searcher.doc(docId);
                String fldValue = d.get(ClassDependencyIndexCreator.NB_DEPENDENCY_CLASSES);
                LOG.log(Level.FINER, "{0} uses: {1}", new Object[] {className, fldValue});
                Set<String> refClasses = parseField(searchString, fldValue, d.get(ArtifactInfo.NAMES));
                if (!refClasses.isEmpty()) {
                    ArtifactInfo ai = IndexUtils.constructArtifactInfo(d, context);
                    if (ai != null) {
                        ai.setRepository(context.getRepositoryId());
                        List<NBVersionInfo> version = NexusRepositoryIndexerImpl.convertToNBVersionInfo(Collections.singleton(ai));
                        if (!version.isEmpty()) {
                            results.add(new ClassUsage(version.get(0), refClasses));
                        }
                    }
                }
            }
        } finally {
            context.releaseIndexSearcher(searcher);
        }
    }
}
/**
 * Executes the given {@link Query} and returns a {@link DocumentsSearchResult} with
 * the found documents and meta information about them.
 *
 * @param query the query to execute
 * @param options the additional options used to execute the query
 * @return {@link DocumentsSearchResult} with the found {@link Document}s
 */
public DocumentsSearchResult search(final Query query, final SearchOptions options) {
    final DocumentsSearchResult result = new DocumentsSearchResult();
    final TopDocs topDocs = getTopDocs(query, options);
    if (topDocs != null) {
        result.setTotalHits(topDocs.totalHits);
        final Index index = IndexManager.getInstance().getIndex();
        final IndexReader reader = index.getIndexReader();
        try {
            LOGGER.debug("Found these documents (total = {}) for query '{}':", topDocs.totalHits, query);
            int counter = 0;
            for (final ScoreDoc scoreDoc : topDocs.scoreDocs) {
                final Document document = reader.document(scoreDoc.doc);
                if (document != null) {
                    LOGGER.debug("- Score: " + StringUtils.rightPad(Float.toString(scoreDoc.score), 8)
                            + " Doc: " + document.get(IIndexElement.FIELD_ID));
                    result.addDocument(document, scoreDoc.score);
                }
                // If it's the last document, store its ScoreDoc so the caller can page on from it.
                if (++counter == topDocs.scoreDocs.length) {
                    result.setLastScoreDoc(scoreDoc);
                }
            }
        } catch (final IOException e) {
            LOGGER.error("Can't get documents for topdocs.", e);
        }
    }
    return result;
}
public Map<DocumentType, List<SearchResult>> search(String searchString) throws ParseException {
    Map<DocumentType, List<SearchResult>> resultMap = new TreeMap<DocumentType, List<SearchResult>>();
    try {
        Query query = parser.parse(searchString);
        final SecondPassGroupingCollector collector = new SecondPassGroupingCollector("documentType",
                searchGroups, Sort.RELEVANCE, null, 5, true, false, true);
        searcher.search(query, collector);
        final TopGroups groups = collector.getTopGroups(0);
        for (GroupDocs groupDocs : groups.groups) {
            DocumentType docType = DocumentType.valueOf(groupDocs.groupValue);
            List<SearchResult> results = new ArrayList<SearchResult>();
            for (ScoreDoc scoreDoc : groupDocs.scoreDocs) {
                Document doc = searcher.doc(scoreDoc.doc);
                SearchResult result = new SearchResult(
                        docType,
                        doc.get("name"),
                        doc.get("url"),
                        doc.get("className"),
                        doc.get("package"),
                        doc.get("ensemblePath"),
                        doc.get("shortDescription")
                );
                results.add(result);
            }
            resultMap.put(docType, results);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return resultMap;
}
private void runDeferredAggs() throws IOException {
    List<ScoreDoc> allDocs = new ArrayList<>(shardSize);
    for (int i = 0; i < perBucketSamples.size(); i++) {
        PerParentBucketSamples perBucketSample = perBucketSamples.get(i);
        if (perBucketSample == null) {
            continue;
        }
        perBucketSample.getMatches(allDocs);
    }
    // Sort the top matches by doc id for the benefit of the deferred collector.
    ScoreDoc[] docsArr = allDocs.toArray(new ScoreDoc[allDocs.size()]);
    Arrays.sort(docsArr, (o1, o2) -> {
        if (o1.doc == o2.doc) {
            return o1.shardIndex - o2.shardIndex;
        }
        return o1.doc - o2.doc;
    });
    try {
        for (PerSegmentCollects perSegDocs : entries) {
            perSegDocs.replayRelatedMatches(docsArr);
        }
    } catch (IOException e) {
        throw new ElasticsearchException("IOException collecting best scoring results", e);
    }
    deferred.postCollection();
}
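// Side note (not from the original source): the lambda comparator above is
// equivalent to this more declarative java.util.Comparator form. Doc ids and
// shardIndex are non-negative here, so the integer subtraction is safe, but
// the combinators avoid the overflow question entirely.
Comparator<ScoreDoc> byDocThenBucket =
        Comparator.comparingInt((ScoreDoc d) -> d.doc).thenComparingInt(d -> d.shardIndex);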
public void getMatches(List<ScoreDoc> allDocs) {
    TopDocs topDocs = tdc.topDocs();
    ScoreDoc[] sd = topDocs.scoreDocs;
    matchedDocs = sd.length;
    for (ScoreDoc scoreDoc : sd) {
        // A bit of a hack: (ab)use the shardIndex property to hold a bucket ID.
        // This avoids allocating extra data structures, and users have bigger
        // concerns if bucket IDs exceed int capacity.
        scoreDoc.shardIndex = (int) parentBucket;
    }
    allDocs.addAll(Arrays.asList(sd));
}
public static void writeScoreDoc(StreamOutput out, ScoreDoc scoreDoc) throws IOException {
    if (!scoreDoc.getClass().equals(ScoreDoc.class)) {
        throw new IllegalArgumentException("This method can only be used to serialize a ScoreDoc, not a " + scoreDoc.getClass());
    }
    out.writeVInt(scoreDoc.doc);
    out.writeFloat(scoreDoc.score);
}
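// The matching reader (a sketch, assuming StreamInput mirrors StreamOutput
// with readVInt/readFloat, as it does in Elasticsearch) simply reverses the
// two writes above:
public static ScoreDoc readScoreDoc(StreamInput in) throws IOException {
    int doc = in.readVInt();      // doc id, variable-length encoded
    float score = in.readFloat();
    return new ScoreDoc(doc, score);
}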
private void moveToNextPhase(SearchPhaseController searchPhaseController, ScoreDoc[] sortedDocs,
        String scrollId, SearchPhaseController.ReducedQueryPhase reducedQueryPhase,
        AtomicArray<? extends QuerySearchResultProvider> fetchResultsArr) {
    final InternalSearchResponse internalResponse = searchPhaseController.merge(context.getRequest().scroll() != null,
            sortedDocs, reducedQueryPhase, fetchResultsArr);
    context.executeNextPhase(this, nextPhaseFactory.apply(context.buildSearchResponse(internalResponse, scrollId)));
}
private void innerFinishHim() throws Exception {
    ScoreDoc[] sortedShardDocs = searchPhaseController.sortDocs(true, queryFetchResults);
    final InternalSearchResponse internalResponse = searchPhaseController.merge(true, sortedShardDocs,
            searchPhaseController.reducedQueryPhase(queryFetchResults.asList()), queryFetchResults);
    String scrollId = null;
    if (request.scroll() != null) {
        scrollId = request.scrollId();
    }
    listener.onResponse(new SearchResponse(internalResponse, scrollId, this.scrollId.getContext().length,
            successfulOps.get(), buildTookInMillis(), buildShardFailures()));
}
/**
 * Builds an array, with potentially null elements, of the doc ids to load per shard.
 */
public IntArrayList[] fillDocIdsToLoad(int numShards, ScoreDoc[] shardDocs) {
    IntArrayList[] docIdsToLoad = new IntArrayList[numShards];
    for (ScoreDoc shardDoc : shardDocs) {
        IntArrayList shardDocIdsToLoad = docIdsToLoad[shardDoc.shardIndex];
        if (shardDocIdsToLoad == null) {
            shardDocIdsToLoad = docIdsToLoad[shardDoc.shardIndex] = new IntArrayList();
        }
        shardDocIdsToLoad.add(shardDoc.doc);
    }
    return docIdsToLoad;
}
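// Illustrative consumption of the result (a sketch; fetchDocsFromShard is a
// hypothetical helper, and numShards/sortedDocs are assumed inputs): entries
// stay null for shards that contributed nothing to the global top docs, so
// callers skip them rather than issuing empty fetch requests.
IntArrayList[] perShard = fillDocIdsToLoad(numShards, sortedDocs);
for (int shard = 0; shard < perShard.length; shard++) {
    if (perShard[shard] == null) {
        continue; // shard had no docs in the global top N
    }
    fetchDocsFromShard(shard, perShard[shard]); // hypothetical fetch helper
}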
/**
 * Enriches search hits and completion suggestion hits from <code>sortedDocs</code> using <code>fetchResultsArr</code>,
 * and merges suggestions, aggregations and profile results.
 *
 * Expects sortedDocs to contain the top search docs across all shards, optionally followed by the top suggest docs
 * for each named completion suggestion, ordered by suggestion name.
 */
public InternalSearchResponse merge(boolean ignoreFrom, ScoreDoc[] sortedDocs,
        ReducedQueryPhase reducedQueryPhase,
        AtomicArray<? extends QuerySearchResultProvider> fetchResultsArr) {
    if (reducedQueryPhase.isEmpty()) {
        return InternalSearchResponse.empty();
    }
    List<? extends AtomicArray.Entry<? extends QuerySearchResultProvider>> fetchResults = fetchResultsArr.asList();
    SearchHits hits = getHits(reducedQueryPhase, ignoreFrom, sortedDocs, fetchResultsArr);
    if (reducedQueryPhase.suggest != null) {
        if (!fetchResults.isEmpty()) {
            int currentOffset = hits.getHits().length;
            for (CompletionSuggestion suggestion : reducedQueryPhase.suggest.filter(CompletionSuggestion.class)) {
                final List<CompletionSuggestion.Entry.Option> suggestionOptions = suggestion.getOptions();
                for (int scoreDocIndex = currentOffset; scoreDocIndex < currentOffset + suggestionOptions.size(); scoreDocIndex++) {
                    ScoreDoc shardDoc = sortedDocs[scoreDocIndex];
                    QuerySearchResultProvider searchResultProvider = fetchResultsArr.get(shardDoc.shardIndex);
                    if (searchResultProvider == null) {
                        continue;
                    }
                    FetchSearchResult fetchResult = searchResultProvider.fetchResult();
                    int fetchResultIndex = fetchResult.counterGetAndIncrement();
                    if (fetchResultIndex < fetchResult.hits().internalHits().length) {
                        SearchHit hit = fetchResult.hits().internalHits()[fetchResultIndex];
                        CompletionSuggestion.Entry.Option suggestOption = suggestionOptions.get(scoreDocIndex - currentOffset);
                        hit.score(shardDoc.score);
                        hit.shard(fetchResult.shardTarget());
                        suggestOption.setHit(hit);
                    }
                }
                currentOffset += suggestionOptions.size();
            }
            assert currentOffset == sortedDocs.length : "expected no more score doc slices";
        }
    }
    return reducedQueryPhase.buildResponse(hits);
}
public void testReplay() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
    int numDocs = randomIntBetween(1, 128);
    int maxNumValues = randomInt(16);
    for (int i = 0; i < numDocs; i++) {
        Document document = new Document();
        document.add(new StringField("field", String.valueOf(randomInt(maxNumValues)), Field.Store.NO));
        indexWriter.addDocument(document);
    }
    indexWriter.close();
    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    TermQuery termQuery = new TermQuery(new Term("field", String.valueOf(randomInt(maxNumValues))));
    TopDocs topDocs = indexSearcher.search(termQuery, numDocs);
    BestDocsDeferringCollector collector = new BestDocsDeferringCollector(numDocs,
            new MockBigArrays(Settings.EMPTY, new NoneCircuitBreakerService()));
    Set<Integer> deferredCollectedDocIds = new HashSet<>();
    collector.setDeferredCollector(Collections.singleton(testCollector(deferredCollectedDocIds)));
    collector.preCollection();
    indexSearcher.search(termQuery, collector);
    collector.postCollection();
    collector.replay(0);
    assertEquals(topDocs.scoreDocs.length, deferredCollectedDocIds.size());
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        assertTrue("expected docid [" + scoreDoc.doc + "] is missing", deferredCollectedDocIds.contains(scoreDoc.doc));
    }
    collector.close();
    indexReader.close();
    directory.close();
}
public void testReplay() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
    // Use at least one doc: IndexSearcher.search rejects numHits == 0.
    int numDocs = randomIntBetween(1, 128);
    int maxNumValues = randomInt(16);
    for (int i = 0; i < numDocs; i++) {
        Document document = new Document();
        document.add(new StringField("field", String.valueOf(randomInt(maxNumValues)), Field.Store.NO));
        indexWriter.addDocument(document);
    }
    indexWriter.close();
    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    TermQuery termQuery = new TermQuery(new Term("field", String.valueOf(randomInt(maxNumValues))));
    TopDocs topDocs = indexSearcher.search(termQuery, numDocs);
    SearchContext searchContext = createSearchContext(indexSearcher, createIndexSettings());
    BestBucketsDeferringCollector collector = new BestBucketsDeferringCollector(searchContext);
    Set<Integer> deferredCollectedDocIds = new HashSet<>();
    collector.setDeferredCollector(Collections.singleton(bla(deferredCollectedDocIds)));
    collector.preCollection();
    indexSearcher.search(termQuery, collector);
    collector.postCollection();
    collector.replay(0);
    assertEquals(topDocs.scoreDocs.length, deferredCollectedDocIds.size());
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        assertTrue("expected docid [" + scoreDoc.doc + "] is missing", deferredCollectedDocIds.contains(scoreDoc.doc));
    }
    indexReader.close();
    directory.close();
}
public void testFailPhaseOnException() throws IOException {
    AtomicArray<DfsSearchResult> results = new AtomicArray<>(2);
    AtomicReference<AtomicArray<QuerySearchResultProvider>> responseRef = new AtomicReference<>();
    results.set(0, new DfsSearchResult(1, new SearchShardTarget("node1", new Index("test", "na"), 0)));
    results.set(1, new DfsSearchResult(2, new SearchShardTarget("node2", new Index("test", "na"), 0)));
    results.get(0).termsStatistics(new Term[0], new TermStatistics[0]);
    results.get(1).termsStatistics(new Term[0], new TermStatistics[0]);
    SearchPhaseController controller = new SearchPhaseController(Settings.EMPTY, BigArrays.NON_RECYCLING_INSTANCE, null);
    SearchTransportService searchTransportService = new SearchTransportService(
            Settings.builder().put("search.remote.connect", false).build(), null, null) {
        @Override
        public void sendExecuteQuery(Transport.Connection connection, QuerySearchRequest request, SearchTask task,
                ActionListener<QuerySearchResult> listener) {
            if (request.id() == 1) {
                QuerySearchResult queryResult = new QuerySearchResult(123, new SearchShardTarget("node1", new Index("test", "na"), 0));
                queryResult.topDocs(new TopDocs(1, new ScoreDoc[] {new ScoreDoc(42, 1.0F)}, 2.0F), new DocValueFormat[0]);
                queryResult.size(2); // the size of the result set
                listener.onResponse(queryResult);
            } else if (request.id() == 2) {
                throw new UncheckedIOException(new MockDirectoryWrapper.FakeIOException());
            } else {
                fail("no such request ID: " + request.id());
            }
        }
    };
    MockSearchPhaseContext mockSearchPhaseContext = new MockSearchPhaseContext(2);
    mockSearchPhaseContext.searchTransport = searchTransportService;
    DfsQueryPhase phase = new DfsQueryPhase(results, controller,
            (response) -> new SearchPhase("test") {
                @Override
                public void run() throws IOException {
                    responseRef.set(response.results);
                }
            }, mockSearchPhaseContext);
    assertEquals("dfs_query", phase.getName());
    expectThrows(UncheckedIOException.class, () -> phase.run());
    assertTrue(mockSearchPhaseContext.releasedSearchContexts.isEmpty()); // phase execution will clean up on the contexts
}
public void testSortIsIdempotent() throws IOException {
    int nShards = randomIntBetween(1, 20);
    int queryResultSize = randomBoolean() ? 0 : randomIntBetween(1, nShards * 2);
    AtomicArray<QuerySearchResultProvider> results = generateQueryResults(nShards, Collections.emptyList(),
            queryResultSize, randomBoolean() || true); // note: this expression is always true
    boolean ignoreFrom = randomBoolean();
    ScoreDoc[] sortedDocs = searchPhaseController.sortDocs(ignoreFrom, results);
    ScoreDoc[] sortedDocs2 = searchPhaseController.sortDocs(ignoreFrom, results);
    assertArrayEquals(sortedDocs, sortedDocs2);
}
private ScoreDoc[] getTopShardDocs(AtomicArray<QuerySearchResultProvider> results) throws IOException {
    List<AtomicArray.Entry<QuerySearchResultProvider>> resultList = results.asList();
    TopDocs[] shardTopDocs = new TopDocs[resultList.size()];
    for (int i = 0; i < resultList.size(); i++) {
        shardTopDocs[i] = resultList.get(i).value.queryResult().topDocs();
    }
    int topN = Math.min(results.get(0).queryResult().size(), getTotalQueryHits(results));
    return TopDocs.merge(topN, shardTopDocs).scoreDocs;
}
public void testConsumerConcurrently() throws InterruptedException {
    int expectedNumResults = randomIntBetween(1, 100);
    int bufferSize = randomIntBetween(2, 200);
    SearchRequest request = new SearchRequest();
    request.source(new SearchSourceBuilder().aggregation(AggregationBuilders.avg("foo")));
    request.setBatchedReduceSize(bufferSize);
    InitialSearchPhase.SearchPhaseResults<QuerySearchResultProvider> consumer =
            searchPhaseController.newSearchPhaseResults(request, expectedNumResults);
    AtomicInteger max = new AtomicInteger();
    CountDownLatch latch = new CountDownLatch(expectedNumResults);
    for (int i = 0; i < expectedNumResults; i++) {
        int id = i;
        Thread t = new Thread(() -> {
            int number = randomIntBetween(1, 1000);
            max.updateAndGet(prev -> Math.max(prev, number));
            QuerySearchResult result = new QuerySearchResult(id, new SearchShardTarget("node", new Index("a", "b"), id));
            result.topDocs(new TopDocs(id, new ScoreDoc[0], 0.0F), new DocValueFormat[0]);
            InternalAggregations aggs = new InternalAggregations(Arrays.asList(new InternalMax("test", (double) number,
                    DocValueFormat.RAW, Collections.emptyList(), Collections.emptyMap())));
            result.aggregations(aggs);
            consumer.consumeResult(id, result);
            latch.countDown();
        });
        t.start();
    }
    latch.await();
    SearchPhaseController.ReducedQueryPhase reduce = consumer.reduce();
    InternalMax internalMax = (InternalMax) reduce.aggregations.asList().get(0);
    assertEquals(max.get(), internalMax.getValue(), 0.0D);
}
private void writeEmptyTermVector(TermVectorsResponse outResponse) throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer());
    conf.setOpenMode(OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(dir, conf);
    FieldType type = new FieldType(TextField.TYPE_STORED);
    type.setStoreTermVectorOffsets(true);
    type.setStoreTermVectorPayloads(false);
    type.setStoreTermVectorPositions(true);
    type.setStoreTermVectors(true);
    type.freeze();
    Document d = new Document();
    d.add(new Field("id", "abc", StringField.TYPE_STORED));
    writer.updateDocument(new Term("id", "abc"), d);
    writer.commit();
    writer.close();
    DirectoryReader dr = DirectoryReader.open(dir);
    IndexSearcher s = new IndexSearcher(dr);
    TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
    ScoreDoc[] scoreDocs = search.scoreDocs;
    int doc = scoreDocs[0].doc;
    Fields fields = dr.getTermVectors(doc);
    EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
    outResponse.setFields(fields, null, flags, fields);
    outResponse.setExists(true);
    dr.close();
    dir.close();
}