/**
 * Runs the supplied {@link Query} against the current index and buckets the matching
 * {@link Document}s by the value of {@code groupField}.
 *
 * @param groupField name of the field whose value defines the groups.
 * @param query the query to run.
 * @param options extra search options; its sort (when non-null) is applied within each group
 *        and its max-results value is used as the group limit.
 * @return the resulting {@link TopGroups}, or {@code null} if the search failed with an
 *         {@link IOException} (the failure is logged).
 */
public TopGroups<BytesRef> getGroupedDocs(final String groupField, final Query query, final SearchOptions options) {
    final IndexReader indexReader = IndexManager.getInstance().getIndex().getIndexReader();
    final IndexSearcher indexSearcher = new IndexSearcher(indexReader);

    final GroupingSearch grouping = new GroupingSearch(groupField);
    if (options.getSort() != null) {
        grouping.setSortWithinGroup(options.getSort());
    }

    try {
        // Always start at the first group; the caller only controls the upper bound.
        return grouping.search(indexSearcher, query, 0, options.getMaxResults());
    } catch (final IOException e) {
        LOGGER.error("Can't execute group search because of an IOException.", e);
        return null;
    }
}
/**
 * Indexes three documents carrying two distinct values of {@code field1} and checks that
 * grouping on that field reports three hits spread over two groups.
 */
@Test
public void testGrouping() throws IOException {
    index(
        buildLuceneDoc("field1", "value1", true),
        buildLuceneDoc("field1", "value1", true),
        buildLuceneDoc("field1", "value2", true));

    final GroupingSearch byField1 = new GroupingSearch("field1");
    final TopGroups<Object> grouped = byField1.search(openSearcher(), new MatchAllDocsQuery(), 0, 10);

    // Every document matches, but two of them share the same group value.
    assertEquals(3, grouped.totalHitCount);
    assertEquals(2, grouped.groups.length);
}
/**
 * Queries the feature indexes of a list of files, grouping hits by feature UID and returning
 * the first hit of every group on the requested page.
 * Paging is applied only when both {@code page} and {@code pageSize} are supplied; otherwise
 * all results are returned.
 *
 * @param files a {@link List} of {@link FeatureFile}, which indexes to search
 * @param query a query to search in index
 * @param vcfInfoFields list of info fields to retrieve
 * @param page 1-based number of a page to display, or {@code null} for no paging
 * @param pageSize number of entries per page, or {@code null} for no paging
 * @param orderBy object, that specifies sorting
 * @return an {@code IndexSearchResult} wrapping the {@code FeatureIndexEntry} objects that
 *         satisfy the index query; an empty result if {@code files} is empty or the
 *         indexes contain no documents
 * @throws IOException if something is wrong in the filesystem
 */
public <T extends FeatureIndexEntry> IndexSearchResult<T> searchFileIndexesPaging(
        List<? extends FeatureFile> files, Query query, List<String> vcfInfoFields,
        Integer page, Integer pageSize, List<VcfFilterForm.OrderBy> orderBy) throws IOException {
    if (CollectionUtils.isEmpty(files)) {
        return new IndexSearchResult<>(Collections.emptyList(), false, 0);
    }

    List<FeatureIndexEntry> entries;
    // NOTE(review): never populated by this method, so the reported total is always 0.
    // Left unchanged because callers may rely on it — confirm whether a real count is expected.
    int totalHits = 0;
    SimpleFSDirectory[] indexes = fileManager.getIndexesForFiles(files);

    try (MultiReader reader = openMultiReader(indexes)) {
        if (reader.numDocs() == 0) {
            return new IndexSearchResult<>(Collections.emptyList(), false, 0);
        }

        IndexSearcher searcher = new IndexSearcher(reader);
        GroupingSearch groupingSearch = new GroupingSearch(FeatureIndexFields.UID.fieldName);
        setSorting(orderBy, groupingSearch, files);

        // Page only when both parameters are present; a page without a page size used to
        // fail with a NullPointerException while unboxing pageSize.
        boolean paged = page != null && pageSize != null;
        int groupOffset = paged ? (page - 1) * pageSize : 0;
        int groupLimit = paged ? pageSize : reader.numDocs();

        TopGroups<String> topGroups = groupingSearch.search(searcher, query, groupOffset, groupLimit);

        // One entry per group, built from the group's first hit.
        entries = new ArrayList<>(topGroups.groups.length);
        for (int i = 0; i < topGroups.groups.length; i++) {
            ScoreDoc hit = topGroups.groups[i].scoreDocs[0];
            entries.add(createIndexEntry(hit, new HashMap<>(), searcher, vcfInfoFields));
        }
    } finally {
        // The directories are opened for this call only; close them even on failure.
        for (SimpleFSDirectory index : indexes) {
            IOUtils.closeQuietly(index);
        }
    }

    return new IndexSearchResult<>((List<T>) entries, false, totalHits);
}
@Test public void testTwoPassGrouping() throws Exception { Sort groupSort = new Sort(); Boolean fillFields = true; Boolean useCache = true; double cacheSize = 4.0; Boolean cacheScores = true; Boolean requiredTotalGroupCount = true; int groupOffset = 0; int groupLimit = 10; String groupField = "title"; String field = "title"; String qString = "Lucene"; GroupingSearch groupingSearch = new GroupingSearch(groupField); groupingSearch.setGroupSort(groupSort); groupingSearch.setFillSortFields(fillFields); if (useCache) { // Sets cache in MB groupingSearch.setCachingInMB(cacheSize, cacheScores); } if (requiredTotalGroupCount) { groupingSearch.setAllGroups(true); } Query query = new QueryParser(Version.LUCENE_42, field, analyzer).parse(qString); //TermQuery query = new TermQuery(new Term("content", searchTerm)); searcherManager.maybeRefresh(); IndexSearcher searcher = searcherManager.acquire(); searcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; printHits(searcher, hits); TopGroups<BytesRef> result = groupingSearch.search(searcher, query, groupOffset, groupLimit); // Render groupsResult... if (requiredTotalGroupCount) { int totalGroupCount = result.totalGroupCount; System.out.println("Total groups:" + totalGroupCount); for(int i=0; i<result.groups.length; i++){ printHits(searcher, result.groups[i].scoreDocs); } } }
@Test public void testOnePassGrouping() throws Exception { int topNGroups = 10; Boolean needsScores = true; String field = "groupEnd"; String value = "x"; Sort groupSort = new Sort(); Sort withinGroupSort = new Sort(); int groupOffset = 0; int docOffset = 0; int docsPerGroup = 100; Boolean fillFields = true; int groupLimit = 100; // Create Documents from your source: List<Document> oneGroup = new ArrayList<Document>(); Document doc = new Document(); doc.add(new TextField("title", "Lucene in action", Field.Store.YES)); // use a string field for isbn because we don't want it tokenized doc.add(new StringField("isbn", "12345", Field.Store.YES)); oneGroup.add(doc); Field groupEndField = new Field(field, value, Field.Store.NO, Field.Index.NOT_ANALYZED); oneGroup.get(oneGroup.size()-1).add(groupEndField); // You can also use writer.updateDocuments(); just be sure you // replace an entire previous doc block with this new one. For // example, each group could have a "groupID" field, with the same // value for all docs in this group: writer.addDocuments(oneGroup); // Set this once in your app & save away for reusing across all queries: Filter groupEndDocs = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term(field, value)))); searcherManager.maybeRefresh(); IndexSearcher searcher = searcherManager.acquire(); // Per search: BlockGroupingCollector collector1; collector1 = new BlockGroupingCollector(groupSort, groupOffset+topNGroups, needsScores, groupEndDocs); searcher.search(new TermQuery(new Term("title", "Lucene")), collector1); TopGroups groupsResult = collector1.getTopGroups(withinGroupSort, groupOffset, docOffset, docOffset+docsPerGroup, fillFields); printHits(searcher, groupsResult.groups[0].scoreDocs); // Render groupsResult... 
Or alternatively use the GroupingSearch convenience utility: // Per search: GroupingSearch groupingSearch = new GroupingSearch(groupEndDocs); groupingSearch.setGroupSort(groupSort); groupingSearch.setIncludeScores(needsScores); TermQuery query = new TermQuery(new Term("title", "Lucene")); TopGroups groupsResult2 = groupingSearch.search(searcher, query, groupOffset, groupLimit); printHits(searcher, groupsResult2.groups[0].scoreDocs); }