/** * Groups variations from specified {@link List} of {@link VcfFile}s by specified field * @param files a {@link List} of {@link FeatureFile}, which indexes to search * @param query a query to search in index * @param groupBy a field to perform grouping * @return a {@link List} of {@link Group}s, mapping field value to number of variations, having this value * @throws IOException if something goes wrong with the file system */ public List<Group> groupVariations(List<VcfFile> files, Query query, String groupBy) throws IOException { List<Group> res = new ArrayList<>(); if (CollectionUtils.isEmpty(files)) { return Collections.emptyList(); } SimpleFSDirectory[] indexes = fileManager.getIndexesForFiles(files); try (MultiReader reader = openMultiReader(indexes)) { if (reader.numDocs() == 0) { return Collections.emptyList(); } IndexSearcher searcher = new IndexSearcher(reader); AbstractGroupFacetCollector groupedFacetCollector = TermGroupFacetCollector.createTermGroupFacetCollector(FeatureIndexFields.UID.fieldName, getGroupByField(files, groupBy), false, null, GROUP_INITIAL_SIZE); searcher.search(query, groupedFacetCollector); // Computing the grouped facet counts TermGroupFacetCollector.GroupedFacetResult groupedResult = groupedFacetCollector.mergeSegmentResults( reader.numDocs(), 1, false); List<AbstractGroupFacetCollector.FacetEntry> facetEntries = groupedResult.getFacetEntries(0, reader.numDocs()); for (AbstractGroupFacetCollector.FacetEntry facetEntry : facetEntries) { res.add(new Group(facetEntry.getValue().utf8ToString(), facetEntry.getCount())); } } finally { for (SimpleFSDirectory index : indexes) { IOUtils.closeQuietly(index); } } return res; }
/** * Groups variations from specified {@link List} of {@link VcfFile}s by specified field * @param files a {@link List} of {@link FeatureFile}, which indexes to search * @param query a query to search in index * @param groupBy a field to perform grouping * @return a {@link List} of {@link Group}s, mapping field value to number of variations, having this value * @throws IOException if something goes wrong with the file system */ public List<Group> groupVariations(List<VcfFile> files, Query query, String groupBy) throws IOException { List<Group> res = new ArrayList<>(); if (CollectionUtils.isEmpty(files)) { return Collections.emptyList(); } SimpleFSDirectory[] indexes = fileManager.getIndexesForFiles(files); long totalIndexSize = getTotalIndexSize(indexes); if (totalIndexSize > luceneIndexMaxSizeForGrouping) { throw new IllegalArgumentException(getMessage(MessagesConstants.ERROR_FEATURE_INEDX_TOO_LARGE)); } try (MultiReader reader = openMultiReader(indexes)) { if (reader.numDocs() == 0) { return Collections.emptyList(); } IndexSearcher searcher = new IndexSearcher(reader); AbstractGroupFacetCollector groupedFacetCollector = TermGroupFacetCollector.createTermGroupFacetCollector(FeatureIndexFields.UID.fieldName, getGroupByField(files, groupBy), false, null, GROUP_INITIAL_SIZE); searcher.search(query, groupedFacetCollector); // Computing the grouped facet counts TermGroupFacetCollector.GroupedFacetResult groupedResult = groupedFacetCollector.mergeSegmentResults( reader.numDocs(), 1, false); List<AbstractGroupFacetCollector.FacetEntry> facetEntries = groupedResult.getFacetEntries(0, reader.numDocs()); for (AbstractGroupFacetCollector.FacetEntry facetEntry : facetEntries) { res.add(new Group(facetEntry.getValue().utf8ToString(), facetEntry.getCount())); } } finally { for (SimpleFSDirectory index : indexes) { IOUtils.closeQuietly(index); } } return res; }