/**
 * Read the last segments info from the commit pointed to by the searcher manager
 */
protected static SegmentInfos readLastCommittedSegmentInfos(final SearcherManager sm, final Store store) throws IOException {
    IndexSearcher searcher = sm.acquire();
    try {
        IndexCommit latestCommit = ((DirectoryReader) searcher.getIndexReader()).getIndexCommit();
        return Lucene.readSegmentInfos(latestCommit);
    } catch (IOException e) {
        // Fall back to reading from the store if reading from the commit fails
        try {
            return store.readLastCommittedSegmentsInfo();
        } catch (IOException e2) {
            e2.addSuppressed(e);
            throw e2;
        }
    } finally {
        sm.release(searcher);
    }
}
@SuppressForbidden(reason = "System.out.*") public void testSegmentInfosTracing() { // Defaults to not hooking up std out assertNull(SegmentInfos.getInfoStream()); try { // False means don't hook up std out NodeEnvironment.applySegmentInfosTrace( Settings.builder().put(NodeEnvironment.ENABLE_LUCENE_SEGMENT_INFOS_TRACE_SETTING.getKey(), false).build()); assertNull(SegmentInfos.getInfoStream()); // But true means hook std out up statically NodeEnvironment.applySegmentInfosTrace( Settings.builder().put(NodeEnvironment.ENABLE_LUCENE_SEGMENT_INFOS_TRACE_SETTING.getKey(), true).build()); assertEquals(System.out, SegmentInfos.getInfoStream()); } finally { // Clean up after ourselves SegmentInfos.setInfoStream(null); } }
@Test
public void testCommitNoEmptyCommits() throws Exception {
    // LUCENE-4972: DTW used to create empty commits even if no changes were made
    Directory dir = newDirectory();
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir);
    taxoWriter.addCategory(new FacetLabel("a"));
    taxoWriter.commit();

    long gen1 = SegmentInfos.getLastCommitGeneration(dir);
    taxoWriter.commit();
    long gen2 = SegmentInfos.getLastCommitGeneration(dir);
    assertEquals("empty commit should not have changed the index", gen1, gen2);

    taxoWriter.close();
    dir.close();
}
@Test
public void testCloseNoEmptyCommits() throws Exception {
    // LUCENE-4972: DTW used to create empty commits even if no changes were made
    Directory dir = newDirectory();
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir);
    taxoWriter.addCategory(new FacetLabel("a"));
    taxoWriter.commit();

    long gen1 = SegmentInfos.getLastCommitGeneration(dir);
    taxoWriter.close();
    long gen2 = SegmentInfos.getLastCommitGeneration(dir);
    assertEquals("empty commit should not have changed the index", gen1, gen2);

    taxoWriter.close();
    dir.close();
}
@Test
public void testPrepareCommitNoEmptyCommits() throws Exception {
    // LUCENE-4972: DTW used to create empty commits even if no changes were made
    Directory dir = newDirectory();
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir);
    taxoWriter.addCategory(new FacetLabel("a"));
    taxoWriter.prepareCommit();
    taxoWriter.commit();

    long gen1 = SegmentInfos.getLastCommitGeneration(dir);
    taxoWriter.prepareCommit();
    taxoWriter.commit();
    long gen2 = SegmentInfos.getLastCommitGeneration(dir);
    assertEquals("empty commit should not have changed the index", gen1, gen2);

    taxoWriter.close();
    dir.close();
}
@Test
public void testCommitNoEmptyCommits() throws Exception {
    // LUCENE-4972: DTW used to create empty commits even if no changes were made
    Directory dir = newDirectory();
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir);
    taxoWriter.addCategory(new CategoryPath("a"));
    taxoWriter.commit();

    long gen1 = SegmentInfos.getLastCommitGeneration(dir);
    taxoWriter.commit();
    long gen2 = SegmentInfos.getLastCommitGeneration(dir);
    assertEquals("empty commit should not have changed the index", gen1, gen2);

    taxoWriter.close();
    dir.close();
}
@Test
public void testCloseNoEmptyCommits() throws Exception {
    // LUCENE-4972: DTW used to create empty commits even if no changes were made
    Directory dir = newDirectory();
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir);
    taxoWriter.addCategory(new CategoryPath("a"));
    taxoWriter.commit();

    long gen1 = SegmentInfos.getLastCommitGeneration(dir);
    taxoWriter.close();
    long gen2 = SegmentInfos.getLastCommitGeneration(dir);
    assertEquals("empty commit should not have changed the index", gen1, gen2);

    taxoWriter.close();
    dir.close();
}
@Test
public void testPrepareCommitNoEmptyCommits() throws Exception {
    // LUCENE-4972: DTW used to create empty commits even if no changes were made
    Directory dir = newDirectory();
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir);
    taxoWriter.addCategory(new CategoryPath("a"));
    taxoWriter.prepareCommit();
    taxoWriter.commit();

    long gen1 = SegmentInfos.getLastCommitGeneration(dir);
    taxoWriter.prepareCommit();
    taxoWriter.commit();
    long gen2 = SegmentInfos.getLastCommitGeneration(dir);
    assertEquals("empty commit should not have changed the index", gen1, gen2);

    taxoWriter.close();
    dir.close();
}
public CommitStats(SegmentInfos segmentInfos) {
    // clone the map to protect against concurrent changes
    userData = MapBuilder.<String, String>newMapBuilder().putAll(segmentInfos.getUserData()).immutableMap();
    // Lucene calls the current generation the "last" generation
    generation = segmentInfos.getLastGeneration();
    id = Base64.getEncoder().encodeToString(segmentInfos.getId());
    numDocs = Lucene.getNumDocs(segmentInfos);
}
private void addRecoveredFileDetails(SegmentInfos si, Store store, RecoveryState.Index index) throws IOException {
    final Directory directory = store.directory();
    for (String name : Lucene.files(si)) {
        long length = directory.fileLength(name);
        index.addFileDetail(name, length, true);
    }
}
/**
 * Returns the last committed segments info for this store
 *
 * @throws IOException if the index is corrupted or the segments file is not present
 */
public SegmentInfos readLastCommittedSegmentsInfo() throws IOException {
    failIfCorrupted();
    try {
        return readSegmentsInfo(null, directory());
    } catch (CorruptIndexException ex) {
        markStoreCorrupted(ex);
        throw ex;
    }
}
/**
 * Returns the segments info for the given commit or for the latest commit if the given commit is <code>null</code>
 *
 * @throws IOException if the index is corrupted or the segments file is not present
 */
private static SegmentInfos readSegmentsInfo(IndexCommit commit, Directory directory) throws IOException {
    assert commit == null || commit.getDirectory() == directory;
    try {
        return commit == null ? Lucene.readSegmentInfos(directory) : Lucene.readSegmentInfos(commit);
    } catch (EOFException eof) {
        // TODO this should be caught by lucene - EOF is almost certainly an index corruption
        throw new CorruptIndexException("Read past EOF while reading segment infos", "commit(" + commit + ")", eof);
    } catch (IOException exception) {
        throw exception; // IOExceptions like too many open files are not necessarily a corruption - just bubble it up
    } catch (Exception ex) {
        throw new CorruptIndexException("Hit unexpected exception while reading segment infos", "commit(" + commit + ")", ex);
    }
}
/**
 * Tries to open an index for the given location. This includes reading the
 * segment infos and possible corruption markers. If the index cannot
 * be opened, an exception is thrown.
 */
public static void tryOpenIndex(Path indexLocation, ShardId shardId, NodeEnvironment.ShardLocker shardLocker, Logger logger)
        throws IOException, ShardLockObtainFailedException {
    try (ShardLock lock = shardLocker.lock(shardId, TimeUnit.SECONDS.toMillis(5));
         Directory dir = new SimpleFSDirectory(indexLocation)) {
        failIfCorrupted(dir, shardId);
        SegmentInfos segInfo = Lucene.readSegmentInfos(dir);
        logger.trace("{} loaded segment info [{}]", shardId, segInfo);
    }
}
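A hedged caller-side sketch of how this might be invoked during store recovery; `shardPath`, `shardId`, `nodeEnv`, and `logger` are assumed to already be in scope and are not part of the snippet above:

// Probe whether the shard's on-disk index is openable before reusing it;
// throws if the directory is corrupted, locked, or missing a segments file.
Store.tryOpenIndex(shardPath.resolveIndex(), shardId, nodeEnv::shardLock, logger);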
/**
 * Returns an iterable over all files referenced by the given segments info
 */
public static Iterable<String> files(SegmentInfos infos) throws IOException {
    final List<Collection<String>> list = new ArrayList<>();
    list.add(Collections.singleton(infos.getSegmentsFileName()));
    for (SegmentCommitInfo info : infos) {
        list.add(info.files());
    }
    return Iterables.flatten(list);
}
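A short usage sketch, assuming `directory` holds a committed index; it reads the latest commit through the `Lucene.readSegmentInfos(Directory)` overload used elsewhere in this section and lists every file that commit references:

SegmentInfos infos = Lucene.readSegmentInfos(directory);
for (String file : Lucene.files(infos)) {
    System.out.println(file); // the segments_N file itself, then each segment's files
}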
/**
 * Returns the number of documents in the index referenced by this {@link SegmentInfos}
 */
public static int getNumDocs(SegmentInfos info) {
    int numDocs = 0;
    for (SegmentCommitInfo si : info) {
        numDocs += si.info.maxDoc() - si.getDelCount();
    }
    return numDocs;
}
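For example, a minimal sketch that reuses `readLastCommittedSegmentsInfo()` from the `Store` snippet above to compute the live document count of the last commit:

SegmentInfos infos = store.readLastCommittedSegmentsInfo();
int liveDocs = Lucene.getNumDocs(infos); // maxDoc minus deletions, summed over all segments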
/**
 * Reads the segments infos from the given commit, failing if it fails to load
 */
public static SegmentInfos readSegmentInfos(IndexCommit commit) throws IOException {
    // Using commit.getSegmentsFileName() does NOT work here, have to
    // manually create the segment filename
    String filename = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", commit.getGeneration());
    return SegmentInfos.readCommit(commit.getDirectory(), filename);
}
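A hedged sketch of the same generation-to-filename derivation in plain Lucene, here applied to the oldest commit returned by `DirectoryReader.listCommits` (purely for illustration):

IndexCommit commit = DirectoryReader.listCommits(dir).get(0); // oldest commit, for illustration
String segmentsFile = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", commit.getGeneration());
SegmentInfos infos = SegmentInfos.readCommit(commit.getDirectory(), segmentsFile);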
/**
 * This method removes all files from the given directory that are not referenced by the given segments file.
 * This method will open an IndexWriter and relies on the index file deleter to remove all unreferenced files. Segment files
 * that are newer than the given segments file are removed forcefully to prevent problems with IndexWriter opening a potentially
 * broken commit point / leftover.
 * <b>Note:</b> this method will fail if there is another IndexWriter open on the given directory. This method will also acquire
 * a write lock from the directory while pruning unused files. This method expects an existing index in the given directory that has
 * the given segments file.
 */
public static SegmentInfos pruneUnreferencedFiles(String segmentsFileName, Directory directory) throws IOException {
    final SegmentInfos si = readSegmentInfos(segmentsFileName, directory);
    try (Lock writeLock = directory.obtainLock(IndexWriter.WRITE_LOCK_NAME)) {
        int foundSegmentFiles = 0;
        for (final String file : directory.listAll()) {
            /*
             * We could also use a deletion policy here, but in the case of snapshot and restore
             * we sometimes restore an index and override files that were referenced by a "future"
             * commit. If such a commit is opened by the IW it would likely throw a corrupted index exception
             * since the checksums don't match anymore. That's why we prune the name here directly.
             * We also want the caller to know if we were not able to remove a segments_N file.
             */
            if (file.startsWith(IndexFileNames.SEGMENTS) || file.equals(IndexFileNames.OLD_SEGMENTS_GEN)) {
                foundSegmentFiles++;
                if (file.equals(si.getSegmentsFileName()) == false) {
                    directory.deleteFile(file); // remove all segments_N files except the one we want to keep
                }
            }
        }
        assert SegmentInfos.getLastCommitSegmentsFileName(directory).equals(segmentsFileName);
        if (foundSegmentFiles == 0) {
            throw new IllegalStateException("no commit found in the directory");
        }
    }
    final CommitPoint cp = new CommitPoint(si, directory);
    try (IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)
            .setIndexCommit(cp)
            .setCommitOnClose(false)
            .setMergePolicy(NoMergePolicy.INSTANCE)
            .setOpenMode(IndexWriterConfig.OpenMode.APPEND))) {
        // do nothing and close; this will kick off IndexFileDeleter, which will remove all pending files
    }
    return si;
}
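A hedged usage sketch for the snapshot/restore scenario the comment describes; `"segments_5"` is a hypothetical segments file name chosen for illustration:

// Keep only the files referenced by the restored commit; newer leftover
// segments_N files (and the segment files they reference) are removed.
SegmentInfos restored = Lucene.pruneUnreferencedFiles("segments_5", directory);
assert restored.getSegmentsFileName().equals("segments_5");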
public static void checkSegmentInfoIntegrity(final Directory directory) throws IOException {
    new SegmentInfos.FindSegmentsFile(directory) {
        @Override
        protected Object doBody(String segmentFileName) throws IOException {
            try (IndexInput input = directory.openInput(segmentFileName, IOContext.READ)) {
                CodecUtil.checksumEntireFile(input);
            }
            return null;
        }
    }.run();
}
private CommitPoint(SegmentInfos infos, Directory dir) throws IOException {
    segmentsFileName = infos.getSegmentsFileName();
    this.dir = dir;
    userData = infos.getUserData();
    files = Collections.unmodifiableCollection(infos.files(true));
    generation = infos.getGeneration();
    segmentCount = infos.size();
}
public CommitStats(SegmentInfos segmentInfos) {
    // clone the map to protect against concurrent changes
    userData = MapBuilder.<String, String>newMapBuilder().putAll(segmentInfos.getUserData()).immutableMap();
    // Lucene calls the current generation the "last" generation
    generation = segmentInfos.getLastGeneration();
    if (segmentInfos.getId() != null) { // id is only written starting with Lucene 5.0
        id = Base64.encodeBytes(segmentInfos.getId());
    }
    numDocs = Lucene.getNumDocs(segmentInfos);
}
public void testCommitData() throws Exception {
    PerfRunData runData = createPerfRunData();
    new CreateIndexTask(runData).doLogic();
    CommitIndexTask task = new CommitIndexTask(runData);
    task.setParams("params");
    task.doLogic();
    SegmentInfos infos = new SegmentInfos();
    infos.read(runData.getDirectory());
    assertEquals("params", infos.getUserData().get(OpenReaderTask.USER_DATA));
    new CloseIndexTask(runData).doLogic();
}
/**
 * Writes the {@link IndexFileNames#SEGMENTS_GEN} file to the directory, reading
 * the generation from the given {@code segmentsFile}. If it is {@code null},
 * this method deletes segments.gen from the directory.
 */
public static void writeSegmentsGen(String segmentsFile, Directory dir) {
    if (segmentsFile != null) {
        SegmentInfos.writeSegmentsGen(dir, SegmentInfos.generationFromSegmentsFileName(segmentsFile));
    } else {
        IOUtils.deleteFilesIgnoringExceptions(dir, IndexFileNames.SEGMENTS_GEN);
    }
}
@Override
public MergeSpecification findForcedMerges(SegmentInfos segmentInfos, int maxSegmentCount,
        Map<SegmentCommitInfo, Boolean> segmentsToMerge, IndexWriter writer) throws IOException {
    return inner.findForcedMerges(segmentInfos, maxSegmentCount, segmentsToMerge, writer);
}
@Override
public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos, IndexWriter writer) throws IOException {
    return inner.findMerges(mergeTrigger, segmentInfos, writer);
}
@Override
public boolean useCompoundFile(SegmentInfos infos, SegmentCommitInfo mergedInfo, IndexWriter writer) throws IOException {
    return inner.useCompoundFile(infos, mergedInfo, writer);
}
private Collection<SegmentKey> getSegmentKeys() throws IOException {
    List<SegmentKey> keys = new ArrayList<SegmentKey>();
    SegmentInfos segmentInfos = new SegmentInfos();
    segmentInfos.read(_directory, _indexCommit.getSegmentsFileName());
    for (SegmentInfoPerCommit segmentInfoPerCommit : segmentInfos) {
        String name = segmentInfoPerCommit.info.name;
        String id = getId(segmentInfoPerCommit.info);
        keys.add(new SegmentKey(name, id));
    }
    return keys;
}
/**
 * Writes the {@link IndexFileNames#SEGMENTS_GEN} file to the directory, reading
 * the generation from the given {@code segmentsFile}. If it is {@code null},
 * this method deletes segments.gen from the directory.
 */
public static void writeSegmentsGen(String segmentsFile, Directory dir) {
    if (segmentsFile != null) {
        SegmentInfos.writeSegmentsGen(dir, SegmentInfos.generationFromSegmentsFileName(segmentsFile));
    } else {
        try {
            if (dir.fileExists(IndexFileNames.SEGMENTS_GEN)) {
                dir.deleteFile(IndexFileNames.SEGMENTS_GEN);
            }
        } catch (Throwable t) {
            // suppress any errors while deleting this file
        }
    }
}
@Override
protected SegmentInfos getLastCommittedSegmentInfos() {
    try {
        return (SegmentInfos) getLastCommittedSegmentInfosMethod.invoke(engine);
    } catch (IllegalAccessException | IllegalArgumentException | InvocationTargetException e) {
        throw new ExtendedEngineException("Cannot invoke getLastCommittedSegmentInfos() of " + engine, e);
    }
}
@Override
public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos) throws CorruptIndexException, IOException {
    //System.err.println("findMerges");
    MergeSpecification ms = new MergeSpecification();
    if (doMerge) {
        List<SegmentInfoPerCommit> mergeInfos = new ArrayList<SegmentInfoPerCommit>();
        int target = 0;
        int count = 0;
        for (int i = 0; i < segmentInfos.size() && target < targets.length; i++) {
            SegmentInfoPerCommit commit = segmentInfos.info(i);
            SegmentInfo info = commit.info;
            if (info.getDocCount() == targets[target]) {
                // this one is ready
                target++;
                continue;
            }
            if (count + info.getDocCount() <= targets[target]) {
                mergeInfos.add(commit);
                count += info.getDocCount();
            } else {
                assert info.getDocCount() < targets[target] : "doc count should be smaller than the current target";
                if (mergeInfos.size() > 0) {
                    OneMerge om = new OneMerge(mergeInfos);
                    ms.add(om);
                }
                count = 0;
                mergeInfos = new ArrayList<SegmentInfoPerCommit>();
            }
        }
        if (mergeInfos.size() > 0) {
            OneMerge om = new OneMerge(mergeInfos);
            ms.add(om);
        }
        //doMerge = false;
        return ms;
    }
    return null;
}
@Override
public MergeSpecification findForcedMerges(SegmentInfos segmentInfos, int maxSegmentCount,
        Map<SegmentInfoPerCommit, Boolean> segmentsToOptimize) throws CorruptIndexException, IOException {
    System.err.println("findForcedMerges");
    return null;
}
@SuppressForbidden(reason = "System.out.*") static void applySegmentInfosTrace(Settings settings) { if (ENABLE_LUCENE_SEGMENT_INFOS_TRACE_SETTING.get(settings)) { SegmentInfos.setInfoStream(System.out); } }
protected Segment[] getSegmentInfo(SegmentInfos lastCommittedSegmentInfos, boolean verbose) {
    ensureOpen();
    Map<String, Segment> segments = new HashMap<>();
    // first, go over and compute the search ones...
    Searcher searcher = acquireSearcher("segments");
    try {
        for (LeafReaderContext reader : searcher.reader().leaves()) {
            SegmentCommitInfo info = segmentReader(reader.reader()).getSegmentInfo();
            assert !segments.containsKey(info.info.name);
            Segment segment = new Segment(info.info.name);
            segment.search = true;
            segment.docCount = reader.reader().numDocs();
            segment.delDocCount = reader.reader().numDeletedDocs();
            segment.version = info.info.getVersion();
            segment.compound = info.info.getUseCompoundFile();
            try {
                segment.sizeInBytes = info.sizeInBytes();
            } catch (IOException e) {
                logger.trace((Supplier<?>) () -> new ParameterizedMessage("failed to get size for [{}]", info.info.name), e);
            }
            final SegmentReader segmentReader = segmentReader(reader.reader());
            segment.memoryInBytes = segmentReader.ramBytesUsed();
            if (verbose) {
                segment.ramTree = Accountables.namedAccountable("root", segmentReader);
            }
            // TODO: add more fine grained mem stats values to per segment info here
            segments.put(info.info.name, segment);
        }
    } finally {
        searcher.close();
    }

    // now, correlate or add the committed ones...
    if (lastCommittedSegmentInfos != null) {
        SegmentInfos infos = lastCommittedSegmentInfos;
        for (SegmentCommitInfo info : infos) {
            Segment segment = segments.get(info.info.name);
            if (segment == null) {
                segment = new Segment(info.info.name);
                segment.search = false;
                segment.committed = true;
                segment.docCount = info.info.maxDoc();
                segment.delDocCount = info.getDelCount();
                segment.version = info.info.getVersion();
                segment.compound = info.info.getUseCompoundFile();
                try {
                    segment.sizeInBytes = info.sizeInBytes();
                } catch (IOException e) {
                    logger.trace((Supplier<?>) () -> new ParameterizedMessage("failed to get size for [{}]", info.info.name), e);
                }
                segments.put(info.info.name, segment);
            } else {
                segment.committed = true;
            }
        }
    }

    Segment[] segmentsArr = segments.values().toArray(new Segment[segments.values().size()]);
    Arrays.sort(segmentsArr, new Comparator<Segment>() {
        @Override
        public int compare(Segment o1, Segment o2) {
            return (int) (o1.getGeneration() - o2.getGeneration());
        }
    });
    return segmentsArr;
}
@Override
protected SegmentInfos getLastCommittedSegmentInfos() {
    return lastCommittedSegmentInfos;
}
@Override
public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos, IndexWriter writer) throws IOException {
    return delegate.findMerges(mergeTrigger, segmentInfos, writer);
}