private IndexWriterConfig getIndexWriterConfig(boolean create) { final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer()); iwc.setCommitOnClose(false); // we by default don't commit on close iwc.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND); iwc.setIndexDeletionPolicy(deletionPolicy); // with tests.verbose, lucene sets this up: plumb to align with filesystem stream boolean verbose = false; try { verbose = Boolean.parseBoolean(System.getProperty("tests.verbose")); } catch (Exception ignore) { } iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger)); iwc.setMergeScheduler(mergeScheduler); MergePolicy mergePolicy = config().getMergePolicy(); // Give us the opportunity to upgrade old segments while performing // background merges mergePolicy = new ElasticsearchMergePolicy(mergePolicy); iwc.setMergePolicy(mergePolicy); iwc.setSimilarity(engineConfig.getSimilarity()); iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac()); iwc.setCodec(engineConfig.getCodec()); iwc.setUseCompoundFile(true); // always use compound on flush - reduces # of file-handles on refresh return iwc; }
/**
 * Prepares a merge of the given readers into {@code segmentInfo}.
 *
 * @param readers           the readers to merge
 * @param segmentInfo       the target segment; its codec is used and its doc count is set here
 * @param infoStream        passed through to the merge state
 * @param dir               directory stored for the merge
 * @param termIndexInterval stored term index interval
 * @param checkAbort        passed through to the merge state
 * @param fieldNumbers      used to create the field infos builder
 * @param context           IO context stored for the merge
 * @param validate          when {@code true}, {@code checkIntegrity()} is called on every reader first
 * @throws IOException declared by the constructor; source is not visible here beyond the calls made
 */
SegmentMerger(List<AtomicReader> readers, SegmentInfo segmentInfo, InfoStream infoStream, Directory dir, int termIndexInterval,
              MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context, boolean validate) throws IOException {
  if (validate) {
    // verify each incoming reader before any merge state is built
    for (AtomicReader incoming : readers) {
      incoming.checkIntegrity();
    }
  }

  this.directory = dir;
  this.termIndexInterval = termIndexInterval;
  this.context = context;
  this.codec = segmentInfo.getCodec();
  this.fieldInfosBuilder = new FieldInfos.Builder(fieldNumbers);
  this.mergeState = new MergeState(readers, segmentInfo, infoStream, checkAbort);

  // setDocMaps() returns the doc count recorded on the merged segment
  mergeState.segmentInfo.setDocCount(setDocMaps());
}
/**
 * Creates a handler for the given index directory. If an index already exists
 * there, the current revision files and version are initialized from the last
 * commit returned by {@link DirectoryReader#listCommits(Directory)}.
 *
 * @param indexDir the index directory
 * @param callback callback to notify when the indexes were updated
 * @throws IOException if checking for or listing the commits fails
 */
public IndexReplicationHandler(Directory indexDir, Callable<Boolean> callback) throws IOException {
  this.callback = callback;
  this.indexDir = indexDir;
  this.currentRevisionFiles = null;
  this.currentVersion = null;

  if (!DirectoryReader.indexExists(indexDir)) {
    // nothing to initialize from
    return;
  }

  final List<IndexCommit> allCommits = DirectoryReader.listCommits(indexDir);
  final int lastIndex = allCommits.size() - 1;
  final IndexCommit commit = allCommits.get(lastIndex);
  this.currentRevisionFiles = IndexRevision.revisionFiles(commit);
  this.currentVersion = IndexRevision.revisionVersion(commit);

  final InfoStream log = InfoStream.getDefault();
  if (log.isEnabled(INFO_STREAM_COMPONENT)) {
    log.message(INFO_STREAM_COMPONENT, "constructor(): currentVersion=" + currentVersion
        + " currentRevisionFiles=" + currentRevisionFiles);
    log.message(INFO_STREAM_COMPONENT, "constructor(): commit=" + commit);
  }
}
public void testNoSegmentsDotGenInflation() throws IOException { Directory dir = newMockDirectory(); // empty commit new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null)).close(); SegmentInfos sis = new SegmentInfos(); sis.read(dir); assertEquals(1, sis.getGeneration()); // no inflation IndexFileDeleter.inflateGens(sis, Arrays.asList(dir.listAll()), InfoStream.getDefault()); assertEquals(1, sis.getGeneration()); dir.close(); }
public void testSegmentsInflation() throws IOException { MockDirectoryWrapper dir = newMockDirectory(); dir.setCheckIndexOnClose(false); // TODO: allow falling back more than one commit // empty commit new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null)).close(); SegmentInfos sis = new SegmentInfos(); sis.read(dir); assertEquals(1, sis.getGeneration()); // add trash commit dir.createOutput(IndexFileNames.SEGMENTS + "_2", IOContext.DEFAULT).close(); // ensure inflation IndexFileDeleter.inflateGens(sis, Arrays.asList(dir.listAll()), InfoStream.getDefault()); assertEquals(2, sis.getGeneration()); // add another trash commit dir.createOutput(IndexFileNames.SEGMENTS + "_4", IOContext.DEFAULT).close(); IndexFileDeleter.inflateGens(sis, Arrays.asList(dir.listAll()), InfoStream.getDefault()); assertEquals(4, sis.getGeneration()); dir.close(); }
public void testTrashyFile() throws IOException { MockDirectoryWrapper dir = newMockDirectory(); dir.setCheckIndexOnClose(false); // TODO: maybe handle such trash better elsewhere... // empty commit new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null)).close(); SegmentInfos sis = new SegmentInfos(); sis.read(dir); assertEquals(1, sis.getGeneration()); // add trash file dir.createOutput(IndexFileNames.SEGMENTS + "_", IOContext.DEFAULT).close(); // no inflation IndexFileDeleter.inflateGens(sis, Arrays.asList(dir.listAll()), InfoStream.getDefault()); assertEquals(1, sis.getGeneration()); dir.close(); }
public void testTrashyGenFile() throws IOException { MockDirectoryWrapper dir = newMockDirectory(); // initial commit IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null)); iw.addDocument(new Document()); iw.commit(); iw.close(); // no deletes: start at 1 SegmentInfos sis = new SegmentInfos(); sis.read(dir); assertEquals(1, sis.info(0).getNextDelGen()); // add trash file dir.createOutput("_1_A", IOContext.DEFAULT).close(); // no inflation IndexFileDeleter.inflateGens(sis, Arrays.asList(dir.listAll()), InfoStream.getDefault()); assertEquals(1, sis.info(0).getNextDelGen()); dir.close(); }
private void write(final FieldInfos fieldInfos, final Directory dir, final FieldData[] fields, boolean allowPreFlex) throws Throwable { final int termIndexInterval = TestUtil.nextInt(random(), 13, 27); final Codec codec = Codec.getDefault(); final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, SEGMENT, 10000, false, codec, null); final SegmentWriteState state = new SegmentWriteState(InfoStream.getDefault(), dir, si, fieldInfos, termIndexInterval, null, newIOContext(random())); final FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(state); Arrays.sort(fields); for (final FieldData field : fields) { if (!allowPreFlex && codec instanceof Lucene3xCodec) { // code below expects unicode sort order continue; } field.write(consumer); } consumer.close(); }
private void write(final FieldInfos fieldInfos, final Directory dir, final FieldData[] fields, boolean allowPreFlex) throws Throwable { final int termIndexInterval = _TestUtil.nextInt(random(), 13, 27); final Codec codec = Codec.getDefault(); final SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null, null); final SegmentWriteState state = new SegmentWriteState(InfoStream.getDefault(), dir, si, fieldInfos, termIndexInterval, null, newIOContext(random())); final FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(state); Arrays.sort(fields); for (final FieldData field : fields) { if (!allowPreFlex && codec instanceof Lucene3xCodec) { // code below expects unicode sort order continue; } field.write(consumer); } consumer.close(); }
private void write(final FieldInfos fieldInfos, final Directory dir, final FieldData[] fields, boolean allowPreFlex) throws Throwable { final int termIndexInterval = _TestUtil.nextInt(random(), 13, 27); final Codec codec = Codec.getDefault(); final SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null); final SegmentWriteState state = new SegmentWriteState(InfoStream.getDefault(), dir, si, fieldInfos, termIndexInterval, null, newIOContext(random())); final FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(state); Arrays.sort(fields); for (final FieldData field : fields) { if (!allowPreFlex && codec instanceof Lucene3xCodec) { // code below expects unicode sort order continue; } field.write(consumer); } consumer.close(); }
/**
 * Creates a write state carrying an explicit segment suffix.
 *
 * <p>The suffix is checked via {@code assertSegmentSuffix} when assertions are enabled;
 * all other arguments are stored as given.
 *
 * @see #SegmentWriteState(InfoStream, Directory, SegmentInfo, FieldInfos, int,
 *      BufferedUpdates, IOContext)
 */
public SegmentWriteState(InfoStream infoStream, Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos,
    int termIndexInterval, BufferedUpdates segUpdates, IOContext context, String segmentSuffix) {
  // validate the only argument that has a format constraint before storing anything
  assert assertSegmentSuffix(segmentSuffix);

  this.infoStream = infoStream;
  this.directory = directory;
  this.segmentInfo = segmentInfo;
  this.fieldInfos = fieldInfos;
  this.termIndexInterval = termIndexInterval;
  this.segUpdates = segUpdates;
  this.context = context;
  this.segmentSuffix = segmentSuffix;
}
/**
 * Creates an index upgrader on the given directory, backed by an {@link IndexWriter}
 * configured with the given {@code matchVersion}. Indexes with multiple commit points
 * can be upgraded by removing all older ones ({@code deletePriorCommits}).
 *
 * @param dir                the index directory
 * @param matchVersion       version passed to the {@link IndexWriterConfig}
 * @param infoStream         receives all logging output when not {@code null};
 *                           a {@code null} value leaves the config's info stream untouched
 * @param deletePriorCommits whether prior commit points may be removed
 */
public IndexUpgrader(Directory dir, Version matchVersion, InfoStream infoStream, boolean deletePriorCommits) {
  this(dir, new IndexWriterConfig(matchVersion, null), deletePriorCommits);
  if (infoStream == null) {
    return;
  }
  this.iwc.setInfoStream(infoStream);
}
/**
 * Performs the upgrade by force-merging the index down to one segment with an
 * {@code UpgradeIndexMergePolicy} wrapped around the configured merge policy.
 *
 * @throws IndexNotFoundException   if no index exists in the directory
 * @throws IllegalArgumentException if prior commits must be kept but more than one commit exists
 * @throws IOException              if the writer fails
 */
public void upgrade() throws IOException {
  if (DirectoryReader.indexExists(dir) == false) {
    throw new IndexNotFoundException(dir.toString());
  }

  if (deletePriorCommits == false) {
    final Collection<IndexCommit> existingCommits = DirectoryReader.listCommits(dir);
    if (existingCommits.size() > 1) {
      throw new IllegalArgumentException("This tool was invoked to not delete prior commit points, but the following commits were found: " + existingCommits);
    }
  }

  final UpgradeIndexMergePolicy upgradePolicy = new UpgradeIndexMergePolicy(iwc.getMergePolicy());
  iwc.setMergePolicy(upgradePolicy);
  iwc.setIndexDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());

  final IndexWriter writer = new IndexWriter(dir, iwc);
  try {
    final InfoStream log = iwc.getInfoStream();
    if (log.isEnabled("IndexUpgrader")) {
      log.message("IndexUpgrader", "Upgrading all pre-" + Version.LATEST + " segments of index directory '" + dir + "' to version " + Version.LATEST + "...");
    }
    writer.forceMerge(1);
    if (log.isEnabled("IndexUpgrader")) {
      log.message("IndexUpgrader", "All segments upgraded to version " + Version.LATEST);
    }
  } finally {
    // the writer is closed whether or not the merge succeeded
    writer.close();
  }
}
/**
 * Sole constructor; stores the given references without copying them.
 *
 * @param readers     readers taking part in the merge
 * @param segmentInfo info of the segment being written
 * @param infoStream  stream for merge messages
 * @param checkAbort  abort checker for the merge
 */
MergeState(List<AtomicReader> readers, SegmentInfo segmentInfo, InfoStream infoStream, CheckAbort checkAbort) {
  this.segmentInfo = segmentInfo;
  this.readers = readers;
  this.checkAbort = checkAbort;
  this.infoStream = infoStream;
}
/**
 * Creates a per-thread documents writer for the segment named {@code segmentName}.
 *
 * @param segmentName       name given to the new {@link SegmentInfo}
 * @param directory         original directory; kept as-is and also wrapped in a
 *                          {@link TrackingDirectoryWrapper}
 * @param indexWriterConfig supplies the codec, the similarity and the indexing chain
 * @param infoStream        receives the "DWPT" init message when enabled
 * @param deleteQueue       delete queue from which a new slice is taken
 * @param fieldInfos        field infos builder stored on this instance
 * @param pendingNumDocs    counter stored on this instance
 * @throws IOException declared by the constructor
 */
public DocumentsWriterPerThread(String segmentName, Directory directory, LiveIndexWriterConfig indexWriterConfig, InfoStream infoStream, DocumentsWriterDeleteQueue deleteQueue,
                                FieldInfos.Builder fieldInfos, AtomicLong pendingNumDocs) throws IOException {
  // keep both the raw directory and a tracking wrapper around it
  this.directoryOrig = directory;
  this.directory = new TrackingDirectoryWrapper(directory);
  this.fieldInfos = fieldInfos;
  this.indexWriterConfig = indexWriterConfig;
  this.infoStream = infoStream;
  this.codec = indexWriterConfig.getCodec();
  // NOTE: 'this' escapes to DocState before construction has finished
  this.docState = new DocState(this, infoStream);
  this.docState.similarity = indexWriterConfig.getSimilarity();
  this.pendingNumDocs = pendingNumDocs;
  // both block allocators are created with the same bytesUsed counter
  bytesUsed = Counter.newCounter();
  byteBlockAllocator = new DirectTrackingAllocator(bytesUsed);
  pendingUpdates = new BufferedUpdates();
  intBlockAllocator = new IntBlockAllocator(bytesUsed);
  this.deleteQueue = deleteQueue;
  assert numDocsInRAM == 0 : "num docs " + numDocsInRAM;
  // NOTE(review): clear() on the freshly created BufferedUpdates looks redundant - confirm before removing
  pendingUpdates.clear();
  deleteSlice = deleteQueue.newSlice();
  // the segment info is created on the original (unwrapped) directory with a doc count of -1
  segmentInfo = new SegmentInfo(directoryOrig, Version.LATEST, segmentName, -1, false, codec, null);
  assert numDocsInRAM == 0;
  if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) {
    infoStream.message("DWPT", Thread.currentThread().getName() + " init seg=" + segmentName + " delQueue=" + deleteQueue);
  }
  // this should be the last call in the ctor
  // it really sucks that we need to pull this within the ctor and pass this ref to the chain!
  consumer = indexWriterConfig.getIndexingChain().getChain(this);
}
/**
 * Records the given analyzer and match version and initializes every other setting
 * to its default, mostly taken from the {@code DEFAULT_*} constants on
 * {@link IndexWriterConfig}.
 *
 * @param analyzer     the analyzer to store
 * @param matchVersion the match version to store
 * @throws NullPointerException if {@code Codec.getDefault()} returns {@code null}
 */
LiveIndexWriterConfig(Analyzer analyzer, Version matchVersion) {
  this.analyzer = analyzer;
  this.matchVersion = matchVersion;

  // buffering / flush triggers
  this.ramBufferSizeMB = IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB;
  this.maxBufferedDocs = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS;
  this.maxBufferedDeleteTerms = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS;

  // reader-side settings
  this.readerTermsIndexDivisor = IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR;
  this.mergedSegmentWarmer = null;
  // TODO: this should be private to the codec, not settable here
  this.termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL;

  // commit handling and open mode
  this.delPolicy = new KeepOnlyLastCommitDeletionPolicy();
  this.commit = null;
  this.useCompoundFile = IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM;
  this.openMode = OpenMode.CREATE_OR_APPEND;

  // scoring, merging and locking
  this.similarity = IndexSearcher.getDefaultSimilarity();
  this.mergeScheduler = new ConcurrentMergeScheduler();
  this.writeLockTimeout = IndexWriterConfig.WRITE_LOCK_TIMEOUT;
  this.indexingChain = DocumentsWriterPerThread.defaultIndexingChain;

  this.codec = Codec.getDefault();
  if (this.codec == null) {
    throw new NullPointerException();
  }

  this.infoStream = InfoStream.getDefault();
  this.mergePolicy = new TieredMergePolicy();
  this.flushPolicy = new FlushByRamOrCountsPolicy();
  this.readerPooling = IndexWriterConfig.DEFAULT_READER_POOLING;
  this.indexerThreadPool = new DocumentsWriterPerThreadPool(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES);
  this.perThreadHardLimitMB = IndexWriterConfig.DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB;
}
/**
 * Sets the stream to which information about merges, deletes and a
 * message when maxFieldLength is reached will be printed.
 *
 * @param infoStream the stream to use; must not be {@code null}. Use
 *                   {@link InfoStream#NO_OUTPUT} to suppress output.
 * @return this config, to allow chaining
 * @throws IllegalArgumentException if {@code infoStream} is {@code null}
 */
public IndexWriterConfig setInfoStream(InfoStream infoStream) {
  if (infoStream != null) {
    this.infoStream = infoStream;
    return this;
  }
  throw new IllegalArgumentException("Cannot set InfoStream implementation to null. "+
      "To disable logging use InfoStream.NO_OUTPUT");
}
@Override public int doLogic() throws IOException { IndexWriter iw = getRunData().getIndexWriter(); if (iw != null) { // If infoStream was set to output to a file, close it. InfoStream infoStream = iw.getConfig().getInfoStream(); if (infoStream != null) { infoStream.close(); } iw.close(doWait); getRunData().setIndexWriter(null); } return 1; }
@Override public int doLogic() throws IOException { IndexWriter iw = getRunData().getIndexWriter(); if (iw != null) { // If infoStream was set to output to a file, close it. InfoStream infoStream = iw.getConfig().getInfoStream(); if (infoStream != null) { infoStream.close(); } iw.rollback(); getRunData().setIndexWriter(null); } return 1; }
/** * Constructor with the given index directory and callback to notify when the * indexes were updated. */ public IndexAndTaxonomyReplicationHandler(Directory indexDir, Directory taxoDir, Callable<Boolean> callback) throws IOException { this.callback = callback; this.indexDir = indexDir; this.taxoDir = taxoDir; currentRevisionFiles = null; currentVersion = null; final boolean indexExists = DirectoryReader.indexExists(indexDir); final boolean taxoExists = DirectoryReader.indexExists(taxoDir); if (indexExists != taxoExists) { throw new IllegalStateException("search and taxonomy indexes must either both exist or not: index=" + indexExists + " taxo=" + taxoExists); } if (indexExists) { // both indexes exist final IndexCommit indexCommit = IndexReplicationHandler.getLastCommit(indexDir); final IndexCommit taxoCommit = IndexReplicationHandler.getLastCommit(taxoDir); currentRevisionFiles = IndexAndTaxonomyRevision.revisionFiles(indexCommit, taxoCommit); currentVersion = IndexAndTaxonomyRevision.revisionVersion(indexCommit, taxoCommit); final InfoStream infoStream = InfoStream.getDefault(); if (infoStream.isEnabled(INFO_STREAM_COMPONENT)) { infoStream.message(INFO_STREAM_COMPONENT, "constructor(): currentVersion=" + currentVersion + " currentRevisionFiles=" + currentRevisionFiles); infoStream.message(INFO_STREAM_COMPONENT, "constructor(): indexCommit=" + indexCommit + " taxoCommit=" + taxoCommit); } } }
/**
 * Sets the {@link InfoStream} to use for logging messages.
 *
 * @param infoStream the stream to use; {@code null} is replaced by {@link InfoStream#NO_OUTPUT}
 */
public void setInfoStream(InfoStream infoStream) {
  this.infoStream = (infoStream == null) ? InfoStream.NO_OUTPUT : infoStream;
}
/** * Cleans up the index directory from old index files. This method uses the * last commit found by {@link #getLastCommit(Directory)}. If it matches the * expected segmentsFile, then all files not referenced by this commit point * are deleted. * <p> * <b>NOTE:</b> this method does a best effort attempt to clean the index * directory. It suppresses any exceptions that occur, as this can be retried * the next time. */ public static void cleanupOldIndexFiles(Directory dir, String segmentsFile, InfoStream infoStream) { try { IndexCommit commit = getLastCommit(dir); // commit == null means weird IO errors occurred, ignore them // if there were any IO errors reading the expected commit point (i.e. // segments files mismatch), then ignore that commit either. if (commit != null && commit.getSegmentsFileName().equals(segmentsFile)) { Set<String> commitFiles = new HashSet<>(); commitFiles.addAll(commit.getFileNames()); commitFiles.add(IndexFileNames.SEGMENTS_GEN); Matcher matcher = IndexFileNames.CODEC_FILE_PATTERN.matcher(""); for (String file : dir.listAll()) { if (!commitFiles.contains(file) && (matcher.reset(file).matches() || file.startsWith(IndexFileNames.SEGMENTS))) { // suppress exceptions, it's just a best effort IOUtils.deleteFilesIgnoringExceptions(dir, file); } } } } catch (Throwable t) { // ignore any errors that happen during this state and only log it. this // cleanup will have a chance to succeed the next time we get a new // revision. if (infoStream.isEnabled(INFO_STREAM_COMPONENT)) { infoStream.message(INFO_STREAM_COMPONENT, "cleanupOldIndexFiles(): failed on error " + t.getMessage()); } } }
public void testSegmentNameInflation() throws IOException { MockDirectoryWrapper dir = newMockDirectory(); // empty commit new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null)).close(); SegmentInfos sis = new SegmentInfos(); sis.read(dir); assertEquals(0, sis.counter); // no inflation IndexFileDeleter.inflateGens(sis, Arrays.asList(dir.listAll()), InfoStream.getDefault()); assertEquals(0, sis.counter); // add trash per-segment file dir.createOutput(IndexFileNames.segmentFileName("_0", "", "foo"), IOContext.DEFAULT).close(); // ensure inflation IndexFileDeleter.inflateGens(sis, Arrays.asList(dir.listAll()), InfoStream.getDefault()); assertEquals(1, sis.counter); // add trash per-segment file dir.createOutput(IndexFileNames.segmentFileName("_3", "", "foo"), IOContext.DEFAULT).close(); IndexFileDeleter.inflateGens(sis, Arrays.asList(dir.listAll()), InfoStream.getDefault()); assertEquals(4, sis.counter); // ensure we write _4 segment next IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null)); iw.addDocument(new Document()); iw.commit(); iw.close(); sis = new SegmentInfos(); sis.read(dir); assertEquals("_4", sis.info(0).info.name); assertEquals(5, sis.counter); dir.close(); }
public void testGenerationInflation() throws IOException { MockDirectoryWrapper dir = newMockDirectory(); // initial commit IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null)); iw.addDocument(new Document()); iw.commit(); iw.close(); // no deletes: start at 1 SegmentInfos sis = new SegmentInfos(); sis.read(dir); assertEquals(1, sis.info(0).getNextDelGen()); // no inflation IndexFileDeleter.inflateGens(sis, Arrays.asList(dir.listAll()), InfoStream.getDefault()); assertEquals(1, sis.info(0).getNextDelGen()); // add trash per-segment deletes file dir.createOutput(IndexFileNames.fileNameFromGeneration("_0", "del", 2), IOContext.DEFAULT).close(); // ensure inflation IndexFileDeleter.inflateGens(sis, Arrays.asList(dir.listAll()), InfoStream.getDefault()); assertEquals(3, sis.info(0).getNextDelGen()); dir.close(); }
/** * Randomizes the use of some of hte constructor variations */ private static IndexUpgrader newIndexUpgrader(Directory dir) { final boolean streamType = random().nextBoolean(); final int choice = TestUtil.nextInt(random(), 0, 2); switch (choice) { case 0: return new IndexUpgrader(dir, TEST_VERSION_CURRENT); case 1: return new IndexUpgrader(dir, TEST_VERSION_CURRENT, streamType ? null : InfoStream.NO_OUTPUT, false); case 2: return new IndexUpgrader(dir, newIndexWriterConfig(null), false); default: fail("case statement didn't get updated when random bounds changed"); } return null; // never get here }