static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
  try (InputStream stream = Files.newInputStream(file)) {
    Document doc = new Document();

    // Store the path so hits can report which file matched.
    Field pathField = new StringField("path", file.toString(), Field.Store.YES);
    doc.add(pathField);

    // Index the last-modified time as a point for range queries.
    doc.add(new LongPoint("modified", lastModified));

    // Tokenize the file contents; the reader is consumed during indexing.
    doc.add(new TextField("contents",
        new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

    if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
      // New index: no old copy of this document can exist, so just add it.
      System.out.println("adding " + file);
      writer.addDocument(doc);
    } else {
      // Existing index: replace any previous document with the same path.
      System.out.println("updating " + file);
      writer.updateDocument(new Term("path", file.toString()), doc);
    }
  }
}
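// A companion sketch (not from this source): how indexDoc above is typically
// driven from a recursive directory walk, following the pattern of Lucene's
// IndexFiles demo. The indexDocs name and the visitor wiring are assumptions.
static void indexDocs(final IndexWriter writer, Path path) throws IOException {
  if (Files.isDirectory(path)) {
    Files.walkFileTree(path, new SimpleFileVisitor<Path>() {
      @Override
      public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
        indexDoc(writer, file, attrs.lastModifiedTime().toMillis());
        return FileVisitResult.CONTINUE;
      }
    });
  } else {
    indexDoc(writer, path, Files.getLastModifiedTime(path).toMillis());
  }
}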
@Override
public void process(ProcessingContext<Corpus> ctx, Corpus corpus) throws ModuleException {
  try (KeywordAnalyzer kwa = new KeywordAnalyzer()) {
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_36, kwa);
    writerConfig.setOpenMode(append ? OpenMode.CREATE_OR_APPEND : OpenMode.CREATE);
    try (Directory dir = FSDirectory.open(indexDir)) {
      try (IndexWriter writer = new IndexWriter(dir, writerConfig)) {
        AlvisDBIndexerResolvedObjects resObj = getResolvedObjects();
        Logger logger = getLogger(ctx);
        EvaluationContext evalCtx = new EvaluationContext(logger);
        for (ADBElements.Resolved ent : resObj.elements) {
          ent.indexElements(logger, writer, evalCtx, corpus);
        }
      }
    } catch (IOException e) {
      rethrow(e);
    }
  }
}
/**
 * {@link PersistentSnapshotDeletionPolicy} wraps another
 * {@link IndexDeletionPolicy} to enable flexible snapshotting.
 *
 * @param primary
 *          the {@link IndexDeletionPolicy} that is used on non-snapshotted
 *          commits. Snapshotted commits, by definition, are not deleted until
 *          explicitly released via {@link #release}.
 * @param dir
 *          the {@link Directory} which will be used to persist the snapshots
 *          information.
 * @param mode
 *          specifies whether a new index should be created, deleting all
 *          existing snapshots information (immediately), or an existing index
 *          should be opened, initializing the class with the snapshots
 *          information.
 */
public PersistentSnapshotDeletionPolicy(IndexDeletionPolicy primary, Directory dir,
    OpenMode mode) throws IOException {
  super(primary);
  this.dir = dir;

  if (mode == OpenMode.CREATE) {
    clearPriorSnapshots();
  }

  loadPriorSnapshots();

  if (mode == OpenMode.APPEND && nextWriteGen == 0) {
    throw new IllegalStateException("no snapshots stored in this directory");
  }
}
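// A hedged usage sketch for the constructor documented above: install the
// policy on an IndexWriterConfig, snapshot a commit, and release it later.
// The analyzer, snapshotDir, and indexDir variables are illustrative
// assumptions, not taken from this source.
IndexWriterConfig config = new IndexWriterConfig(analyzer);
PersistentSnapshotDeletionPolicy snapshotter = new PersistentSnapshotDeletionPolicy(
    new KeepOnlyLastCommitDeletionPolicy(), snapshotDir, OpenMode.CREATE_OR_APPEND);
config.setIndexDeletionPolicy(snapshotter);
try (IndexWriter writer = new IndexWriter(indexDir, config)) {
  writer.commit(); // a snapshot can only be taken once at least one commit exists
  IndexCommit snapshot = snapshotter.snapshot(); // pins this commit; persisted across restarts
  try {
    // ... copy the files named in snapshot.getFileNames() to backup storage ...
  } finally {
    snapshotter.release(snapshot); // the commit may now be deleted by the primary policy
  }
}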
/**
 * Constructor for LuceneIndex.
 *
 * @param dataDirectory Path to the directory to create an index directory within.
 * @throws IndexException if the index cannot be opened
 */
public LuceneIndex(Path dataDirectory) throws IndexException {
  // TODO: Check to make sure directory is readable/writable
  path = dataDirectory.resolve(INDEXDIR);
  try {
    dir = FSDirectory.open(path);
    analyzer = new StandardAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
    writer = new IndexWriter(dir, iwc);
    // Open a near-real-time reader over the writer (applyAllDeletes = false).
    reader = DirectoryReader.open(writer, false);
    searcher = new IndexSearcher(reader);
    parser = new QueryParser(IndexDocumentAdapter.FIELD_SEARCH, analyzer);
  } catch (IOException e) {
    LOG.error(e.getLocalizedMessage());
    throw new IndexException(e);
  }
}
/**
 * Get the shared IndexWriter (lazily initialized).
 *
 * @return the singleton IndexWriter
 * @throws IOException if the writer cannot be created
 */
protected static IndexWriter getIndexWriter() throws IOException {
  if (null != indexWriter) {
    return indexWriter;
  }
  // Guard against concurrent initialization.
  synchronized (IndexUtil.class) {
    // Re-check inside the lock so only one thread creates the writer.
    if (null == indexWriter) {
      IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,
          new StandardAnalyzer(Version.LUCENE_35));
      // Create the index if it does not exist yet, otherwise append to it.
      config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
      indexWriter = new IndexWriter(directory, config);
    }
    return indexWriter;
  }
}
public void initializeIndexBuilder() throws Exception {
  // Create a new index directory and writer to index a triples file.
  // Raise an error if an index already exists, so we don't accidentally overwrite it.
  String indexDir = getIndexDirectoryName();
  if ((new File(indexDir)).isDirectory()) {
    throw new IOException("Index directory already exists, remove it before indexing");
  }
  indexDirectory = FSDirectory.open(Paths.get(indexDir));
  IndexWriterConfig iwc = new IndexWriterConfig(getIndexAnalyzer());
  // we always create a new index from scratch:
  iwc.setOpenMode(OpenMode.CREATE);
  iwc.setCodec(new Lucene54Codec(Mode.BEST_SPEED)); // the default
  //iwc.setCodec(new Lucene54Codec(Mode.BEST_COMPRESSION)); // slower, but better compression
  indexWriter = new IndexWriter(indexDirectory, iwc);
  indexAnalyzer = getIndexAnalyzer();

  if (INDEX_PREDICATES) printlnProg("Indexing individual predicates");
  if (INDEX_TEXT) printlnProg("Indexing combined predicate text values");
  if (INDEX_LANGUAGE) printlnProg("Indexing predicates for language(s): " + supportedLanguages);
}
public static void main(String args[]) throws Exception {
  initLoggers(Level.ERROR);

  RedisDirectory DIR = new RedisDirectory(REDIS_HOST, REDIS_PORT, REDIS_PASSWORD);
  DIR.init();
  long t1 = System.currentTimeMillis();
  try {
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
    IndexWriter iw = new IndexWriter(DIR, iwc);
    Path docDir = Paths.get("/Users/btnguyen/Workspace/Apps/Apache-Cassandra-2.1.8/javadoc/");
    indexDocs(iw, docDir);
    iw.commit();
    iw.close();
  } finally {
    DIR.destroy();
  }
  long t2 = System.currentTimeMillis();
  System.out.println("Finished in " + (t2 - t1) / 1000.0 + " sec");
}
public static void main(String args[]) throws Exception {
  initLoggers(Level.INFO);

  RedisDirectory DIR = new RedisDirectory(REDIS_HOST, REDIS_PORT, REDIS_PASSWORD);
  DIR.init();
  long t1 = System.currentTimeMillis();
  try {
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
    IndexWriter iw = new IndexWriter(DIR, iwc);

    Document doc = new Document();
    doc.add(new StringField("id", "thanhnb", Field.Store.YES));
    doc.add(new TextField("name", "Nguyen Ba Thanh", Field.Store.NO));
    iw.updateDocument(new Term("id", "thanhnb"), doc);

    iw.commit();
    iw.close();
  } finally {
    DIR.destroy();
  }
  long t2 = System.currentTimeMillis();
  System.out.println("Finished in " + (t2 - t1) / 1000.0 + " sec");
}
public void testNoMergeAfterCopy() throws IOException {
  // main directory
  Directory dir = newDirectory();
  // auxiliary directory
  Directory aux = newDirectory();

  setUpDirs(dir, aux);

  IndexWriter writer = newWriter(
      dir,
      newIndexWriterConfig(new MockAnalyzer(random())).
          setOpenMode(OpenMode.APPEND).
          setMaxBufferedDocs(10).
          setMergePolicy(newLogMergePolicy(4))
  );

  writer.addIndexes(aux,
      new MockDirectoryWrapper(random(), new RAMDirectory(aux, newIOContext(random()))));
  assertEquals(1060, writer.maxDoc());
  assertEquals(1000, writer.getDocCount(0));
  writer.close();

  // make sure the index is correct
  verifyNumDocs(dir, 1060);
  dir.close();
  aux.close();
}
public PDLucene () throws IOException {
  // Where to store the index files
  final Path aPath = getLuceneIndexDir ().toPath ();
  m_aDir = FSDirectory.open (aPath);

  // Analyzer to use
  m_aAnalyzer = createAnalyzer ();

  // Create the index writer
  final IndexWriterConfig aWriterConfig = new IndexWriterConfig (m_aAnalyzer);
  aWriterConfig.setOpenMode (OpenMode.CREATE_OR_APPEND);
  m_aIndexWriter = new IndexWriter (m_aDir, aWriterConfig);

  // Reader and searcher are opened on demand
  s_aLogger.info ("Lucene index operating on " + aPath);
}
/**
 * Creates an empty collection to get it up and running.
 */
public synchronized void create() throws IOException {
  setDirectory();

  if (directory.listAll().length > 2) {
    throw new IOException("directory not empty; possible collection already present");
  }

  IndexWriterConfig iwc = new IndexWriterConfig(AnalyzerFactory.get(language));
  iwc.setOpenMode(OpenMode.CREATE);
  indexwriter = new IndexWriter(directory, iwc);
  indexwriter.commit();
  indexwriter.close();
  indexwriter = null;

  // Drop an openbd.created file in there so we know when the collection was created.
  created = System.currentTimeMillis();
  File touchFile = new File(collectionpath, "openbd.created");
  Writer fw = new FileWriter(touchFile);
  fw.close();
}
GerritIndexWriterConfig(Config cfg, String name) {
  analyzer = new CustomMappingAnalyzer(
      new StandardAnalyzer(CharArraySet.EMPTY_SET), CUSTOM_CHAR_MAPPING);
  luceneConfig = new IndexWriterConfig(analyzer)
      .setOpenMode(OpenMode.CREATE_OR_APPEND)
      .setCommitOnClose(true);

  // ramBufferSize is configured in bytes (default = the Lucene default
  // expressed in bytes) and converted back to megabytes for Lucene.
  double m = 1 << 20;
  luceneConfig.setRAMBufferSizeMB(
      cfg.getLong(
          "index", name, "ramBufferSize",
          (long) (IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB * m)) / m);
  luceneConfig.setMaxBufferedDocs(
      cfg.getInt("index", name, "maxBufferedDocs",
          IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS));

  try {
    commitWithinMs = ConfigUtil.getTimeUnit(
        cfg, "index", name, "commitWithin",
        MILLISECONDS.convert(5, MINUTES), MILLISECONDS);
  } catch (IllegalArgumentException e) {
    // Fall back to interpreting the value as a plain number of milliseconds.
    commitWithinMs = cfg.getLong("index", name, "commitWithin", 0);
  }
}
public void prepareIndex() throws IOException {
  File globalWFMDIr = new File(Util.GTPM_INDEX_DIR);
  if (!globalWFMDIr.exists()) {
    Util.createDirs(Util.GTPM_INDEX_DIR);
  }

  KeywordAnalyzer keywordAnalyzer = new KeywordAnalyzer();
  IndexWriterConfig wfmIndexWriterConfig = new IndexWriterConfig(Version.LUCENE_46, keywordAnalyzer);
  wfmIndexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
  wfmIndexWriterConfig.setRAMBufferSizeMB(1024);

  logger.info("PREPARE INDEX");
  try {
    wfmIndexWriter = new IndexWriter(FSDirectory.open(new File(Util.GTPM_INDEX_DIR)),
        wfmIndexWriterConfig);
    wfmIndexWriter.commit();
    wfmIndexer = new DocumentMaker(wfmIndexWriter);
  } catch (IOException e) {
    e.printStackTrace();
  }
}
private static IndexWriterConfig getIndexWriterConfig() {
  final Analyzer analyzer = getAnalyzer();
  final IndexWriterConfig writerConfig = new IndexWriterConfig(LUCENE_VERSION, analyzer);

  final boolean IS_DELETE_INDEX = true;
  if (IS_DELETE_INDEX) {
    // Delete the old index and create a new one.
    writerConfig.setOpenMode(OpenMode.CREATE);
  } else {
    writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
  }
  return writerConfig;
}
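// A minimal usage sketch (the index path is hypothetical): feeding the config
// above into a writer. Since IS_DELETE_INDEX is hard-coded to true, OpenMode.CREATE
// is always selected, so any existing index at the path is discarded on open;
// the CREATE_OR_APPEND branch would have reused it instead.
try (Directory dir = FSDirectory.open(new File("/tmp/example-index"));
     IndexWriter writer = new IndexWriter(dir, getIndexWriterConfig())) {
  writer.addDocument(new Document());
  writer.commit();
}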
@Test
public void testCommit() throws Exception {
  // Verifies that nothing is committed to the underlying Directory, if
  // commit() wasn't called.
  Directory dir = newDirectory();
  DirectoryTaxonomyWriter ltw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
  assertFalse(DirectoryReader.indexExists(dir));
  ltw.commit(); // first commit, so that an index will be created
  ltw.addCategory(new FacetLabel("a"));

  IndexReader r = DirectoryReader.open(dir);
  // Only the root category from the first commit is visible; "a" was not committed.
  assertEquals("No categories should have been committed to the underlying directory",
      1, r.numDocs());
  r.close();
  ltw.close();
  dir.close();
}
public void testRAMDirectoryNoLocking() throws IOException {
  MockDirectoryWrapper dir = new MockDirectoryWrapper(random(), new RAMDirectory());
  dir.setLockFactory(NoLockFactory.getNoLockFactory());
  dir.setWrapLockFactory(false); // we are gonna explicitly test we get this back
  assertTrue("RAMDirectory.setLockFactory did not take",
      NoLockFactory.class.isInstance(dir.getLockFactory()));

  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT,
      new MockAnalyzer(random())));
  writer.commit(); // required so the second open succeeds

  // Create a 2nd IndexWriter. This is normally not allowed but it should run through
  // since we're not using any locks:
  IndexWriter writer2 = null;
  try {
    writer2 = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT,
        new MockAnalyzer(random())).setOpenMode(OpenMode.APPEND));
  } catch (Exception e) {
    e.printStackTrace(System.out);
    fail("Should not have hit an IOException with no locking");
  }

  writer.close();
  if (writer2 != null) {
    writer2.close();
  }
}
private File buildIndex() throws IOException {
  File path = createTempDir("buildIndex");
  Directory dir = newFSDirectory(path);
  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
      TEST_VERSION_CURRENT, new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE));

  // add some documents
  Document doc = null;
  for (int i = 0; i < docsToAdd; i++) {
    doc = new Document();
    doc.add(newStringField("content", English.intToEnglish(i).trim(), Field.Store.YES));
    writer.addDocument(doc);
  }
  assertEquals(docsToAdd, writer.maxDoc());
  writer.close();
  dir.close();

  return path;
}
@Test
public void testMergeUnusedPerFieldCodec() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwconf = newIndexWriterConfig(new MockAnalyzer(random()))
      .setOpenMode(OpenMode.CREATE).setCodec(new MockCodec());
  IndexWriter writer = newWriter(dir, iwconf);

  addDocs(writer, 10);
  writer.commit();
  addDocs3(writer, 10);
  writer.commit();
  addDocs2(writer, 10);
  writer.commit();
  assertEquals(30, writer.maxDoc());
  TestUtil.checkIndex(dir);
  writer.forceMerge(1);
  assertEquals(30, writer.maxDoc());
  writer.close();
  dir.close();
}
public void testFilteredSearch() throws IOException {
  boolean enforceSingleSegment = true;
  Directory directory = newDirectory();
  int[] filterBits = {1, 36};
  SimpleDocIdSetFilter filter = new SimpleDocIdSetFilter(filterBits);
  IndexWriter writer = new IndexWriter(directory,
      newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
  searchFiltered(writer, directory, filter, enforceSingleSegment);

  // run the test on more than one segment
  enforceSingleSegment = false;
  writer.close();
  writer = new IndexWriter(directory,
      newIndexWriterConfig(new MockAnalyzer(random()))
          .setOpenMode(OpenMode.CREATE)
          .setMaxBufferedDocs(10)
          .setMergePolicy(newLogMergePolicy()));
  // we index 60 docs - this will create 6 segments
  searchFiltered(writer, directory, filter, enforceSingleSegment);
  writer.close();
  directory.close();
}
/**
 * Initializes the tests by adding documents to the index.
 */
@Override
public void setUp() throws Exception {
  super.setUp();

  // create test index
  final RandomIndexWriter writer = new RandomIndexWriter(random(), mDirectory,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true,
          MockTokenFilter.ENGLISH_STOPSET))
          .setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy())
          .setSimilarity(new DefaultSimilarity()));
  addDocument(writer, "A", "Should we, could we, would we?");
  addDocument(writer, "B", "It should. Should it?");
  addDocument(writer, "C", "It shouldn't.");
  addDocument(writer, "D", "Should we, should we, should we.");
  reader2 = writer.getReader();
  writer.close();

  // re-open the searcher since we added more docs
  searcher2 = newSearcher(reader2);
  searcher2.setSimilarity(new DefaultSimilarity());
}
public void _testStressLocks(LockFactory lockFactory, File indexDir) throws Exception {
  Directory dir = newFSDirectory(indexDir, lockFactory);

  // First create a 1 doc index:
  IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT,
      new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE));
  addDoc(w);
  w.close();

  WriterThread writer = new WriterThread(100, dir);
  SearcherThread searcher = new SearcherThread(100, dir);
  writer.start();
  searcher.start();

  while (writer.isAlive() || searcher.isAlive()) {
    Thread.sleep(1000);
  }

  assertTrue("IndexWriter hit unexpected exceptions", !writer.hitException);
  assertTrue("IndexSearcher hit unexpected exceptions", !searcher.hitException);

  dir.close();
  // Cleanup
  TestUtil.rm(indexDir);
}
public void createRandomTerms(int nDocs, int nTerms, double power, Directory dir) throws Exception {
  int[] freq = new int[nTerms];
  terms = new Term[nTerms];
  for (int i = 0; i < nTerms; i++) {
    int f = (nTerms + 1) - i; // make first terms less frequent
    freq[i] = (int) Math.ceil(Math.pow(f, power));
    terms[i] = new Term("f", Character.toString((char) ('A' + i)));
  }

  IndexWriter iw = new IndexWriter(dir,
      newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE));
  for (int i = 0; i < nDocs; i++) {
    Document d = new Document();
    for (int j = 0; j < nTerms; j++) {
      // Term j appears in a doc with probability 1/freq[j], giving a power-law
      // distribution of document frequencies across the terms.
      if (random().nextInt(freq[j]) == 0) {
        d.add(newStringField("f", terms[j].text(), Field.Store.NO));
        //System.out.println(d);
      }
    }
    iw.addDocument(d);
  }
  iw.forceMerge(1);
  iw.close();
}
public void changeIndexNoAdds(Random random, Directory dir) throws IOException {
  // make sure searching sees right # hits
  DirectoryReader reader = DirectoryReader.open(dir);
  IndexSearcher searcher = new IndexSearcher(reader);
  ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
  assertEquals("wrong number of hits", 34, hits.length);
  Document d = searcher.doc(hits[0].doc);
  assertEquals("wrong first document", "21", d.get("id"));
  reader.close();

  // fully merge
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random))
      .setOpenMode(OpenMode.APPEND));
  writer.forceMerge(1);
  writer.close();

  reader = DirectoryReader.open(dir);
  searcher = new IndexSearcher(reader);
  hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
  assertEquals("wrong number of hits", 34, hits.length);
  doTestHits(hits, 34, searcher.getIndexReader());
  reader.close();
}
public void testUpgradeWithNRTReader() throws Exception {
  for (String name : oldNames) {
    Directory dir = newDirectory(oldIndexDirs.get(name));

    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
        .setOpenMode(OpenMode.APPEND));
    writer.addDocument(new Document());
    DirectoryReader r = DirectoryReader.open(writer, true);
    writer.commit();
    r.close();
    writer.forceMerge(1);
    writer.commit();
    writer.rollback();
    new SegmentInfos().read(dir);
    dir.close();
  }
}
public void testIsCurrent() throws Exception {
  Directory d = newDirectory();
  IndexWriter writer = new IndexWriter(d, newIndexWriterConfig(new MockAnalyzer(random())));
  addDocumentWithFields(writer);
  writer.close();

  // set up reader:
  DirectoryReader reader = DirectoryReader.open(d);
  assertTrue(reader.isCurrent());

  // modify index by adding another document:
  writer = new IndexWriter(d, newIndexWriterConfig(new MockAnalyzer(random()))
      .setOpenMode(OpenMode.APPEND));
  addDocumentWithFields(writer);
  writer.close();
  assertFalse(reader.isCurrent());

  // re-create index:
  writer = new IndexWriter(d, newIndexWriterConfig(new MockAnalyzer(random()))
      .setOpenMode(OpenMode.CREATE));
  addDocumentWithFields(writer);
  writer.close();
  assertFalse(reader.isCurrent());
  reader.close();
  d.close();
}
private void fillIndex(Random random, Directory dir, int start, int numDocs) throws IOException {
  IndexWriter writer = new IndexWriter(
      dir,
      newIndexWriterConfig(new MockAnalyzer(random))
          .setOpenMode(OpenMode.CREATE)
          .setMaxBufferedDocs(2)
          .setMergePolicy(newLogMergePolicy(2))
  );

  for (int i = start; i < (start + numDocs); i++) {
    Document temp = new Document();
    temp.add(newStringField("count", ("" + i), Field.Store.YES));
    writer.addDocument(temp);
  }
  writer.close();
}
public void testDefaultRAMDirectory() throws IOException {
  Directory dir = new RAMDirectory();
  assertTrue("RAMDirectory did not use correct LockFactory: got " + dir.getLockFactory(),
      SingleInstanceLockFactory.class.isInstance(dir.getLockFactory()));

  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT,
      new MockAnalyzer(random())));

  // Create a 2nd IndexWriter. This should fail:
  IndexWriter writer2 = null;
  try {
    writer2 = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT,
        new MockAnalyzer(random())).setOpenMode(OpenMode.APPEND));
    fail("Should have hit an IOException with two IndexWriters on default SingleInstanceLockFactory");
  } catch (IOException e) {
    // expected
  }

  writer.close();
  if (writer2 != null) {
    writer2.close();
  }
}
public void testIndexNoDocuments() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  writer.commit();
  writer.close();

  IndexReader reader = DirectoryReader.open(dir);
  assertEquals(0, reader.maxDoc());
  assertEquals(0, reader.numDocs());
  reader.close();

  writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
      .setOpenMode(OpenMode.APPEND));
  writer.commit();
  writer.close();

  reader = DirectoryReader.open(dir);
  assertEquals(0, reader.maxDoc());
  assertEquals(0, reader.numDocs());
  reader.close();
  dir.close();
}
public void changeIndexNoAdds(Random random, Directory dir) throws IOException {
  // make sure searching sees right # hits
  DirectoryReader reader = DirectoryReader.open(dir);
  IndexSearcher searcher = newSearcher(reader);
  ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
  assertEquals("wrong number of hits", 34, hits.length);
  Document d = searcher.doc(hits[0].doc);
  assertEquals("wrong first document", "0", d.get("id"));
  reader.close();

  // fully merge
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random))
      .setOpenMode(OpenMode.APPEND));
  writer.forceMerge(1);
  writer.close();

  reader = DirectoryReader.open(dir);
  searcher = newSearcher(reader);
  hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
  assertEquals("wrong number of hits", 34, hits.length);
  doTestHits(hits, 34, searcher.getIndexReader());
  reader.close();
}
private void writeEmptyTermVector(TermVectorsResponse outResponse) throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer());
  conf.setOpenMode(OpenMode.CREATE);
  IndexWriter writer = new IndexWriter(dir, conf);

  FieldType type = new FieldType(TextField.TYPE_STORED);
  type.setStoreTermVectorOffsets(true);
  type.setStoreTermVectorPayloads(false);
  type.setStoreTermVectorPositions(true);
  type.setStoreTermVectors(true);
  type.freeze();

  Document d = new Document();
  d.add(new Field("id", "abc", StringField.TYPE_STORED));

  writer.updateDocument(new Term("id", "abc"), d);
  writer.commit();
  writer.close();

  DirectoryReader dr = DirectoryReader.open(dir);
  IndexSearcher s = new IndexSearcher(dr);
  TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
  ScoreDoc[] scoreDocs = search.scoreDocs;
  int doc = scoreDocs[0].doc;
  Fields fields = dr.getTermVectors(doc);
  EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
  outResponse.setFields(fields, null, flags, fields);
  outResponse.setExists(true);
  dr.close();
  dir.close();
}
private void writeStandardTermVector(TermVectorsResponse outResponse) throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer());
  conf.setOpenMode(OpenMode.CREATE);
  IndexWriter writer = new IndexWriter(dir, conf);

  FieldType type = new FieldType(TextField.TYPE_STORED);
  type.setStoreTermVectorOffsets(true);
  type.setStoreTermVectorPayloads(false);
  type.setStoreTermVectorPositions(true);
  type.setStoreTermVectors(true);
  type.freeze();

  Document d = new Document();
  d.add(new Field("id", "abc", StringField.TYPE_STORED));
  d.add(new Field("title", "the1 quick brown fox jumps over the1 lazy dog", type));
  d.add(new Field("desc", "the1 quick brown fox jumps over the1 lazy dog", type));

  writer.updateDocument(new Term("id", "abc"), d);
  writer.commit();
  writer.close();

  DirectoryReader dr = DirectoryReader.open(dir);
  IndexSearcher s = new IndexSearcher(dr);
  TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
  ScoreDoc[] scoreDocs = search.scoreDocs;
  int doc = scoreDocs[0].doc;
  Fields termVectors = dr.getTermVectors(doc);
  EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
  outResponse.setFields(termVectors, null, flags, termVectors);
  dr.close();
  dir.close();
}
public FbEntityIndexer(String namefile, String outputDir, String indexingStrategy) throws IOException {
  if (!indexingStrategy.equals("exact") && !indexingStrategy.equals("inexact")) {
    throw new RuntimeException("Bad indexing strategy: " + indexingStrategy);
  }

  IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_44,
      indexingStrategy.equals("exact") ? new KeywordAnalyzer() : new StandardAnalyzer(Version.LUCENE_44));
  config.setOpenMode(OpenMode.CREATE);
  config.setRAMBufferSizeMB(256.0);
  indexer = new IndexWriter(new SimpleFSDirectory(new File(outputDir)), config);
  this.nameFile = namefile;
}
LiveIndexWriterConfig(Analyzer analyzer, Version matchVersion) {
  this.analyzer = analyzer;
  this.matchVersion = matchVersion;
  ramBufferSizeMB = IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB;
  maxBufferedDocs = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS;
  maxBufferedDeleteTerms = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS;
  readerTermsIndexDivisor = IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR;
  mergedSegmentWarmer = null;
  // TODO: this should be private to the codec, not settable here
  termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL;
  delPolicy = new KeepOnlyLastCommitDeletionPolicy();
  commit = null;
  useCompoundFile = IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM;
  openMode = OpenMode.CREATE_OR_APPEND;
  similarity = IndexSearcher.getDefaultSimilarity();
  mergeScheduler = new ConcurrentMergeScheduler();
  writeLockTimeout = IndexWriterConfig.WRITE_LOCK_TIMEOUT;
  indexingChain = DocumentsWriterPerThread.defaultIndexingChain;
  codec = Codec.getDefault();
  if (codec == null) {
    throw new NullPointerException();
  }
  infoStream = InfoStream.getDefault();
  mergePolicy = new TieredMergePolicy();
  flushPolicy = new FlushByRamOrCountsPolicy();
  readerPooling = IndexWriterConfig.DEFAULT_READER_POOLING;
  indexerThreadPool = new DocumentsWriterPerThreadPool(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES);
  perThreadHardLimitMB = IndexWriterConfig.DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB;
}
public Indexer(String indexDirectoryPath) throws IOException {
  Directory indexDirectory = FSDirectory.open(Paths.get(indexDirectoryPath));
  Analyzer analyzer = new StandardAnalyzer();
  IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
  iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
  writer = new IndexWriter(indexDirectory, iwc);
}
/**
 * Indexes a single document.
 *
 * @throws IOException
 * @throws SAXException
 * @throws TikaException
 */
public static void indexDoc(IndexWriter writer, Path file, TextArea results, long lastModified)
    throws IOException, SAXException, TikaException {
  AutoDetectParser parser = new AutoDetectParser();
  BodyContentHandler handler = new BodyContentHandler();
  Metadata metadata = new Metadata();
  try (InputStream stream = Files.newInputStream(file)) {
    parser.parse(stream, handler, metadata);

    Document doc = new Document();
    String[] metadataNames = metadata.names();
    for (String name : metadataNames) {
      doc.add(new TextField(name, metadata.get(name), Field.Store.YES));
    }
    doc.add(new StringField("path", file.toString(), Field.Store.YES));
    doc.add(new LongPoint("modified", lastModified));

    results.appendText("Title: " + metadata.get("title") + "\n");
    results.appendText("Artists: " + metadata.get("xmpDM:artist") + "\n");
    results.appendText("Genre: " + metadata.get("xmpDM:genre") + "\n");
    results.appendText("Year: " + metadata.get("xmpDM:releaseDate") + "\n");

    if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
      // New index, so we just add the document (no old document can be there):
      results.appendText("adding " + file + "\n");
      writer.addDocument(doc);
    } else {
      // Existing index (an old copy of this document may have been indexed):
      results.appendText("updating " + file + "\n");
      writer.updateDocument(new Term("path", file.toString()), doc);
    }
  }
}
private void initResourceIndex() throws IOException {
  Directory dir = FSDirectory.open(indexDir.toFile());
  StandardAnalyzer indexAnalyzer = new StandardAnalyzer();
  IndexWriterConfig indexConfig = new IndexWriterConfig(version, indexAnalyzer);
  indexConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
  this.writer = new IndexWriter(dir, indexConfig);
}
private void openWriter() throws IOException {
  StandardAnalyzer indexAnalyzer = new StandardAnalyzer();
  IndexWriterConfig indexConfig = new IndexWriterConfig(version, indexAnalyzer);
  indexConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
  this.hitsWriter = new IndexWriter(FSDirectory.open(this.hitsDirectory), indexConfig);
  storeSearchResult(new SearchResult(SEARSIA_HIT));
  this.hitsWriter.commit();
}
private void writeStandardTermVector(TermVectorsResponse outResponse) throws IOException {
  Directory dir = LuceneTestCase.newDirectory();
  IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer());
  conf.setOpenMode(OpenMode.CREATE);
  IndexWriter writer = new IndexWriter(dir, conf);

  FieldType type = new FieldType(TextField.TYPE_STORED);
  type.setStoreTermVectorOffsets(true);
  type.setStoreTermVectorPayloads(false);
  type.setStoreTermVectorPositions(true);
  type.setStoreTermVectors(true);
  type.freeze();

  Document d = new Document();
  d.add(new Field("id", "abc", StringField.TYPE_STORED));
  d.add(new Field("plaintext", "the1 quick brown fox jumps over the1 lazy dog comment", type));
  d.add(new Field("desc", "the1 quick brown fox jumps over the1 lazy dog comment", type));

  writer.updateDocument(new Term("id", "abc"), d);
  writer.commit();
  writer.close();

  DirectoryReader dr = DirectoryReader.open(dir);
  IndexSearcher s = new IndexSearcher(dr);
  TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
  ScoreDoc[] scoreDocs = search.scoreDocs;
  int doc = scoreDocs[0].doc;
  Fields termVectors = dr.getTermVectors(doc);
  EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
  outResponse.setFields(termVectors, null, flags, termVectors);
  dr.close();
  dir.close();
}