/**
 * Indexes the elements of the given corpus into an on-disk Lucene index.
 * Each resolved element specification writes its matching elements through
 * a shared {@link IndexWriter}; I/O failures are rethrown as module errors.
 */
@Override public void process(ProcessingContext<Corpus> ctx, Corpus corpus) throws ModuleException {
    // KeywordAnalyzer emits each field value as a single token (exact-match indexing).
    try (KeywordAnalyzer kwa = new KeywordAnalyzer()) {
        IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_36, kwa);
        // "append" preserves an existing index; otherwise it is recreated from scratch.
        writerConfig.setOpenMode(append ? OpenMode.CREATE_OR_APPEND : OpenMode.CREATE);
        try (Directory dir = FSDirectory.open(indexDir)) {
            try (IndexWriter writer = new IndexWriter(dir, writerConfig)) {
                AlvisDBIndexerResolvedObjects resObj = getResolvedObjects();
                Logger logger = getLogger(ctx);
                EvaluationContext evalCtx = new EvaluationContext(logger);
                // Delegate the actual per-element indexing to each resolved specification.
                for (ADBElements.Resolved ent : resObj.elements) {
                    ent.indexElements(logger, writer, evalCtx, corpus);
                }
            }
        } catch (IOException e) {
            // Convert the checked IOException into this module's exception type.
            rethrow(e);
        }
    }
}
public Hits skynetsearch(String query, String Field, String indexPath) { String indexfield = Field + ":"; String querytext = indexfield + query.trim(); Hits result = null; try { String[] search_fields = {Field}; //String indexPath = StorageHandler.GetDocIndexPath(); IndexSearcher searcher = new IndexSearcher(indexPath); KeywordAnalyzer analyzer = new KeywordAnalyzer(); Query lucenequery = MultiFieldQueryParser.parse(query, search_fields, analyzer); // QueryParser queryparse = new QueryParser(query,analyzer); // Query lucenequery = queryparse.parse(querytext); result = searcher.search(lucenequery); } catch (IOException e) { e.printStackTrace(); } catch (Exception ex) { System.out.println(ex + ""); } return result; }
public Hits skynetsearch(String query, String Field) { String indexfield = Field + ":"; String querytext = indexfield + query.trim(); Hits result = null; try { String[] search_fields = {Field}; String indexPath = StorageHandler.GetDocIndexPath(); IndexSearcher searcher = new IndexSearcher(indexPath); KeywordAnalyzer analyzer = new KeywordAnalyzer(); Query lucenequery = MultiFieldQueryParser.parse(query, search_fields, analyzer); // QueryParser queryparse = new QueryParser(query,analyzer); // Query lucenequery = queryparse.parse(querytext); result = searcher.search(lucenequery); } catch (IOException e) { e.printStackTrace(); } catch (Exception ex) { System.out.println(ex + ""); } return result; }
/**
 * Runs the query against several index fields at once, OR-combining the
 * per-field clauses; terms are kept verbatim by the keyword analyzer.
 *
 * @param query the raw query text
 * @param Field the index fields to search across
 * @param indexPath filesystem path of the Lucene index
 * @return the matching {@link Hits}, or {@code null} if the search failed
 */
public Hits skynetsearchMulti(String query, String[] Field, String indexPath) {
    Hits result = null;
    try {
        IndexSearcher searcher = new IndexSearcher(indexPath);
        MultiFieldQueryParser parser = new MultiFieldQueryParser(Field, new KeywordAnalyzer());
        parser.setDefaultOperator(QueryParser.Operator.OR);
        result = searcher.search(parser.parse(query));
    } catch (IOException e) {
        e.printStackTrace();
    } catch (Exception ex) {
        System.out.println(ex + "");
    }
    return result;
}
public Hits skynetsearch(String query, String Field) { String indexfield = Field + ":"; String querytext = indexfield + query.trim(); Hits result = null; try { String[] search_fields = {Field}; String indexPath = storageHandlerImpl.GetDocIndexPath(); IndexSearcher searcher = new IndexSearcher(indexPath); KeywordAnalyzer analyzer = new KeywordAnalyzer(); Query lucenequery = MultiFieldQueryParser.parse(query, search_fields, analyzer); // QueryParser queryparse = new QueryParser(query,analyzer); // Query lucenequery = queryparse.parse(querytext); result = searcher.search(lucenequery); } catch (IOException e) { e.printStackTrace(); } catch (Exception ex) { System.out.println(ex + ""); } return result; }
/**
 * Builds the analyzer used by the index: exact-match (keyword) analysis by
 * default, with whitespace tokenization for the identifier fields and a
 * lower-casing whitespace variant for the case-insensitive field.
 *
 * @return the configured per-field analyzer
 */
public static Analyzer createAnalyzer() {
    final PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new KeywordAnalyzer());
    wrapper.addAnalyzer(DocumentUtil.FIELD_IDENTS, new WhitespaceAnalyzer());
    wrapper.addAnalyzer(DocumentUtil.FIELD_FEATURE_IDENTS, new WhitespaceAnalyzer());
    wrapper.addAnalyzer(DocumentUtil.FIELD_CASE_INSENSITIVE_FEATURE_IDENTS, new DocumentUtil.LCWhitespaceAnalyzer());
    return wrapper;
}
/** * Creates a transactional document based index. * The returned {@link DocumentIndex} is not cached, next call with the same arguments returns a different instance * of {@link DocumentIndex}. The caller is responsible to cache the returned {@link DocumentIndex}. * @param cacheFolder the folder in which the index should be stored * @param cache the document caching provider * @return the document based index * @since 2.19 */ @NonNull public static DocumentIndex.Transactional createTransactionalDocumentIndex ( final @NonNull File cacheFolder, final @NonNull DocumentIndexCache cache) throws IOException { Parameters.notNull("cacheFolder", cacheFolder); //NOI18N Parameters.notNull("cache", cache); //NOI18N return createTransactionalDocumentIndex( createTransactionalIndex(cacheFolder, new KeywordAnalyzer()), cache); }
/**
 * Test fixture: clears the working directory and creates a fresh
 * keyword-analyzed Lucene index under {@code <workdir>/cache}.
 * Fix: the result of {@code mkdirs()} was previously ignored, so a failed
 * directory creation surfaced only later as an obscure index error.
 *
 * @throws Exception if the working directory or index cannot be set up
 */
private void setupLuceneIndex() throws Exception {
    clearWorkDir();
    final File wd = getWorkDir();
    cache = new File(wd, "cache");
    // mkdirs() returns false both on failure and when the directory already
    // exists, so also accept a pre-existing directory.
    if (!cache.mkdirs() && !cache.isDirectory()) {
        throw new IllegalStateException("Could not create cache directory: " + cache);
    }
    index = LuceneIndex.create(cache, new KeywordAnalyzer());
}
/**
 * Verifies that closing the index while a store operation is in flight on
 * another thread does not make that operation fail.
 * Ordering is driven by latches: the worker signals when it is inside the
 * convertor ({@code signal}), then blocks on {@code slot} until the main
 * thread releases it just before calling {@code close()}.
 */
public void testAsyncClose() throws Exception {
    final CountDownLatch slot = new CountDownLatch(1);
    final CountDownLatch signal = new CountDownLatch(1);
    final CountDownLatch done = new CountDownLatch(1);
    final AtomicReference<Exception> exception = new AtomicReference<Exception>();
    final Index index = IndexManager.createTransactionalIndex(indexFolder, new KeywordAnalyzer());
    final Thread worker = new Thread(new Runnable() {
        @Override
        public void run() {
            try {
                index.store(
                    new ArrayList<String>(Arrays.asList("foo")),    //NOI18N
                    Collections.<String>emptySet(),
                    new TestInsertConvertor(slot, signal),
                    new TestDeleteConvertor(),
                    true);
            } catch (Exception ex) {
                // Captured for assertion on the main thread.
                exception.set(ex);
            } finally {
                done.countDown();
            }
        }
    });
    worker.start();
    // Wait until the worker is inside store(), then let it proceed and close concurrently.
    signal.await();
    slot.countDown();
    index.close();
    done.await();
    // The concurrent close must not have caused the store to fail.
    assertNull(exception.get());
}
/**
 * Lazily creates the in-memory overlay index on first use; the method is
 * {@code synchronized} so concurrent callers observe a single instance.
 *
 * @return the overlay index, never {@code null}
 * @throws IOException if the memory index cannot be created
 */
@NonNull
private synchronized DocumentIndex2 getOverlay() throws IOException {
    if (overlay != null) {
        return overlay;
    }
    overlay = (DocumentIndex2) IndexManager.createDocumentIndex(IndexManager.createMemoryIndex(new KeywordAnalyzer()));
    return overlay;
}
/**
 * Opens (recreating from scratch) the Lucene index in {@code directory}.
 * Fix: if the {@link IndexWriter} constructor failed, the freshly created
 * {@code niofsDirectory} — and the single-instance lock it holds — leaked;
 * it is now closed and nulled out before the failure is propagated.
 *
 * @throws Exception if the directory cannot be created or the writer opened
 */
public void open() throws Exception {
    if ( !directory.exists() && !directory.mkdirs() ) {
        throw new IOException("Could not make: " + directory);
    }
    // OpenMode.CREATE: any existing index content is discarded.
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_35, new KeywordAnalyzer())
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    niofsDirectory = new NIOFSDirectory(directory, new SingleInstanceLockFactory());
    try {
        writer = new IndexWriter(niofsDirectory, conf);
    } catch (Exception e) {
        // Release the directory (and its lock) so a retry can succeed.
        niofsDirectory.close();
        niofsDirectory = null;
        throw e;
    }
}
/** * Opens collection for writing. * @throws IOException if opening collection fails */ private void openForWriting() throws IOException { if (writer == null) { if (reader != null) { reader.close(); reader = null; } // create writer used to store data Analyzer analyzer = new KeywordAnalyzer(); writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED); } }
/**
 * Verifies that a per-field analyzer keeps the "partnum" field verbatim
 * (keyword analysis) while the default SimpleAnalyzer lower-cases the rest.
 */
public void testPerFieldAnalyzer() throws Exception {
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
    wrapper.addAnalyzer("partnum", new KeywordAnalyzer());
    QueryParser parser = new QueryParser(Version.LUCENE_41, "description", wrapper);
    Query query = parser.parse("partnum:Q36 AND SPACE");
    // "Q36" must survive untouched; "SPACE" is lower-cased by the default analyzer.
    assertEquals("Q36 kept as-is", "+partnum:Q36 +space", query.toString("description"));
    assertEquals("doc found!", 1, TestUtil.hitCount(searcher, query));
}
/**
 * Exercises {@code getStatus(true)} across the index lifecycle:
 * empty => EMPTY, stored => VALID, orphan lock => INVALID (and wiped),
 * re-stored => VALID, corrupted segment file => INVALID (and wiped).
 */
public void testIsValid() throws Exception {
    final File wd = getWorkDir();
    final File cache = new File(wd,"cache");
    cache.mkdirs();
    final LuceneIndex index = LuceneIndex.create(cache, new KeywordAnalyzer());
    //Empty index => invalid
    assertEquals(Index.Status.EMPTY, index.getStatus(true));
    clearValidityCache(index);
    List<String> refs = new ArrayList<String>();
    refs.add("A");
    Set<String> toDel = new HashSet<String>();
    index.store(
        refs,
        toDel,
        new StrToDocConvertor("resources"),
        new StrToQueryCovertor("resource"),
        true);
    //Existing index => valid
    assertEquals(Index.Status.VALID, index.getStatus(true));
    assertTrue(cache.listFiles().length>0);
    clearValidityCache(index);
    createLock(index);
    //Index with orphan lock => invalid; the broken index is deleted on detection
    assertEquals(Index.Status.INVALID, index.getStatus(true));
    assertTrue(cache.listFiles().length==0);
    refs.add("B");
    clearValidityCache(index);
    index.store(
        refs,
        toDel,
        new StrToDocConvertor("resources"),
        new StrToQueryCovertor("resource"),
        true);
    assertEquals(Index.Status.VALID, index.getStatus(true));
    assertTrue(cache.listFiles().length>0);
    //Broken index => invalid
    clearValidityCache(index);
    File bt = null;
    for (File file : cache.listFiles()) {
        // either the compound file or the fields information file must be present
        if (file.getName().endsWith(".cfs") || file.getName().endsWith(".fnm")) {
            bt = file;
            break;
        }
    }
    assertNotNull(bt);
    // Overwrite the start of the segment file with zeros to corrupt the index.
    FileOutputStream out = new FileOutputStream(bt);
    try {
        out.write(new byte[] {0,0,0,0,0,0,0,0,0,0}, 0, 10);
    } finally {
        out.close();
    }
    assertEquals(Index.Status.INVALID, index.getStatus(true));
    assertTrue(cache.listFiles().length==0);
}
/**
 * Verifies reader isolation: while a second store ("b") is still in progress
 * on a worker thread, a concurrent query must see only the first committed
 * document ("a"). Latches pin the worker inside its insert convertor while
 * the main thread queries.
 */
public void testConcurrentReadWrite() throws Exception {
    final Index index = IndexManager.createTransactionalIndex(indexFolder, new KeywordAnalyzer());
    // Commit an initial document "a" before starting the concurrent writer.
    index.store(
        new ArrayList<String>(Arrays.asList("a")),  //NOI18N
        Collections.<String>emptySet(),
        new TestInsertConvertor(),
        new TestDeleteConvertor(),
        true);
    final CountDownLatch slot = new CountDownLatch(1);
    final CountDownLatch signal = new CountDownLatch(1);
    final CountDownLatch done = new CountDownLatch(1);
    final AtomicReference<Exception> result = new AtomicReference<Exception>();
    final Thread worker = new Thread(new Runnable() {
        @Override
        public void run() {
            try {
                index.store(
                    new ArrayList<String>(Arrays.asList("b")),  //NOI18N
                    Collections.<String>emptySet(),
                    new TestInsertConvertor(slot, signal),
                    new TestDeleteConvertor(),
                    true);
            } catch (Exception e) {
                // Captured for assertion on the main thread.
                result.set(e);
            } finally {
                done.countDown();
            }
        }
    });
    worker.start();
    // Wait until the worker is mid-store, holding the write uncommitted.
    signal.await();
    final Collection<String> data = new ArrayList<String>();
    index.query(
        data,
        new Convertor<Document,String>(){
            @Override
            public String convert(Document p) {
                return p.get(FLD_KEY);
            }
        },
        null,
        new AtomicBoolean(),
        new PrefixQuery(new Term(FLD_KEY,""))); //NOI18N
    // Only the committed document "a" must be visible to the reader.
    assertEquals(1, data.size());
    assertEquals("a", data.iterator().next()); //NOI18N
    // Release the worker, let it finish, and check it saw no error.
    slot.countDown();
    done.await();
    assertNull(result.get());
}
@Test // 1000 times: 2973 mills. // 1000 times: 2927 mills. // 1000 times: 2967 mills. // // 10000 times: 21268 mills. // verified: ok public void search() throws Exception { createCommonDaoImpl(); // FullTextSession fullTextSession = Search.getFullTextSession(sessionFactory.openSession()); // StopAnalyzer 完全相同才能找到資料,同=,無法查中文 // StandardAnalyzer 能找到資料,同like Analyzer analyzer = new KeywordAnalyzer(); // Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_34); QueryParser parser = new QueryParser(Version.LUCENE_31, "id", analyzer); parser.setAllowLeadingWildcard(true); parser.setLowercaseExpandedTerms(true); // // name:Marry // name:瑪莉 // String search = String.Format("name:{0} AND title:{1}", "中国建设银行", // "doc1"); StringBuilder lql = new StringBuilder(); // #issue: 大寫找不到??? // lql.append("id:*a*"); // lql.append("audit:*sys*"); lql.append("names:*a*"); org.apache.lucene.search.Query luceneQuery = parser.parse(lql.toString()); FullTextQuery fullTextQuery = fullTextSession.createFullTextQuery(luceneQuery, DogPoImpl.class); // List result = null; int count = 1; long beg = System.currentTimeMillis(); for (int i = 0; i < count; i++) { result = fullTextQuery.list(); } long end = System.currentTimeMillis(); System.out.println(count + " times: " + (end - beg) + " mills. "); System.out.println(result.size() + ", " + result); assertNotNull(result); }
private void convertCanonicOutput(CanonicOutput canonicOutput,Document document,LuceneOptions luceneOptions) { SimilarityForms sf = SimilarityFormConverterWrapper.getConverter().process(canonicOutput); document.add(newField("co.configuration.id", canonicOutput.getApplicationRun().getConfiguration().getId().toString(), luceneOptions, new StandardAnalyzer(Version.LUCENE_36) ) ); document.add(newField("co.revision.id", canonicOutput.getApplicationRun().getRevision().getId().toString(), luceneOptions, new StandardAnalyzer(Version.LUCENE_36) ) ); document.add(newField("co.applicationrun.id", canonicOutput.getApplicationRun().getId().toString(), luceneOptions, new StandardAnalyzer(Version.LUCENE_36) ) ); if(canonicOutput.getAnnotations() != null && !canonicOutput.getAnnotations().isEmpty()) { for(Annotation a : canonicOutput.getAnnotations()) { document.add(newField("co.annotation", a.getAnnotationContent(), luceneOptions, new StandardAnalyzer(Version.LUCENE_36))); } } // mathml is converted into Single String representation // which is stored in co.distanceForm document.add(newField("co.distanceForm",sf.getDistanceForm(),luceneOptions,null)); PerFieldAnalyzerWrapper keywordAnalyzer = new PerFieldAnalyzerWrapper(new KeywordAnalyzer()); for(String s : sf.getCountForm().keySet()) { document.add(newField("co.element", s+"="+sf.getCountForm().get(s), luceneOptions, keywordAnalyzer)); } logger.info("Canonic output ["+canonicOutput.getId()+"] indexed."); }
/**
 * Returns a keyword analyzer (treats the whole input as one token).
 * A fresh instance is created on every call.
 *
 * @return a new {@link KeywordAnalyzer}
 */
public KeywordAnalyzer getKeywordAnalyzer() {
    return new KeywordAnalyzer();
}
/** * Creates a document based index * The returned {@link Index} is not cached, next call with the same arguments returns a different instance * of {@link Index}. The caller is responsible to cache the returned {@link DocumentIndex}. * @param cacheFolder the folder in which the index should be stored * @param cache the document caching provider * @return the document based index * @since 2.18.0 */ public static DocumentIndex createDocumentIndex ( final @NonNull File cacheFolder, final @NonNull DocumentIndexCache cache) throws IOException { Parameters.notNull("cacheFolder", cacheFolder); //NOI18N Parameters.notNull("cache", cache); //NOI18N return createDocumentIndex(createIndex(cacheFolder, new KeywordAnalyzer()), cache); }
/**
 * Creates a document based index, optionally read only, backed by a
 * keyword-analyzed index stored in {@code cacheFolder}.
 * The returned {@link Index} is not cached; each call with the same arguments
 * returns a different instance, so the caller must cache the returned
 * {@link DocumentIndex}.
 * @param cacheFolder the folder in which the index should be stored
 * @param isWritable <code>false</code> if it is read only index
 * @return the document based index
 * @throws IOException if the underlying index cannot be created
 * @since 2.27.1
 */
public static DocumentIndex createDocumentIndex (final @NonNull File cacheFolder, boolean isWritable) throws IOException {
    Parameters.notNull("cacheFolder", cacheFolder);
    final KeywordAnalyzer analyzer = new KeywordAnalyzer();
    return createDocumentIndex(createIndex(cacheFolder, analyzer, isWritable));
}
/**
 * Creates a document based index backed by a keyword-analyzed index stored in
 * {@code cacheFolder}.
 * The returned {@link Index} is not cached; each call with the same arguments
 * returns a different instance, so the caller must cache the returned
 * {@link DocumentIndex}.
 * @param cacheFolder the folder in which the index should be stored
 * @return the document based index
 * @throws IOException if the underlying index cannot be created
 * @since 1.1
 */
public static DocumentIndex createDocumentIndex (final @NonNull File cacheFolder) throws IOException {
    Parameters.notNull("cacheFolder", cacheFolder);
    final KeywordAnalyzer analyzer = new KeywordAnalyzer();
    return createDocumentIndex(createIndex(cacheFolder, analyzer));
}