/** * ajax简历索引 */ @Override public void ajaxbuild() { try { FileUtils.deleteDirectory(new File(AUTOCOMPLETEPATH)); logger.info("delete autocomplete file success"); Directory dir = FSDirectory.open(Paths.get(AUTOCOMPLETEPATH)); SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer(); AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(dir, analyzer); //创建Blog测试数据 List<Blog> blogs = blogMapper.getAllBlog(); suggester.build(new BlogIterator(blogs.iterator())); } catch (IOException e) { System.err.println("Error!"); } }
/**
 * Looks up autocomplete suggestions for the given keyword, sorted by
 * ascending length and truncated to at most 7 entries.
 *
 * @param keyword user-typed prefix/infix to complete
 * @return ordered set of suggestions; empty on I/O failure
 */
@Override
public Set<String> ajaxsearch(String keyword) {
    try {
        Directory dir = FSDirectory.open(Paths.get(AUTOCOMPLETEPATH));
        SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
        AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(dir, analyzer);
        List<String> list = lookup(suggester, keyword);
        // FIX: the original comparator never returned 0 for equal lengths,
        // violating the Comparator contract (TimSort can throw
        // "Comparison method violates its general contract!").
        list.sort((a, b) -> Integer.compare(a.length(), b.length()));
        Set<String> set = new LinkedHashSet<>(list);
        // Trim to the first 7 suggestions (helper defined elsewhere in this class).
        ssubSet(set, 7);
        return set;
    } catch (IOException e) {
        logger.error("autocomplete lookup failed for keyword: " + keyword, e);
        // FIX: return an empty set rather than null so callers can iterate safely.
        return new LinkedHashSet<>();
    }
}
private static void init() { if (indexWriter==null || searcherManager==null) { try { // load directory path Properties properties = PropertiesUtil.loadProperties(PropertiesUtil.DEFAULT_CONFIG); String luceneDirectory = PropertiesUtil.getString(properties, "lucene.directory"); // directory directory = new SimpleFSDirectory(Paths.get(luceneDirectory)); // IndexWriter Analyzer analyzer = new SmartChineseAnalyzer(); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); indexWriter = new IndexWriter(directory, indexWriterConfig); // SearcherManager searcherManager = new SearcherManager(indexWriter, false, new SearcherFactory()); TrackingIndexWriter trackingIndexWriter = new TrackingIndexWriter(indexWriter); ControlledRealTimeReopenThread controlledRealTimeReopenThread = new ControlledRealTimeReopenThread<IndexSearcher>(trackingIndexWriter, searcherManager, 5.0, 0.025); controlledRealTimeReopenThread.setDaemon(true);//设为后台进程 controlledRealTimeReopenThread.start(); } catch (IOException e) { logger.error("", e); } } }
private void index() throws Exception { IndexWriter writer = null; directory = new RAMDirectory(); // 索引文件在内存 Analyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_46); IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_46, analyzer); writer = new IndexWriter(directory, iwConfig); List<Product> products = htmlFetcher.fetch(); for (Product product : products) { Document doc = new Document(); doc.add(new TextField("description", product.getDescription(), Store.YES)); doc.add(new StringField("username", product.getUserName(), Store.YES)); doc.add(new StringField("url", product.getUrl(), Store.YES)); writer.addDocument(doc); } writer.commit(); writer.close(); }
public static void main(String[] args) { try { Directory dir = FSDirectory.open(Paths.get(AUTOCOMPLETEPATH)); RAMDirectory indexDir = new RAMDirectory(); SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer(); AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(dir, analyzer); IBlogService blogService = new BlogServiceImpl(); lookup(suggester, "jav"); // new BlogServiceImpl().ajaxsearch("北京"); } catch (Exception e) { e.printStackTrace(); } }
/**
 * Opens the index directory at {@code BASE_PATH} and returns a fresh
 * IndexWriter configured with the Chinese analyzer. Caller closes the writer.
 *
 * @return a new IndexWriter over {@code BASE_PATH}
 * @throws Exception if the directory cannot be opened or the writer created
 */
private IndexWriter getWriter() throws Exception {
    dir = FSDirectory.open(Paths.get(BASE_PATH));
    IndexWriterConfig config = new IndexWriterConfig(new SmartChineseAnalyzer());
    return new IndexWriter(dir, config);
}
/**
 * Creates an IndexWriter for the given index path with a tuned merge policy.
 *
 * @param indexPath filesystem path of the index directory
 * @param create    true to overwrite any existing index (CREATE);
 *                  false to open or create (CREATE_OR_APPEND)
 * @return a new IndexWriter; caller is responsible for closing it
 * @throws IOException if the directory cannot be opened or the writer created
 */
public static IndexWriter getIndexWriter(String indexPath, boolean create) throws IOException {
    Directory dir = FSDirectory.open(Paths.get(indexPath));
    Analyzer analyzer = new SmartChineseAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    LogMergePolicy mergePolicy = new LogByteSizeMergePolicy();
    mergePolicy.setMergeFactor(50);
    mergePolicy.setMaxMergeDocs(5000);
    // FIX: the configured merge policy was never attached to the config,
    // so the mergeFactor/maxMergeDocs tuning above was silently ignored.
    iwc.setMergePolicy(mergePolicy);
    iwc.setOpenMode(create
            ? IndexWriterConfig.OpenMode.CREATE
            : IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    return new IndexWriter(dir, iwc);
}
/** * 获取IndexWriter实例 * * @return * @throws Exception */ private IndexWriter getWriter() throws Exception { //Analyzer analyzer=new StandardAnalyzer(); // 标准分词器 SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); IndexWriter writer = new IndexWriter(dir, iwc); return writer; }
public static void search(String indexDir, String q) throws Exception { Directory dir = FSDirectory.open(Paths.get(indexDir)); IndexReader reader = DirectoryReader.open(dir); IndexSearcher is = new IndexSearcher(reader); // Analyzer analyzer=new StandardAnalyzer(); // 标准分词器 SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer(); QueryParser parser = new QueryParser("desc", analyzer); Query query = parser.parse(q); long start = System.currentTimeMillis(); TopDocs hits = is.search(query, 10); long end = System.currentTimeMillis(); System.out.println("匹配 " + q + " ,总共花费" + (end - start) + "毫秒" + "查询到" + hits.totalHits + "个记录"); QueryScorer scorer = new QueryScorer(query); Fragmenter fragmenter = new SimpleSpanFragmenter(scorer); SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>"); Highlighter highlighter = new Highlighter(simpleHTMLFormatter, scorer); highlighter.setTextFragmenter(fragmenter); for (ScoreDoc scoreDoc : hits.scoreDocs) { Document doc = is.doc(scoreDoc.doc); System.out.println(doc.get("city")); System.out.println(doc.get("desc")); String desc = doc.get("desc"); if (desc != null) { TokenStream tokenStream = analyzer.tokenStream("desc", new StringReader(desc)); System.out.println(highlighter.getBestFragment(tokenStream, desc)); } } reader.close(); }
/**
 * Creates an IndexWriter over the index location configured in
 * env-config.properties (key: lucene index path).
 *
 * @return a new IndexWriter; caller closes it
 * @throws Exception if configuration lookup or writer creation fails
 */
private IndexWriter getWriter() throws Exception {
    // Index location comes from env-config.properties.
    String indexPath = PropertiesUtil.getValue(EnvEnum.LUCENE_INDEX_PATH.val());
    dir = FSDirectory.open(Paths.get(indexPath));
    IndexWriterConfig iwc = new IndexWriterConfig(new SmartChineseAnalyzer());
    return new IndexWriter(dir, iwc);
}
/**
 * Creates an IndexWriter over a hard-coded index directory.
 *
 * @return a new IndexWriter; caller closes it
 * @throws Exception if the writer cannot be created
 */
private IndexWriter getWriter() throws Exception {
    // The index is kept on the C: drive; adjust the path for your setup.
    dir = FSDirectory.open(Paths.get("C://lucene"));
    IndexWriterConfig iwc = new IndexWriterConfig(new SmartChineseAnalyzer());
    return new IndexWriter(dir, iwc);
}
/**
 * Builds the stop-word set for the analyzer from a user-supplied array.
 *
 * @param array stop words; null means "use the analyzer's default Chinese
 *              stop set", an empty array disables stop words entirely
 * @return the resulting (case-insensitive) stop-word set, never null
 * @throws UDFArgumentException declared for interface consistency with callers
 */
@Nonnull
private static CharArraySet stopWords(final String[] array) throws UDFArgumentException {
    // FIX: dropped @Nonnull from the parameter — it contradicted this
    // explicit, intentional null-handling branch.
    if (array == null) {
        return SmartChineseAnalyzer.getDefaultStopSet();
    }
    if (array.length == 0) {
        return CharArraySet.EMPTY_SET;
    }
    return new CharArraySet(Arrays.asList(array), true /* ignoreCase */);
}
/** * 获取IndexWriter实例 * @return * @throws Exception */ private IndexWriter getWriter()throws Exception{ //Analyzer analyzer=new StandardAnalyzer(); // 标准分词器 SmartChineseAnalyzer analyzer=new SmartChineseAnalyzer(); IndexWriterConfig iwc=new IndexWriterConfig(analyzer); IndexWriter writer=new IndexWriter(dir, iwc); return writer; }
public static void search(String indexDir,String q)throws Exception{ Directory dir=FSDirectory.open(Paths.get(indexDir)); IndexReader reader=DirectoryReader.open(dir); IndexSearcher is=new IndexSearcher(reader); // Analyzer analyzer=new StandardAnalyzer(); // 标准分词器 SmartChineseAnalyzer analyzer=new SmartChineseAnalyzer(); QueryParser parser=new QueryParser("desc", analyzer); Query query=parser.parse(q); long start=System.currentTimeMillis(); TopDocs hits=is.search(query, 10); long end=System.currentTimeMillis(); System.out.println("匹配 "+q+" ,总共花费"+(end-start)+"毫秒"+"查询到"+hits.totalHits+"个记录"); QueryScorer scorer=new QueryScorer(query); Fragmenter fragmenter=new SimpleSpanFragmenter(scorer); SimpleHTMLFormatter simpleHTMLFormatter=new SimpleHTMLFormatter("<b><font color='red'>","</font></b>"); Highlighter highlighter=new Highlighter(simpleHTMLFormatter, scorer); highlighter.setTextFragmenter(fragmenter); for(ScoreDoc scoreDoc:hits.scoreDocs){ Document doc=is.doc(scoreDoc.doc); System.out.println(doc.get("city")); System.out.println(doc.get("desc")); String desc=doc.get("desc"); if(desc!=null){ TokenStream tokenStream=analyzer.tokenStream("desc", new StringReader(desc)); System.out.println(highlighter.getBestFragment(tokenStream, desc)); } } reader.close(); }
/**
 * Demo: tokenizes a Chinese text with SmartChineseAnalyzer using a custom
 * stop-word set (user-supplied words merged with the analyzer defaults) and
 * prints each token together with its type.
 */
public static void main(final String[] args) {
    try {
        final String text = "lucene分析器使用分词器和过滤器构成一个“管道”,文本在流经这个管道后成为可以进入索引的最小单位,因此,一个标准的分析器有两个部分组成,一个是分词器tokenizer,它用于将文本按照规则切分为一个个可以进入索引的最小单位。另外一个是TokenFilter,它主要作用是对切出来的词进行进一步的处理(如去掉敏感词、英文大小写转换、单复数处理)等。lucene中的Tokenstram方法首先创建一个tokenizer对象处理Reader对象中的流式文本,然后利用TokenFilter对输出流进行过滤处理";
        final ArrayList<String> myStopWords = CollectionLiterals.<String>newArrayList("的", "在", "了", "呢", ",", "0", ":", ",", "是", "流");
        // Merge the custom stop words with the analyzer's bundled defaults.
        final CharArraySet stopWords = new CharArraySet(0, true);
        for (final String word : myStopWords) {
            stopWords.add(word);
        }
        final Iterator<Object> defaults = SmartChineseAnalyzer.getDefaultStopSet().iterator();
        while (defaults.hasNext()) {
            stopWords.add(defaults.next());
        }
        final SmartChineseAnalyzer sca = new SmartChineseAnalyzer(stopWords);
        final TokenStream ts = sca.tokenStream("field", text);
        final CharTermAttribute term = ts.<CharTermAttribute>addAttribute(CharTermAttribute.class);
        final TypeAttribute type = ts.<TypeAttribute>addAttribute(TypeAttribute.class);
        ts.reset();
        // Print "term | type" for each token produced by the analyzer.
        while (ts.incrementToken()) {
            InputOutput.<String>println(term.toString() + " | " + type.type());
        }
        ts.end();
        ts.close();
    } catch (Throwable e) {
        throw Exceptions.sneakyThrow(e);
    }
}
/**
 * Creates the provider and its SmartChineseAnalyzer, initialized with the
 * analyzer's bundled default Chinese stop-word set.
 */
public SmartChineseAnalyzerProvider(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
    super(indexSettings, name, settings);
    this.analyzer = new SmartChineseAnalyzer(SmartChineseAnalyzer.getDefaultStopSet());
}
/** Returns the SmartChineseAnalyzer instance held by this provider. */
@Override
public SmartChineseAnalyzer get() {
    return analyzer;
}
/** Smoke test: constructing a SmartChineseAnalyzer must succeed without throwing. */
@Test
public void testSmartCn() {
    this.analyzer = new SmartChineseAnalyzer();
}
private String searchData(String key) throws IOException, ParseException, InvalidTokenOffsetsException { Directory directory = FSDirectory.open(new File(filePath)); IndexSearcher indexSearcher = new IndexSearcher(directory); QueryParser queryParser = new QueryParser(Version.LUCENE_31, "foods", new SmartChineseAnalyzer(Version.LUCENE_31, true)); //queryParser.setDefaultOperator(Operator.AND); Query query = queryParser.parse(key); TopDocs docs = indexSearcher.search(query, 10); QueryScorer queryScorer = new QueryScorer(query, "foods"); Highlighter highlighter = new Highlighter(queryScorer); highlighter.setTextFragmenter(new SimpleSpanFragmenter(queryScorer)); List<SearchResult> searchResults = new ArrayList<SearchResult>(); if (docs != null) { for (ScoreDoc scoreDoc : docs.scoreDocs) { Document doc = indexSearcher.doc(scoreDoc.doc); TokenStream tokenStream = TokenSources.getAnyTokenStream( indexSearcher.getIndexReader(), scoreDoc.doc, "foods", doc, new SmartChineseAnalyzer(Version.LUCENE_31, true)); SearchResult searchResult = new SearchResult(); searchResult.setRestaurantId(Long.valueOf(doc.get("id"))); searchResult.setRestaurantName(doc.get("restaurant_name")); searchResult.setKey(key); searchResult.setFoods(Arrays.asList(highlighter. getBestFragment(tokenStream, doc.get("foods")).split(" "))); searchResults.add(searchResult); } } else { searchResults = null; } indexSearcher.close(); directory.close(); return new Gson().toJson(searchResults); }