public static String getHighlightString(String text, String keyword)
        throws IOException, InvalidTokenOffsetsException {
    TermQuery query = new TermQuery(new Term("f", keyword));
    QueryScorer scorer = new QueryScorer(query);
    SimpleHTMLFormatter formatter =
            new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
    Highlighter highlighter = new Highlighter(formatter, scorer);
    Fragmenter fragmenter = new SimpleFragmenter(50);
    highlighter.setTextFragmenter(fragmenter);
    TokenStream tokenStream = new StandardAnalyzer(Version.LUCENE_20)
            .tokenStream("f", new StringReader(text));
    // Extract the best-scoring fragments and embed them in a minimal HTML page.
    String result = highlighter.getBestFragments(tokenStream, text, 30, "...");
    StringBuilder writer = new StringBuilder();
    writer.append("<html>");
    writer.append("<style>\n"
            + ".highlight {\n"
            + "  background: yellow;\n"
            + "}\n"
            + "</style>");
    writer.append("<body>");
    writer.append(result); // previously appended an empty string, discarding the fragments
    writer.append("</body></html>");
    return writer.toString();
}
@Override
public String highlight(String locale, boolean useStopWords, String query, String content,
        String pre, String post, int preview) {
    Analyzer analyzer = termAnalyzers.findAnalyzer(locale, useStopWords);
    QueryParser parser = new QueryParser(defaultField, analyzer);
    String summary = null;
    try {
        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(pre, post);
        Highlighter hg = new Highlighter(formatter, new QueryTermScorer(parser.parse(query)));
        hg.setMaxDocCharsToAnalyze(preview);
        hg.setTextFragmenter(new SimpleFragmenter(100));
        TokenStream tokens = TokenSources.getTokenStream(defaultField, content, analyzer);
        summary = hg.getBestFragments(tokens, content, 4, " ... ");
    } catch (InvalidTokenOffsetsException | IOException | ParseException ex) {
        LOG.error("Failed to highlight", ex);
    }
    return StringUtils.isBlank(summary) ? null : summary;
}
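This implementation wires a locale-aware analyzer into the standard Highlighter pipeline and swallows failures into a null return. A minimal sketch of a call site, assuming a hypothetical instance named searchService; the locale code, tags, and helper names are illustrative, not from the original class:

    // Hypothetical caller; instance name, locale, and tags are assumptions.
    String snippet = searchService.highlight(
            "en",            // locale used to pick the analyzer
            true,            // apply stop words
            "lucene search", // user query
            documentText,    // stored field content to summarize
            "<em>", "</em>", // pre/post highlight tags
            5000);           // max chars of the document to analyze
    if (snippet != null) {
        // blank or failed highlights come back as null, so guard before use
        System.out.println(snippet);
    }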
/**
 * This method is intended for use with
 * <tt>testHighlightingWithDefaultField()</tt>.
 */
private String highlightField(Query query, String fieldName, String text)
        throws IOException, InvalidTokenOffsetsException {
    TokenStream tokenStream = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true,
            MockTokenFilter.ENGLISH_STOPSET, true).tokenStream(fieldName, new StringReader(text));
    // Assuming "<B>" and "</B>" are used to highlight
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
    MyQueryScorer scorer = new MyQueryScorer(query, fieldName, FIELD_NAME);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));
    String rv = highlighter.getBestFragments(tokenStream, text, 1, "(FIELD TEXT TRUNCATED)");
    return rv.length() == 0 ? text : rv;
}
static String displayHtmlHighlight(Query query, Analyzer analyzer, String fieldName,
        String fieldContent, int fragmentSize) throws IOException, InvalidTokenOffsetsException {
    Highlighter highlighter = new Highlighter(
            new SimpleHTMLFormatter("<font color='red'>", "</font>"),
            new QueryScorer(query));
    Fragmenter fragmenter = new SimpleFragmenter(fragmentSize);
    highlighter.setTextFragmenter(fragmenter);
    return highlighter.getBestFragment(analyzer, fieldName, fieldContent);
}
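A minimal sketch of how this utility might be called, assuming a Lucene release where QueryParser takes (field, analyzer) and StandardAnalyzer has a no-argument constructor; the field name and sample text are illustrative:

    // Hypothetical caller for displayHtmlHighlight.
    static void demo() throws Exception {
        Analyzer analyzer = new StandardAnalyzer();
        Query query = new QueryParser("content", analyzer).parse("lucene");
        String html = displayHtmlHighlight(query, analyzer, "content",
                "Apache Lucene is a full-text search library written in Java.", 100);
        System.out.println(html); // e.g. Apache <font color='red'>Lucene</font> is a ...
    }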
/**
 * This method is intended for use with
 * <tt>testHighlightingWithDefaultField()</tt>.
 */
private String highlightField(Query query, String fieldName, String text)
        throws IOException, InvalidTokenOffsetsException {
    TokenStream tokenStream = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true,
            MockTokenFilter.ENGLISH_STOPSET).tokenStream(fieldName, text);
    // Assuming "<B>" and "</B>" are used to highlight
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
    MyQueryScorer scorer = new MyQueryScorer(query, fieldName, FIELD_NAME);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));
    String rv = highlighter.getBestFragments(tokenStream, text, 1, "(FIELD TEXT TRUNCATED)");
    return rv.length() == 0 ? text : rv;
}
protected Highlighter createHighlighter(org.apache.lucene.search.Query luceneQuery) {
    SimpleHTMLFormatter format = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
    Highlighter highlighter = new Highlighter(format, new QueryScorer(luceneQuery)); // highlighting
    // highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));
    highlighter.setTextFragmenter(new SimpleFragmenter(200));
    return highlighter;
}
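A factory like this pairs naturally with Highlighter.getBestFragment(Analyzer, String, String), which returns null when no query term matches. A minimal usage sketch; the analyzer, field name, and storedText variables are assumptions for illustration:

    // Hypothetical usage of createHighlighter.
    Highlighter highlighter = createHighlighter(luceneQuery);
    String fragment = highlighter.getBestFragment(analyzer, "content", storedText);
    if (fragment == null) {
        fragment = storedText; // no term matched; fall back to the raw field text
    }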
public static void main(String[] args) throws Exception {
    if (args.length != 0) {
        QUERY = args[0];
    }
    // Wrap Paoding as an Analyzer that satisfies Lucene's Analyzer contract.
    Analyzer analyzer = new PaodingAnalyzer();
    // Read the text.txt file that sits next to this class.
    String content = ContentReader.readText(English.class);
    // From here on it is standard Lucene indexing and search code.
    Directory ramDir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(ramDir, analyzer);
    Document doc = new Document();
    Field fd = new Field(FIELD_NAME, content, Field.Store.YES,
            Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(fd);
    writer.addDocument(doc);
    writer.optimize();
    writer.close();

    IndexReader reader = IndexReader.open(ramDir);
    String queryString = QUERY;
    QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
    Query query = parser.parse(queryString);
    Searcher searcher = new IndexSearcher(ramDir);
    query = query.rewrite(reader);
    System.out.println("Searching for: " + query.toString(FIELD_NAME));
    Hits hits = searcher.search(query);

    BoldFormatter formatter = new BoldFormatter();
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(50));
    for (int i = 0; i < hits.length(); i++) {
        String text = hits.doc(i).get(FIELD_NAME);
        int maxNumFragmentsRequired = 5;
        String fragmentSeparator = "...";
        TermPositionVector tpv = (TermPositionVector) reader
                .getTermFreqVector(hits.id(i), FIELD_NAME);
        TokenStream tokenStream = TokenSources.getTokenStream(tpv);
        String result = highlighter.getBestFragments(tokenStream, text,
                maxNumFragmentsRequired, fragmentSeparator);
        System.out.println("\n" + result);
    }
    reader.close();
}
public static void main(String[] args) throws Exception {
    if (args.length != 0) {
        QUERY = args[0];
    }
    // Wrap Paoding as an Analyzer that satisfies Lucene's Analyzer contract.
    Analyzer analyzer = new PaodingAnalyzer();
    // Read the text.txt file that sits next to this class.
    String content = ContentReader.readText(Chinese.class);
    // From here on it is standard Lucene indexing and search code.
    Directory ramDir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(ramDir, analyzer);
    Document doc = new Document();
    Field fd = new Field(FIELD_NAME, content, Field.Store.YES,
            Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(fd);
    writer.addDocument(doc);
    writer.optimize();
    writer.close();

    IndexReader reader = IndexReader.open(ramDir);
    String queryString = QUERY;
    QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
    Query query = parser.parse(queryString);
    Searcher searcher = new IndexSearcher(ramDir);
    query = query.rewrite(reader);
    System.out.println("Searching for: " + query.toString(FIELD_NAME));
    Hits hits = searcher.search(query);

    BoldFormatter formatter = new BoldFormatter();
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(50));
    for (int i = 0; i < hits.length(); i++) {
        String text = hits.doc(i).get(FIELD_NAME);
        int maxNumFragmentsRequired = 5;
        String fragmentSeparator = "...";
        TermPositionVector tpv = (TermPositionVector) reader
                .getTermFreqVector(hits.id(i), FIELD_NAME);
        TokenStream tokenStream = TokenSources.getTokenStream(tpv);
        String result = highlighter.getBestFragments(tokenStream, text,
                maxNumFragmentsRequired, fragmentSeparator);
        System.out.println("\n" + result);
    }
    reader.close();
}