/**
 * Checks that plain highlighting of a text field still works when the query also
 * contains a geo clause.
 *
 * <p>The geo query is combined (as SHOULD clauses) with a term query on the
 * {@code "text"} field for the word "failure". The sample text is highlighted twice:
 * once with the boolean query as built, and once with the result of
 * {@code boolQuery.rewrite(null)}. Both runs must produce the same fragment with
 * "failure" wrapped in {@code <B>} tags.
 *
 * @param geoQuery the geo query to combine with the term query
 * @throws IOException if tokenizing or rewriting fails
 * @throws InvalidTokenOffsetsException if the highlighter encounters invalid token offsets
 */
public void checkGeoQueryHighlighting(Query geoQuery) throws IOException, InvalidTokenOffsetsException {
    final String text = "Arbitrary text field which should not cause a failure";
    final String expected = "Arbitrary text field which should not cause a <B>failure</B>";

    // Properly typed map instead of a raw Map, so the compiler checks what goes in.
    Map<String, Analyzer> analysers = new HashMap<>();
    analysers.put("text", new StandardAnalyzer());
    FieldNameAnalyzer fieldNameAnalyzer = new FieldNameAnalyzer(analysers);

    Query termQuery = new TermQuery(new Term("text", "failure"));
    Query boolQuery = new BooleanQuery.Builder()
            .add(new BooleanClause(geoQuery, BooleanClause.Occur.SHOULD))
            .add(new BooleanClause(termQuery, BooleanClause.Occur.SHOULD))
            .build();

    // First pass: highlight with the query exactly as built.
    org.apache.lucene.search.highlight.Highlighter highlighter =
            new org.apache.lucene.search.highlight.Highlighter(new CustomQueryScorer(boolQuery));
    String fragment = highlighter.getBestFragment(fieldNameAnalyzer.tokenStream("text", text), text);
    assertThat(fragment, equalTo(expected));

    // Second pass: highlight with the rewritten query; the outcome must not change.
    Query rewritten = boolQuery.rewrite(null);
    highlighter = new org.apache.lucene.search.highlight.Highlighter(new CustomQueryScorer(rewritten));
    fragment = highlighter.getBestFragment(fieldNameAnalyzer.tokenStream("text", text), text);
    assertThat(fragment, equalTo(expected));
}
/** * Select only not already indexed files from a collection. * * @param fileCollection the file collection * @param location the location * @return the linked list * @throws IOException Signals that an I/O exception has occurred. * @throws ParseException the parse exception * @throws InvalidTokenOffsetsException the invalid token offsets exception */ // Return the collection with extracted (not indexed) Files public static LinkedList<File> selectOnlyNotAlreadyIndexedFilesFromACollection(List<File> fileCollection, String location) throws IOException, ParseException, InvalidTokenOffsetsException { // Getting the path of our index and opens a reader to read the index in the current folder Path dirPath = Paths.get(location); FSDirectory dir = FSDirectory.open(dirPath); DirectoryReader indexReader = DirectoryReader.open(dir); LinkedList<File> onlyNotIndexedFiles = new LinkedList<File>(); for (File file : fileCollection) { // Creates new Term using the path of the html file Term indexTerm = new Term("path", file.getPath()); // Checks if a document in the index has the same path if (indexReader.docFreq(indexTerm) > 0) { // If yes do not add the document continue; } else { onlyNotIndexedFiles.add(file); } } indexReader.close(); return onlyNotIndexedFiles; }
/**
 * Returns the best highlighted fragment of {@code text} for the given field, post-processed
 * with the clean-up and line-feed patterns.
 *
 * <p>Matches of {@code cleanUpPattern} are removed and matches of {@code replaceLFPattern}
 * are replaced by a single space.
 *
 * @param fieldName the field the text belongs to
 * @param text the text to highlight; may be {@code null}
 * @return the processed fragment, or {@code null} when {@code text} is {@code null}, the
 *         highlighter yields no fragment, the processed fragment is empty, or highlighting
 *         fails with an {@code IOException} or {@code InvalidTokenOffsetsException}
 */
String highlight(String fieldName, String text) {
    if (text == null) {
        return null;
    }

    final String fragment;
    try {
        fragment = highlighter.getBestFragment(analyzer, fieldName, text);
    } catch (InvalidTokenOffsetsException | IOException e) {
        // Highlighting is best effort: a failure is reported as "no highlight".
        return null;
    }
    if (fragment == null) {
        return null;
    }

    final String stripped = cleanUpPattern.matcher(fragment).replaceAll("");
    final String singleLine = replaceLFPattern.matcher(stripped).replaceAll(" ");
    return singleLine.isEmpty() ? null : singleLine;
}
/** * ���� * @param analyzer * @param searcher * @throws IOException * @throws InvalidTokenOffsetsException */ public void searToHighlighterCss(Analyzer analyzer,IndexSearcher searcher) throws IOException, InvalidTokenOffsetsException{ Term term =new Term("Content", new String("免费".getBytes(),"GBK"));//��ѯ��������˼����Ҫ�����Ա�Ϊ��������� TermQuery query =new TermQuery(term); TopDocs docs =searcher.search(query, 10);//���� /**�Զ����ע�����ı���ǩ*/ SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"hightlighterCss\">","</span>"); /**����QueryScorer*/ QueryScorer scorer=new QueryScorer(query); /**����Fragmenter*/ Fragmenter fragmenter = new SimpleSpanFragmenter(scorer); Highlighter highlight=new Highlighter(formatter,scorer); highlight.setTextFragmenter(fragmenter); for(ScoreDoc doc:docs.scoreDocs){//��ȡ���ҵ��ĵ���������� Document document =searcher.doc(doc.doc); String value = document.getField("Content").toString(); TokenStream tokenStream = analyzer.tokenStream("Content", new StringReader(value)); String str1 = highlight.getBestFragment(tokenStream, value); System.out.println(str1); } }
/** * * @throws IOException * @throws InvalidTokenOffsetsException */ private List<Person> translateDocs(final LuceneIndex<Person> luceneDb, final IndexSearcher searcher, final TopDocs topDocs, final Query query) throws IOException, InvalidTokenOffsetsException { final List<Person> dtcResult = new ArrayList<>(); for (final ScoreDoc scoreDoc : topDocs.scoreDocs) { final Document document = searcher.doc(scoreDoc.doc); final Person dto = luceneDb.getObjectIndexed(document.get(UUID_KEY)); dto.setScore(Math.round(scoreDoc.score / Math.max(topDocs.getMaxScore(), 2.5f) * 100)); dtcResult.add(dto); //final QueryScorer queryScorer = new QueryScorer(query); //final Highlighter highlighter = new Highlighter(queryScorer); //highlighter.setTextFragmenter(new SimpleSpanFragmenter(queryScorer, Integer.MAX_VALUE)); //highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE); //final String[] strings = highlighter.getBestFragments(indexAnalyser, "fullname", dto.getName() + " " + dto.getFirstname(), 5); //System.out.println("found: " + Arrays.toString(strings)); } return dtcResult; }
/**
 * Builds a highlighted summary of {@code content} for the given query string.
 *
 * <p>The query is parsed against {@code defaultField} with the analyzer selected for the
 * locale. Up to four fragments of 100 characters are joined with {@code " ... "}, and at
 * most {@code preview} characters of the content are analyzed.
 *
 * @param locale the locale used to select the analyzer
 * @param useStopWords whether the selected analyzer should use stop words
 * @param query the query string to parse
 * @param content the text to highlight
 * @param pre the text inserted before each match
 * @param post the text inserted after each match
 * @param preview the maximum number of content characters to analyze
 * @return the summary, or {@code null} when it is blank or highlighting failed (the failure is logged)
 */
@Override
public String highlight(String locale, boolean useStopWords, String query, String content, String pre,
        String post, int preview) {
    final Analyzer localeAnalyzer = termAnalyzers.findAnalyzer(locale, useStopWords);
    final QueryParser queryParser = new QueryParser(defaultField, localeAnalyzer);

    String highlighted = null;
    try {
        final SimpleHTMLFormatter tagFormatter = new SimpleHTMLFormatter(pre, post);
        final QueryTermScorer termScorer = new QueryTermScorer(queryParser.parse(query));
        final Highlighter snippetHighlighter = new Highlighter(tagFormatter, termScorer);
        snippetHighlighter.setMaxDocCharsToAnalyze(preview);
        snippetHighlighter.setTextFragmenter(new SimpleFragmenter(100));

        final TokenStream contentTokens = TokenSources.getTokenStream(defaultField, content, localeAnalyzer);
        highlighted = snippetHighlighter.getBestFragments(contentTokens, content, 4, " ... ");
    } catch (InvalidTokenOffsetsException | IOException | ParseException ex) {
        LOG.error("Failed to highlight", ex);
    }

    if (StringUtils.isBlank(highlighted)) {
        return null;
    }
    return highlighted;
}
/** * This method intended for use with * <tt>testHighlightingWithDefaultField()</tt> */ private String highlightField(Query query, String fieldName, String text) throws IOException, InvalidTokenOffsetsException { TokenStream tokenStream = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true).tokenStream(fieldName, new StringReader(text)); // Assuming "<B>", "</B>" used to highlight SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(); MyQueryScorer scorer = new MyQueryScorer(query, fieldName, FIELD_NAME); Highlighter highlighter = new Highlighter(formatter, scorer); highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE)); String rv = highlighter.getBestFragments(tokenStream, text, 1, "(FIELD TEXT TRUNCATED)"); return rv.length() == 0 ? text : rv; }
/**
 * Returns the best fragment of a field's content with query matches wrapped in a red
 * {@code <font>} tag.
 *
 * @param query the query whose matches are highlighted
 * @param analyzer the analyzer used to tokenize the content
 * @param fieldName the name of the field the content belongs to
 * @param fieldContent the text to highlight
 * @param fragmentSize the size passed to the {@code SimpleFragmenter}
 * @return whatever {@code Highlighter.getBestFragment} returns for the content
 * @throws IOException if tokenizing fails
 * @throws InvalidTokenOffsetsException if the highlighter encounters invalid token offsets
 */
static String displayHtmlHighlight(Query query, Analyzer analyzer, String fieldName, String fieldContent,
        int fragmentSize) throws IOException, InvalidTokenOffsetsException {
    SimpleHTMLFormatter redFont = new SimpleHTMLFormatter("<font color='red'>", "</font>");
    QueryScorer queryScorer = new QueryScorer(query);

    Highlighter htmlHighlighter = new Highlighter(redFont, queryScorer);
    htmlHighlighter.setTextFragmenter(new SimpleFragmenter(fragmentSize));

    String bestFragment = htmlHighlighter.getBestFragment(analyzer, fieldName, fieldContent);
    return bestFragment;
}
/** * Performs highlighting for a given query and a given document. * * @param indexSearcher the IndexSearcher performing the query * @param query the Tripod LuceneQuery * @param scoreDoc the Lucene ScoreDoc * @param doc the Lucene Document * @param highlighter the Highlighter to use * @param result the QueryResult to add the highlights to * @throws IOException if an error occurs performing the highlighting * @throws InvalidTokenOffsetsException if an error occurs performing the highlighting */ protected void performHighlighting(final IndexSearcher indexSearcher, final Query query, final ScoreDoc scoreDoc, final Document doc, final Highlighter highlighter, final QR result) throws IOException, InvalidTokenOffsetsException { if (query.getHighlightFields() == null || query.getHighlightFields().isEmpty()) { return; } final List<Highlight> highlights = new ArrayList<>(); final List<String> hlFieldNames = getHighlightFieldNames(query, doc); // process each field to highlight on for (String hlField : hlFieldNames) { final String text = doc.get(hlField); if (StringUtils.isEmpty(text)) { continue; } final List<String> snippets = new ArrayList<>(); final Fields tvFields = indexSearcher.getIndexReader().getTermVectors(scoreDoc.doc); final int maxStartOffset = highlighter.getMaxDocCharsToAnalyze() -1; // get the snippets for the given field final TokenStream tokenStream = TokenSources.getTokenStream(hlField, tvFields, text, analyzer, maxStartOffset); final TextFragment[] textFragments = highlighter.getBestTextFragments(tokenStream, text, false, 10); for (TextFragment textFragment : textFragments) { if (textFragment != null && textFragment.getScore() > 0) { snippets.add(textFragment.toString()); } } // if we have snippets then add a highlight result to the QueryResult if (snippets.size() > 0) { highlights.add(new Highlight(hlField, snippets)); } } result.setHighlights(highlights); }
/**
 * Handles {@code GET /search/{key}} and returns the search result as a UTF-8 JSON body.
 *
 * @param key the search key taken from the request path
 * @return the string produced by {@code searchData(key)}
 * @throws IOException if {@code searchData} fails with an I/O error
 * @throws ParseException if {@code searchData} cannot parse the key
 * @throws InvalidTokenOffsetsException if highlighting inside {@code searchData} fails
 */
@ResponseBody
@RequestMapping(
        value = "/search/{key}",
        method = RequestMethod.GET,
        produces = "application/json;charset=UTF-8")
public String search(@PathVariable String key)
        throws IOException, ParseException, InvalidTokenOffsetsException {
    final String json = searchData(key);
    return json;
}
public void testHighlightCustomQuery() throws IOException, InvalidTokenOffsetsException { String s1 = "I call our world Flatland, not because we call it so,"; // Verify that a query against the default field results in text being // highlighted // regardless of the field name. CustomQuery q = new CustomQuery(new Term(FIELD_NAME, "world")); String expected = "I call our <B>world</B> Flatland, not because we call it so,"; String observed = highlightField(q, "SOME_FIELD_NAME", s1); if (VERBOSE) System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed); assertEquals( "Query in the default field results in text for *ANY* field being highlighted", expected, observed); // Verify that a query against a named field does not result in any // highlighting // when the query field name differs from the name of the field being // highlighted, // which in this example happens to be the default field name. q = new CustomQuery(new Term("text", "world")); expected = s1; observed = highlightField(q, FIELD_NAME, s1); if (VERBOSE) System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed); assertEquals( "Query in a named field does not result in highlighting when that field isn't in the query", s1, highlightField(q, FIELD_NAME, s1)); }
/** * This method intended for use with * <tt>testHighlightingWithDefaultField()</tt> */ private String highlightField(Query query, String fieldName, String text) throws IOException, InvalidTokenOffsetsException { TokenStream tokenStream = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET).tokenStream(fieldName, text); // Assuming "<B>", "</B>" used to highlight SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(); MyQueryScorer scorer = new MyQueryScorer(query, fieldName, FIELD_NAME); Highlighter highlighter = new Highlighter(formatter, scorer); highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE)); String rv = highlighter.getBestFragments(tokenStream, text, 1, "(FIELD TEXT TRUNCATED)"); return rv.length() == 0 ? text : rv; }
/** * Highlight (bold,color) query words in result-document. Set HighlightResult for content or description. * * @param query * @param analyzer * @param doc * @param resultDocument * @throws IOException */ private void doHighlight(final Query query, final Analyzer analyzer, final Document doc, final ResultDocument resultDocument) throws IOException { final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(HIGHLIGHT_PRE_TAG, HIGHLIGHT_POST_TAG), new QueryScorer(query)); // Get 3 best fragments of content and seperate with a "..." try { // highlight content final String content = doc.get(AbstractOlatDocument.CONTENT_FIELD_NAME); TokenStream tokenStream = analyzer.tokenStream(AbstractOlatDocument.CONTENT_FIELD_NAME, new StringReader(content)); String highlightResult = highlighter.getBestFragments(tokenStream, content, 3, HIGHLIGHT_SEPARATOR); // if no highlightResult is in content => look in description if (highlightResult.length() == 0) { final String description = doc.get(AbstractOlatDocument.DESCRIPTION_FIELD_NAME); tokenStream = analyzer.tokenStream(AbstractOlatDocument.DESCRIPTION_FIELD_NAME, new StringReader(description)); highlightResult = highlighter.getBestFragments(tokenStream, description, 3, HIGHLIGHT_SEPARATOR); resultDocument.setHighlightingDescription(true); } resultDocument.setHighlightResult(highlightResult); // highlight title final String title = doc.get(AbstractOlatDocument.TITLE_FIELD_NAME); tokenStream = analyzer.tokenStream(AbstractOlatDocument.TITLE_FIELD_NAME, new StringReader(title)); final String highlightTitle = highlighter.getBestFragments(tokenStream, title, 3, " "); resultDocument.setHighlightTitle(highlightTitle); } catch (final InvalidTokenOffsetsException e) { log.warn("", e); } }
private List<Person> executeQuery(final LuceneIndex<Person> luceneDb, final Query query, final int maxRow) throws IOException, InvalidTokenOffsetsException { try (final IndexReader indexReader = luceneDb.createIndexReader()) { final IndexSearcher searcher = new IndexSearcher(indexReader); //1. Exécution des la Requête final TopDocs topDocs = searcher.search(query, null, maxRow); //2. Traduction du résultat Lucene en une Collection return translateDocs(luceneDb, searcher, topDocs, query); } catch (final TooManyClauses e) { throw new RuntimeException("Too many clauses", e); } }
/**
 * Checks that highlighting works when the query contains a {@code GeoPointInBBoxQuery}.
 *
 * <p>The test name and the query type previously did not match: this test built a distance
 * query while its sibling built the bounding-box query. Each test now builds the query it
 * is named after.
 *
 * @throws IOException if tokenizing or rewriting fails
 * @throws InvalidTokenOffsetsException if the highlighter encounters invalid token offsets
 */
public void testGeoPointInBBoxQueryHighlighting() throws IOException, InvalidTokenOffsetsException {
    Query geoQuery = new GeoPointInBBoxQuery("geo_point", -64.92354174306496, 61.10078883158897, -170.15625,
            118.47656249999999);
    checkGeoQueryHighlighting(geoQuery);
}

/**
 * Checks that highlighting works when the query contains a {@code GeoPointDistanceQuery}.
 *
 * @throws IOException if tokenizing or rewriting fails
 * @throws InvalidTokenOffsetsException if the highlighter encounters invalid token offsets
 */
public void testGeoPointDistanceQueryHighlighting() throws IOException, InvalidTokenOffsetsException {
    Query geoQuery = new GeoPointDistanceQuery("geo_point", -64.92354174306496, -170.15625, 5576757);
    checkGeoQueryHighlighting(geoQuery);
}
/**
 * Checks that highlighting works when the query contains a {@code GeoPointInPolygonQuery}.
 *
 * <p>The polygon is given by four vertices whose first and last entries are identical.
 *
 * @throws IOException if tokenizing or rewriting fails
 * @throws InvalidTokenOffsetsException if the highlighter encounters invalid token offsets
 */
public void testGeoPointInPolygonQueryHighlighting() throws IOException, InvalidTokenOffsetsException {
    final double[] latitudes = {0, 60, 0, 0};
    final double[] longitudes = {0, 60, 90, 0};
    checkGeoQueryHighlighting(new GeoPointInPolygonQuery("geo_point", latitudes, longitudes));
}
private String searchData(String key) throws IOException, ParseException, InvalidTokenOffsetsException { Directory directory = FSDirectory.open(new File(filePath)); IndexSearcher indexSearcher = new IndexSearcher(directory); QueryParser queryParser = new QueryParser(Version.LUCENE_31, "foods", new SmartChineseAnalyzer(Version.LUCENE_31, true)); //queryParser.setDefaultOperator(Operator.AND); Query query = queryParser.parse(key); TopDocs docs = indexSearcher.search(query, 10); QueryScorer queryScorer = new QueryScorer(query, "foods"); Highlighter highlighter = new Highlighter(queryScorer); highlighter.setTextFragmenter(new SimpleSpanFragmenter(queryScorer)); List<SearchResult> searchResults = new ArrayList<SearchResult>(); if (docs != null) { for (ScoreDoc scoreDoc : docs.scoreDocs) { Document doc = indexSearcher.doc(scoreDoc.doc); TokenStream tokenStream = TokenSources.getAnyTokenStream( indexSearcher.getIndexReader(), scoreDoc.doc, "foods", doc, new SmartChineseAnalyzer(Version.LUCENE_31, true)); SearchResult searchResult = new SearchResult(); searchResult.setRestaurantId(Long.valueOf(doc.get("id"))); searchResult.setRestaurantName(doc.get("restaurant_name")); searchResult.setKey(key); searchResult.setFoods(Arrays.asList(highlighter. getBestFragment(tokenStream, doc.get("foods")).split(" "))); searchResults.add(searchResult); } } else { searchResults = null; } indexSearcher.close(); directory.close(); return new Gson().toJson(searchResults); }
/**
 * Builds a query from the keywords and runs it against {@code index}.
 *
 * @param keywords the search keywords
 * @param searchedDtFieldList the fields passed to {@code createQuery} as searched fields
 * @param maxRows the maximum number of results
 * @param boostedField the field passed to {@code createQuery} as boosted field
 * @return the persons returned by {@code executeQuery}
 * @throws IOException if reading the index fails
 * @throws InvalidTokenOffsetsException if {@code executeQuery} reports invalid token offsets
 */
private List<Person> getCollection(final String keywords, final String[] searchedDtFieldList, final int maxRows,
        final String boostedField) throws IOException, InvalidTokenOffsetsException {
    final Query searchQuery = createQuery(keywords, searchedDtFieldList, boostedField);
    final List<Person> matches = executeQuery(index, searchQuery, maxRows);
    return matches;
}
/**
 * Builds a new document containing the highlighted values of {@code document}.
 *
 * <p>NOTE: This method will not preserve the correct field types; highlighted values are
 * added as {@code StringField}s.
 *
 * <p>The field-less field and the fields listed in {@code FIELDS_NOT_TO_HIGHLIGHT} are copied
 * as they are. A numeric field is added, wrapped in the tags, only when
 * {@code shouldNumberBeHighlighted} says so. For any other field, up to 10 text fragments
 * are requested and each one with a positive score is added.
 *
 * @param docId the id of the document in {@code reader}
 * @param document the document whose fields are highlighted
 * @param query the query whose matches are highlighted
 * @param fieldManager provides the field-less field name, the query analyzer and field metadata
 * @param reader the index reader used to obtain token streams
 * @param preTag the text inserted before each match
 * @param postTag the text inserted after each match
 * @return the document holding the highlighted fields
 * @throws IOException if reading from the index or tokenizing fails
 * @throws InvalidTokenOffsetsException if the highlighter encounters invalid token offsets
 */
public static Document highlight(int docId, Document document, Query query, FieldManager fieldManager,
        IndexReader reader, String preTag, String postTag) throws IOException, InvalidTokenOffsetsException {
    String fieldLessFieldName = fieldManager.getFieldLessFieldName();
    Query defaultQuery = fixSuperQuery(query, null, fieldLessFieldName);
    Analyzer queryAnalyzer = fieldManager.getAnalyzerForQuery();
    SimpleHTMLFormatter tagFormatter = new SimpleHTMLFormatter(preTag, postTag);

    Document highlighted = new Document();
    for (IndexableField field : document) {
        String fieldName = field.name();
        boolean copyVerbatim = fieldLessFieldName.equals(fieldName) || FIELDS_NOT_TO_HIGHLIGHT.contains(fieldName);
        if (copyVerbatim) {
            highlighted.add(field);
            continue;
        }

        String text = field.stringValue();
        Number numericValue = field.numericValue();
        // Fields that are also indexed field-less need a query fixed for this specific field.
        Query fieldQuery = fieldManager.isFieldLessIndexed(fieldName)
                ? fixSuperQuery(query, fieldName, fieldLessFieldName)
                : defaultQuery;

        if (numericValue != null) {
            if (shouldNumberBeHighlighted(fieldName, numericValue, fieldQuery)) {
                highlighted.add(new StringField(fieldName, preTag + text + postTag, Store.YES));
            }
            continue;
        }

        Highlighter fieldHighlighter = new Highlighter(tagFormatter, new QueryScorer(fieldQuery, fieldName));
        TokenStream fieldTokens = TokenSources.getAnyTokenStream(reader, docId, fieldName, queryAnalyzer);
        for (TextFragment fragment : fieldHighlighter.getBestTextFragments(fieldTokens, text, false, 10)) {
            if (fragment != null && fragment.getScore() > 0) {
                highlighted.add(new StringField(fieldName, fragment.toString(), Store.YES));
            }
        }
    }
    return highlighted;
}
/**
 * Highlights the query matches in a full text using the given delimiters.
 *
 * <p>The fragment size and the number of analyzed characters are both set to
 * {@code Integer.MAX_VALUE}.
 *
 * @param query the query whose matches are highlighted
 * @param analyzer the analyzer used to tokenize the text
 * @param fieldName the name of the field the text belongs to
 * @param fulltext the text to highlight
 * @param startDelimiter the text inserted before each match
 * @param stopDelimiter the text inserted after each match
 * @return whatever {@code Highlighter.getBestFragment} returns for the text
 * @throws IOException if tokenizing fails
 * @throws InvalidTokenOffsetsException if the highlighter encounters invalid token offsets
 */
public static String getHighlightedField(Query query, Analyzer analyzer, String fieldName, String fulltext,
        final String startDelimiter, final String stopDelimiter) throws IOException, InvalidTokenOffsetsException {
    Formatter delimiterFormatter = new SimpleHTMLFormatter(startDelimiter, stopDelimiter);
    QueryScorer scorer = new QueryScorer(query);

    Highlighter fullTextHighlighter = new Highlighter(delimiterFormatter, scorer);
    fullTextHighlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, Integer.MAX_VALUE));
    fullTextHighlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);

    String highlighted = fullTextHighlighter.getBestFragment(analyzer, fieldName, fulltext);
    return highlighted;
}
/**
 * Filters the indexed persons by keywords using a full-text search.
 *
 * <p>The search covers the fields {@code "fullname"} and {@code "fullname_nospace"},
 * returns at most 20 results and passes {@code "fullname"} as the boosted field.
 *
 * @param keywords the search keywords
 * @return the resulting list
 * @throws RuntimeException wrapping any {@code IOException} or
 *         {@code InvalidTokenOffsetsException} raised by the search
 */
public List<Person> getCollection(final String keywords) {
    final String[] searchedFields = { "fullname", "fullname_nospace" };
    try {
        return getCollection(keywords, searchedFields, 20, "fullname");
    } catch (final IOException | InvalidTokenOffsetsException e) {
        throw new RuntimeException("Erreur d'indexation", e);
    }
}
/**
 * Highlights the query matches in a field value using the delimiters configured in
 * {@code Config.SEARCH_DELIMITER_START} and {@code Config.SEARCH_DELIMITER_END}.
 *
 * @param query the query whose matches are highlighted
 * @param analyzer the analyzer used to tokenize the value
 * @param fieldName the name of the field the value belongs to
 * @param fieldValue the text to highlight
 * @return the result of the six-argument {@code getHighlightedField} overload
 * @throws IOException if tokenizing fails
 * @throws InvalidTokenOffsetsException if the highlighter encounters invalid token offsets
 */
public static String getHighlightedField(Query query, Analyzer analyzer, String fieldName, String fieldValue)
        throws IOException, InvalidTokenOffsetsException {
    final String startDelimiter = Config.SEARCH_DELIMITER_START;
    final String stopDelimiter = Config.SEARCH_DELIMITER_END;
    return getHighlightedField(query, analyzer, fieldName, fieldValue, startDelimiter, stopDelimiter);
}