public void testMultiPhrasePrefixQuery() throws Exception {
    Analyzer analyzer = new StandardAnalyzer();
    Directory dir = newDirectory();
    String value = "The quick brown fox.";
    IndexReader ir = indexOneDoc(dir, "text", value, analyzer);
    MultiPhrasePrefixQuery query = new MultiPhrasePrefixQuery();
    query.add(new Term("text", "quick"));
    query.add(new Term("text", "brown"));
    query.add(new Term("text", "fo"));
    IndexSearcher searcher = newSearcher(ir);
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertThat(topDocs.totalHits, equalTo(1));
    int docId = topDocs.scoreDocs[0].doc;
    CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
    CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, analyzer,
        passageFormatter, null, value, false);
    Snippet[] snippets = highlighter.highlightField("text", query, docId, 5);
    assertThat(snippets.length, equalTo(1));
    assertThat(snippets[0].getText(), equalTo("The <b>quick</b> <b>brown</b> <b>fox</b>."));
    ir.close();
    dir.close();
}
public void testAllTermQuery() throws IOException {
    Directory dir = newDirectory();
    String value = "The quick brown fox.";
    Analyzer analyzer = new StandardAnalyzer();
    IndexReader ir = indexOneDoc(dir, "all", value, analyzer);
    AllTermQuery query = new AllTermQuery(new Term("all", "fox"));
    IndexSearcher searcher = newSearcher(ir);
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertThat(topDocs.totalHits, equalTo(1));
    int docId = topDocs.scoreDocs[0].doc;
    CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
    CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, analyzer,
        passageFormatter, null, value, false);
    Snippet[] snippets = highlighter.highlightField("all", query, docId, 5);
    assertThat(snippets.length, equalTo(1));
    assertThat(snippets[0].getText(), equalTo("The quick brown <b>fox</b>."));
    ir.close();
    dir.close();
}
public void testCommonTermsQuery() throws IOException {
    Directory dir = newDirectory();
    String value = "The quick brown fox.";
    Analyzer analyzer = new StandardAnalyzer();
    IndexReader ir = indexOneDoc(dir, "text", value, analyzer);
    CommonTermsQuery query = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, 128);
    query.add(new Term("text", "quick"));
    query.add(new Term("text", "brown"));
    query.add(new Term("text", "fox"));
    IndexSearcher searcher = newSearcher(ir);
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertThat(topDocs.totalHits, equalTo(1));
    int docId = topDocs.scoreDocs[0].doc;
    CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
    CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, analyzer,
        passageFormatter, null, value, false);
    Snippet[] snippets = highlighter.highlightField("text", query, docId, 5);
    assertThat(snippets.length, equalTo(1));
    assertThat(snippets[0].getText(), equalTo("The <b>quick</b> <b>brown</b> <b>fox</b>."));
    ir.close();
    dir.close();
}
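/*
 * The three tests above call an indexOneDoc(...) helper that is not included in
 * this excerpt. The sketch below is an assumption reconstructed from the call
 * sites, not the original implementation: it indexes a single document whose
 * field records offsets in the postings, and hands back a reader over it.
 */
private static IndexReader indexOneDoc(Directory dir, String field, String value, Analyzer analyzer) throws IOException {
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
    FieldType ft = new FieldType(TextField.TYPE_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); // offsets needed for highlighting
    Document doc = new Document();
    doc.add(new Field(field, value, ft));
    iw.addDocument(doc);
    IndexReader ir = iw.getReader(); // the reader stays open after the writer closes; the caller closes it
    iw.close();
    return ir;
}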
public void testSimpleFormat() {
    String content = "This is a really cool highlighter. Postings highlighter gives nice snippets back. No matches here.";
    CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<em>", "</em>", new DefaultEncoder());
    Passage[] passages = new Passage[3];
    String match = "highlighter";
    BytesRef matchBytesRef = new BytesRef(match);

    Passage passage1 = new Passage();
    int start = content.indexOf(match);
    int end = start + match.length();
    passage1.startOffset = 0;
    passage1.endOffset = end + 2; // let's include the whitespace at the end to make sure we trim it
    passage1.addMatch(start, end, matchBytesRef);
    passages[0] = passage1;

    Passage passage2 = new Passage();
    start = content.lastIndexOf(match);
    end = start + match.length();
    passage2.startOffset = passage1.endOffset;
    passage2.endOffset = end + 26;
    passage2.addMatch(start, end, matchBytesRef);
    passages[1] = passage2;

    Passage passage3 = new Passage();
    passage3.startOffset = passage2.endOffset;
    passage3.endOffset = content.length();
    passages[2] = passage3;

    Snippet[] fragments = passageFormatter.format(passages, content);
    assertThat(fragments, notNullValue());
    assertThat(fragments.length, equalTo(3));
    assertThat(fragments[0].getText(), equalTo("This is a really cool <em>highlighter</em>."));
    assertThat(fragments[0].isHighlighted(), equalTo(true));
    assertThat(fragments[1].getText(), equalTo("Postings <em>highlighter</em> gives nice snippets back."));
    assertThat(fragments[1].isHighlighted(), equalTo(true));
    assertThat(fragments[2].getText(), equalTo("No matches here."));
    assertThat(fragments[2].isHighlighted(), equalTo(false));
}
public void testSimpleFormat() {
    String content = "This is a really cool highlighter. Unified highlighter gives nice snippets back. No matches here.";
    CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<em>", "</em>", new DefaultEncoder());
    Passage[] passages = new Passage[3];
    String match = "highlighter";
    BytesRef matchBytesRef = new BytesRef(match);

    Passage passage1 = new Passage();
    int start = content.indexOf(match);
    int end = start + match.length();
    passage1.setStartOffset(0);
    passage1.setEndOffset(end + 2); // let's include the whitespace at the end to make sure we trim it
    passage1.addMatch(start, end, matchBytesRef);
    passages[0] = passage1;

    Passage passage2 = new Passage();
    start = content.lastIndexOf(match);
    end = start + match.length();
    passage2.setStartOffset(passage1.getEndOffset());
    passage2.setEndOffset(end + 26);
    passage2.addMatch(start, end, matchBytesRef);
    passages[1] = passage2;

    Passage passage3 = new Passage();
    passage3.setStartOffset(passage2.getEndOffset());
    passage3.setEndOffset(content.length());
    passages[2] = passage3;

    Snippet[] fragments = passageFormatter.format(passages, content);
    assertThat(fragments, notNullValue());
    assertThat(fragments.length, equalTo(3));
    assertThat(fragments[0].getText(), equalTo("This is a really cool <em>highlighter</em>."));
    assertThat(fragments[0].isHighlighted(), equalTo(true));
    assertThat(fragments[1].getText(), equalTo("Unified <em>highlighter</em> gives nice snippets back."));
    assertThat(fragments[1].isHighlighted(), equalTo(true));
    assertThat(fragments[2].getText(), equalTo("No matches here."));
    assertThat(fragments[2].isHighlighted(), equalTo(false));
}
public void testCustomPostingsHighlighter() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);

    // good position but only one match
    final String firstValue = "This is a test. Just a test1 highlighting from postings highlighter.";
    Field body = new Field("body", "", offsetsType);
    Document doc = new Document();
    doc.add(body);
    body.setStringValue(firstValue);
    // two matches, not the best snippet due to its length though
    final String secondValue = "This is the second highlighting value to perform highlighting on a longer text that gets scored lower.";
    Field body2 = new Field("body", "", offsetsType);
    doc.add(body2);
    body2.setStringValue(secondValue);
    // two matches and short, will be scored highest
    final String thirdValue = "This is highlighting the third short highlighting value.";
    Field body3 = new Field("body", "", offsetsType);
    doc.add(body3);
    body3.setStringValue(thirdValue);
    // one match, same as first but at the end, will be scored lower due to its position
    final String fourthValue = "Just a test4 highlighting from postings highlighter.";
    Field body4 = new Field("body", "", offsetsType);
    doc.add(body4);
    body4.setStringValue(fourthValue);

    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();

    String firstHlValue = "Just a test1 <b>highlighting</b> from postings highlighter.";
    String secondHlValue = "This is the second <b>highlighting</b> value to perform <b>highlighting</b> on a longer text that gets scored lower.";
    String thirdHlValue = "This is <b>highlighting</b> the third short <b>highlighting</b> value.";
    String fourthHlValue = "Just a test4 <b>highlighting</b> from postings highlighter.";

    IndexSearcher searcher = newSearcher(ir);
    Query query = new TermQuery(new Term("body", "highlighting"));
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertThat(topDocs.totalHits, equalTo(1));
    int docId = topDocs.scoreDocs[0].doc;
    String fieldValue = firstValue + HighlightUtils.PARAGRAPH_SEPARATOR + secondValue
        + HighlightUtils.PARAGRAPH_SEPARATOR + thirdValue + HighlightUtils.PARAGRAPH_SEPARATOR + fourthValue;
    CustomPostingsHighlighter highlighter = new CustomPostingsHighlighter(null,
        new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder()), fieldValue, false);
    Snippet[] snippets = highlighter.highlightField("body", query, searcher, docId, 5);
    assertThat(snippets.length, equalTo(4));
    assertThat(snippets[0].getText(), equalTo(firstHlValue));
    assertThat(snippets[1].getText(), equalTo(secondHlValue));
    assertThat(snippets[2].getText(), equalTo(thirdHlValue));
    assertThat(snippets[3].getText(), equalTo(fourthHlValue));
    ir.close();
    dir.close();
}
public void testNoMatchSize() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    Field body = new Field("body", "", offsetsType);
    Field none = new Field("none", "", offsetsType);
    Document doc = new Document();
    doc.add(body);
    doc.add(none);

    String firstValue = "This is a test. Just a test highlighting from postings. Feel free to ignore.";
    body.setStringValue(firstValue);
    none.setStringValue(firstValue);
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();

    Query query = new TermQuery(new Term("none", "highlighting"));
    IndexSearcher searcher = newSearcher(ir);
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertThat(topDocs.totalHits, equalTo(1));
    int docId = topDocs.scoreDocs[0].doc;

    CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
    // the query matches "none" but we highlight "body": with noMatchSize disabled, no snippets come back
    CustomPostingsHighlighter highlighter = new CustomPostingsHighlighter(null, passageFormatter, firstValue, false);
    Snippet[] snippets = highlighter.highlightField("body", query, searcher, docId, 5);
    assertThat(snippets.length, equalTo(0));

    // with noMatchSize enabled the first sentence is returned as a non-highlighted snippet
    highlighter = new CustomPostingsHighlighter(null, passageFormatter, firstValue, true);
    snippets = highlighter.highlightField("body", query, searcher, docId, 5);
    assertThat(snippets.length, equalTo(1));
    assertThat(snippets[0].getText(), equalTo("This is a test."));
    ir.close();
    dir.close();
}
public void testCustomUnifiedHighlighter() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    offsetsType.setStoreTermVectorOffsets(true);
    offsetsType.setStoreTermVectorPositions(true);
    offsetsType.setStoreTermVectors(true);

    // good position but only one match
    final String firstValue = "This is a test. Just a test1 highlighting from unified highlighter.";
    Field body = new Field("body", "", offsetsType);
    Document doc = new Document();
    doc.add(body);
    body.setStringValue(firstValue);
    // two matches, not the best snippet due to its length though
    final String secondValue = "This is the second highlighting value to perform highlighting on a longer text "
        + "that gets scored lower.";
    Field body2 = new Field("body", "", offsetsType);
    doc.add(body2);
    body2.setStringValue(secondValue);
    // two matches and short, will be scored highest
    final String thirdValue = "This is highlighting the third short highlighting value.";
    Field body3 = new Field("body", "", offsetsType);
    doc.add(body3);
    body3.setStringValue(thirdValue);
    // one match, same as first but at the end, will be scored lower due to its position
    final String fourthValue = "Just a test4 highlighting from unified highlighter.";
    Field body4 = new Field("body", "", offsetsType);
    doc.add(body4);
    body4.setStringValue(fourthValue);

    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();

    String firstHlValue = "Just a test1 <b>highlighting</b> from unified highlighter.";
    String secondHlValue = "This is the second <b>highlighting</b> value to perform <b>highlighting</b> on a"
        + " longer text that gets scored lower.";
    String thirdHlValue = "This is <b>highlighting</b> the third short <b>highlighting</b> value.";
    String fourthHlValue = "Just a test4 <b>highlighting</b> from unified highlighter.";

    IndexSearcher searcher = newSearcher(ir);
    Query query = new TermQuery(new Term("body", "highlighting"));
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertThat(topDocs.totalHits, equalTo(1));
    int docId = topDocs.scoreDocs[0].doc;
    String fieldValue = firstValue + HighlightUtils.PARAGRAPH_SEPARATOR + secondValue
        + HighlightUtils.PARAGRAPH_SEPARATOR + thirdValue + HighlightUtils.PARAGRAPH_SEPARATOR + fourthValue;
    CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, iwc.getAnalyzer(),
        new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder()), null, fieldValue, true);
    Snippet[] snippets = highlighter.highlightField("body", query, docId, 5);
    assertThat(snippets.length, equalTo(4));
    assertThat(snippets[0].getText(), equalTo(firstHlValue));
    assertThat(snippets[1].getText(), equalTo(secondHlValue));
    assertThat(snippets[2].getText(), equalTo(thirdHlValue));
    assertThat(snippets[3].getText(), equalTo(fourthHlValue));
    ir.close();
    dir.close();
}
public void testNoMatchSize() throws Exception {
    Directory dir = newDirectory();
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    offsetsType.setStoreTermVectorOffsets(true);
    offsetsType.setStoreTermVectorPositions(true);
    offsetsType.setStoreTermVectors(true);
    Field body = new Field("body", "", offsetsType);
    Field none = new Field("none", "", offsetsType);
    Document doc = new Document();
    doc.add(body);
    doc.add(none);

    String firstValue = "This is a test. Just a test highlighting from unified. Feel free to ignore.";
    body.setStringValue(firstValue);
    none.setStringValue(firstValue);
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();

    Query query = new TermQuery(new Term("none", "highlighting"));
    IndexSearcher searcher = newSearcher(ir);
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertThat(topDocs.totalHits, equalTo(1));
    int docId = topDocs.scoreDocs[0].doc;

    CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
    // the query matches "none" but we highlight "body": with noMatchSize disabled, no snippets come back
    CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, analyzer, passageFormatter,
        null, firstValue, false);
    Snippet[] snippets = highlighter.highlightField("body", query, docId, 5);
    assertThat(snippets.length, equalTo(0));

    // with noMatchSize enabled the first sentence is returned as a non-highlighted snippet
    highlighter = new CustomUnifiedHighlighter(searcher, analyzer, passageFormatter, null, firstValue, true);
    snippets = highlighter.highlightField("body", query, docId, 5);
    assertThat(snippets.length, equalTo(1));
    assertThat(snippets[0].getText(), equalTo("This is a test."));
    ir.close();
    dir.close();
}
public void testMultiValuedSortByScore() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
    Document doc = new Document();
    FieldType type = new FieldType(TextField.TYPE_STORED);
    type.setStoreTermVectorOffsets(true);
    type.setStoreTermVectorPositions(true);
    type.setStoreTermVectors(true);
    type.freeze();
    doc.add(new Field("field", "zero if naught", type)); // The first two fields contain the best match
    doc.add(new Field("field", "hero of legend", type)); // but total a lower score (3) than the bottom
    doc.add(new Field("field", "naught of hero", type)); // two fields (4)
    doc.add(new Field("field", "naught of hero", type));
    writer.addDocument(doc);

    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    ScoreOrderFragmentsBuilder fragmentsBuilder = new ScoreOrderFragmentsBuilder();
    fragmentsBuilder.setDiscreteMultiValueHighlighting(true);
    IndexReader reader = DirectoryReader.open(writer, true);
    String[] preTags = new String[] { "<b>" };
    String[] postTags = new String[] { "</b>" };
    Encoder encoder = new DefaultEncoder();
    int docId = 0;
    BooleanQuery query = new BooleanQuery();
    query.add(clause("field", "hero"), Occur.SHOULD);
    query.add(clause("field", "of"), Occur.SHOULD);
    query.add(clause("field", "legend"), Occur.SHOULD);
    FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);

    for (FragListBuilder fragListBuilder : new FragListBuilder[] {
            new SimpleFragListBuilder(), new WeightedFragListBuilder() }) {
        String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 20, 1,
            fragListBuilder, fragmentsBuilder, preTags, postTags, encoder);
        assertEquals("<b>hero</b> <b>of</b> <b>legend</b>", bestFragments[0]);
        bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 28, 1,
            fragListBuilder, fragmentsBuilder, preTags, postTags, encoder);
        assertEquals("<b>hero</b> <b>of</b> <b>legend</b>", bestFragments[0]);
        bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 30000, 1,
            fragListBuilder, fragmentsBuilder, preTags, postTags, encoder);
        assertEquals("<b>hero</b> <b>of</b> <b>legend</b>", bestFragments[0]);
    }

    reader.close();
    writer.close();
    dir.close();
}
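/*
 * testMultiValuedSortByScore builds its clauses through a clause(...) helper that
 * is not part of this excerpt. A minimal sketch under the assumption that each
 * clause is a plain single-term query on the given field (the real helper may
 * also accept multiple terms and build a phrase query):
 */
private static Query clause(String field, String term) {
    return new TermQuery(new Term(field, term));
}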
private void matchedFieldsTestCase(boolean useMatchedFields, boolean fieldMatch,
        String fieldValue, String expected, Query... queryClauses) throws IOException {
    Document doc = new Document();
    FieldType stored = new FieldType(TextField.TYPE_STORED);
    stored.setStoreTermVectorOffsets(true);
    stored.setStoreTermVectorPositions(true);
    stored.setStoreTermVectors(true);
    stored.freeze();
    FieldType matched = new FieldType(TextField.TYPE_NOT_STORED);
    matched.setStoreTermVectorOffsets(true);
    matched.setStoreTermVectorPositions(true);
    matched.setStoreTermVectors(true);
    matched.freeze();
    doc.add(new Field("field", fieldValue, stored));              // Whitespace tokenized with English stop words
    doc.add(new Field("field_exact", fieldValue, matched));       // Whitespace tokenized without stop words
    doc.add(new Field("field_super_exact", fieldValue, matched)); // Whitespace tokenized without toLower
    doc.add(new Field("field_characters", fieldValue, matched));  // Each letter is a token
    doc.add(new Field("field_tripples", fieldValue, matched));    // Every three letters is a token
    doc.add(new Field("field_sliced", fieldValue.substring(0,     // Sliced at 10 chars then analyzed just like field
        Math.min(fieldValue.length() - 1, 10)), matched));
    doc.add(new Field("field_der_red", new CannedTokenStream(     // Hacky field containing "der" and "red" at pos = 0
        token("der", 1, 0, 3), token("red", 0, 0, 3)), matched));

    final Map<String, Analyzer> fieldAnalyzers = new TreeMap<>();
    fieldAnalyzers.put("field", new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET));
    fieldAnalyzers.put("field_exact", new MockAnalyzer(random()));
    fieldAnalyzers.put("field_super_exact", new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false));
    fieldAnalyzers.put("field_characters", new MockAnalyzer(random(), new CharacterRunAutomaton(new RegExp(".").toAutomaton()), true));
    fieldAnalyzers.put("field_tripples", new MockAnalyzer(random(), new CharacterRunAutomaton(new RegExp("...").toAutomaton()), true));
    fieldAnalyzers.put("field_sliced", fieldAnalyzers.get("field"));
    fieldAnalyzers.put("field_der_red", fieldAnalyzers.get("field")); // This is required even though we provide a token stream

    Analyzer analyzer = new DelegatingAnalyzerWrapper(Analyzer.PER_FIELD_REUSE_STRATEGY) {
        @Override
        public Analyzer getWrappedAnalyzer(String fieldName) {
            return fieldAnalyzers.get(fieldName);
        }
    };

    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(analyzer));
    writer.addDocument(doc);

    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    FragListBuilder fragListBuilder = new SimpleFragListBuilder();
    FragmentsBuilder fragmentsBuilder = new ScoreOrderFragmentsBuilder();
    IndexReader reader = DirectoryReader.open(writer, true);
    String[] preTags = new String[] { "<b>" };
    String[] postTags = new String[] { "</b>" };
    Encoder encoder = new DefaultEncoder();
    int docId = 0;
    BooleanQuery query = new BooleanQuery();
    for (Query clause : queryClauses) {
        query.add(clause, Occur.MUST);
    }
    FieldQuery fieldQuery = new FieldQuery(query, reader, true, fieldMatch);
    String[] bestFragments;
    if (useMatchedFields) {
        Set<String> matchedFields = new HashSet<>();
        matchedFields.add("field");
        matchedFields.add("field_exact");
        matchedFields.add("field_super_exact");
        matchedFields.add("field_characters");
        matchedFields.add("field_tripples");
        matchedFields.add("field_sliced");
        matchedFields.add("field_der_red");
        bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", matchedFields, 25, 1,
            fragListBuilder, fragmentsBuilder, preTags, postTags, encoder);
    } else {
        bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 25, 1,
            fragListBuilder, fragmentsBuilder, preTags, postTags, encoder);
    }
    assertEquals(expected, bestFragments[0]);
    reader.close();
    writer.close();
    dir.close();
}
private void matchedFieldsTestCase(boolean useMatchedFields, boolean fieldMatch,
        String fieldValue, String expected, Query... queryClauses) throws IOException {
    Document doc = new Document();
    FieldType stored = new FieldType(TextField.TYPE_STORED);
    stored.setStoreTermVectorOffsets(true);
    stored.setStoreTermVectorPositions(true);
    stored.setStoreTermVectors(true);
    stored.freeze();
    FieldType matched = new FieldType(TextField.TYPE_NOT_STORED);
    matched.setStoreTermVectorOffsets(true);
    matched.setStoreTermVectorPositions(true);
    matched.setStoreTermVectors(true);
    matched.freeze();
    doc.add(new Field("field", fieldValue, stored));              // Whitespace tokenized with English stop words
    doc.add(new Field("field_exact", fieldValue, matched));       // Whitespace tokenized without stop words
    doc.add(new Field("field_super_exact", fieldValue, matched)); // Whitespace tokenized without toLower
    doc.add(new Field("field_characters", fieldValue, matched));  // Each letter is a token
    doc.add(new Field("field_tripples", fieldValue, matched));    // Every three letters is a token
    doc.add(new Field("field_sliced", fieldValue.substring(0,     // Sliced at 10 chars then analyzed just like field
        Math.min(fieldValue.length() - 1, 10)), matched));
    doc.add(new Field("field_der_red", new CannedTokenStream(     // Hacky field containing "der" and "red" at pos = 0
        token("der", 1, 0, 3), token("red", 0, 0, 3)), matched));

    final Map<String, Analyzer> fieldAnalyzers = new TreeMap<String, Analyzer>();
    fieldAnalyzers.put("field", new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET));
    fieldAnalyzers.put("field_exact", new MockAnalyzer(random()));
    fieldAnalyzers.put("field_super_exact", new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false));
    fieldAnalyzers.put("field_characters", new MockAnalyzer(random(), new CharacterRunAutomaton(new RegExp(".").toAutomaton()), true));
    fieldAnalyzers.put("field_tripples", new MockAnalyzer(random(), new CharacterRunAutomaton(new RegExp("...").toAutomaton()), true));
    fieldAnalyzers.put("field_sliced", fieldAnalyzers.get("field"));
    fieldAnalyzers.put("field_der_red", fieldAnalyzers.get("field")); // This is required even though we provide a token stream

    Analyzer analyzer = new AnalyzerWrapper() {
        @Override
        public Analyzer getWrappedAnalyzer(String fieldName) {
            return fieldAnalyzers.get(fieldName);
        }
    };

    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    writer.addDocument(doc);

    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    FragListBuilder fragListBuilder = new SimpleFragListBuilder();
    FragmentsBuilder fragmentsBuilder = new ScoreOrderFragmentsBuilder();
    IndexReader reader = DirectoryReader.open(writer, true);
    String[] preTags = new String[] { "<b>" };
    String[] postTags = new String[] { "</b>" };
    Encoder encoder = new DefaultEncoder();
    int docId = 0;
    BooleanQuery query = new BooleanQuery();
    for (Query clause : queryClauses) {
        query.add(clause, Occur.MUST);
    }
    FieldQuery fieldQuery = new FieldQuery(query, reader, true, fieldMatch);
    String[] bestFragments;
    if (useMatchedFields) {
        Set<String> matchedFields = new HashSet<String>();
        matchedFields.add("field");
        matchedFields.add("field_exact");
        matchedFields.add("field_super_exact");
        matchedFields.add("field_characters");
        matchedFields.add("field_tripples");
        matchedFields.add("field_sliced");
        matchedFields.add("field_der_red");
        bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", matchedFields, 25, 1,
            fragListBuilder, fragmentsBuilder, preTags, postTags, encoder);
    } else {
        bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 25, 1,
            fragListBuilder, fragmentsBuilder, preTags, postTags, encoder);
    }
    assertEquals(expected, bestFragments[0]);
    reader.close();
    writer.close();
    dir.close();
}
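/*
 * Both matchedFieldsTestCase variants feed a CannedTokenStream from a token(...)
 * helper that is not shown here. A sketch assuming the argument order implied by
 * the call sites: term text, position increment, start offset, end offset.
 */
private static Token token(String term, int posInc, int startOffset, int endOffset) {
    Token t = new Token(term, startOffset, endOffset); // sets the term text and its offsets
    t.setPositionIncrement(posInc);                    // an increment of 0 stacks this token on the previous position
    return t;
}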