/** * lookup * * @param suggester * @param keyword * @throws IOException */ private static List<String> lookup(AnalyzingInfixSuggester suggester, String keyword ) throws IOException { //先以contexts为过滤条件进行过滤,再以title为关键字进行筛选,根据weight值排序返回前2条 //第3个布尔值即是否每个Term都要匹配,第4个参数表示是否需要关键字高亮 List<LookupResult> results = suggester.lookup(keyword, 20, true, true); List<String> list = new ArrayList<>(); for (LookupResult result : results) { list.add(result.key.toString()); //从payload中反序列化出Blog对象 // BytesRef bytesRef = result.payload; // InputStream is = Tools.bytes2InputStream(bytesRef.bytes); // Blog blog = (Blog) Tools.deSerialize(is); // System.out.println("blog-Name:" + blog.getTitle()); // System.out.println("blog-Content:" + blog.getContent()); // System.out.println("blog-image:" + blog.getImageurl()); // System.out.println("blog-numberSold:" + blog.getHits()); } return list; }
/**
 * Builds a suggester on a file-system directory, closes it, opens a second suggester on the
 * same path and checks that the entry count and the top hit for "ear" are available again.
 */
public void testAfterLoad() throws Exception {
  Input[] entries = {
    new Input("lend me your ear", 8, new BytesRef("foobar")),
    new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
  };

  File indexDir = createTempDir("AnalyzingInfixSuggesterTest");
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);

  // First instance: build and close.
  AnalyzingInfixSuggester suggester =
      new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(indexDir), analyzer, analyzer, 3);
  suggester.build(new InputArrayIterator(entries));
  assertEquals(2, suggester.getCount());
  suggester.close();

  // Second instance over the same path; build() is deliberately not called again.
  suggester =
      new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newFSDirectory(indexDir), analyzer, analyzer, 3);
  List<LookupResult> hits =
      suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
  assertEquals(2, hits.size());

  LookupResult top = hits.get(0);
  assertEquals("a penny saved is a penny earned", top.key);
  assertEquals("a penny saved is a penny <b>ear</b>ned", top.highlightKey);
  assertEquals(10, top.value);
  assertEquals(new BytesRef("foobaz"), top.payload);
  assertEquals(2, suggester.getCount());
  suggester.close();
}
/**
 * Indexes many random "boo..." terms plus "foo bar boo far", then checks that randomly edited
 * variants of "foo bar boo" always yield exactly that one suggestion with weight 12.
 */
public void testRandomEdits() throws IOException {
  List<Input> entries = new ArrayList<>();
  int noiseCount = atLeast(100);
  for (int n = 0; n < noiseCount; n++) {
    String term = "boo" + TestUtil.randomSimpleString(random());
    int weight = 1 + random().nextInt(100);
    entries.add(new Input(term, weight));
  }
  entries.add(new Input("foo bar boo far", 12));

  MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
  int options = FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP;
  FuzzySuggester suggester =
      new FuzzySuggester(
          analyzer,
          analyzer,
          options,
          256,
          -1,
          true,
          FuzzySuggester.DEFAULT_MAX_EDITS,
          FuzzySuggester.DEFAULT_TRANSPOSITIONS,
          0,
          FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH,
          FuzzySuggester.DEFAULT_UNICODE_AWARE);
  suggester.build(new InputArrayIterator(entries));

  int rounds = atLeast(10);
  for (int round = 0; round < rounds; round++) {
    String edited = addRandomEdit("foo bar boo", FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX);
    List<LookupResult> hits =
        suggester.lookup(TestUtil.stringToCharSequence(edited, random()), false, 2);
    // The edited query is used as the failure message so a failing case can be reproduced.
    assertEquals(edited, 1, hits.size());
    LookupResult only = hits.get(0);
    assertEquals("foo bar boo far", only.key.toString());
    assertEquals(12, only.value, 0.01F);
  }
}
/**
 * Cyrillic variant of the random-edit test: indexes many random "буу..." terms plus
 * "фуу бар буу фар" and checks that edited variants of "фуу бар буу" yield exactly that one
 * suggestion with weight 12. The last constructor argument is passed as {@code true} here.
 */
public void testNonLatinRandomEdits() throws IOException {
  List<Input> entries = new ArrayList<>();
  int noiseCount = atLeast(100);
  for (int n = 0; n < noiseCount; n++) {
    String term = "буу" + TestUtil.randomSimpleString(random());
    int weight = 1 + random().nextInt(100);
    entries.add(new Input(term, weight));
  }
  entries.add(new Input("фуу бар буу фар", 12));

  MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
  int options = FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP;
  FuzzySuggester suggester =
      new FuzzySuggester(
          analyzer,
          analyzer,
          options,
          256,
          -1,
          true,
          FuzzySuggester.DEFAULT_MAX_EDITS,
          FuzzySuggester.DEFAULT_TRANSPOSITIONS,
          0,
          FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH,
          true);
  suggester.build(new InputArrayIterator(entries));

  int rounds = atLeast(10);
  for (int round = 0; round < rounds; round++) {
    String edited = addRandomEdit("фуу бар буу", 0);
    List<LookupResult> hits =
        suggester.lookup(TestUtil.stringToCharSequence(edited, random()), false, 2);
    // The edited query doubles as the failure message.
    assertEquals(edited, 1, hits.size());
    LookupResult only = hits.get(0);
    assertEquals("фуу бар буу фар", only.key.toString());
    assertEquals(12, only.value, 0.01F);
  }
}
public void testNoSeps() throws Exception { Input[] keys = new Input[] { new Input("ab cd", 0), new Input("abcd", 1), }; int options = 0; Analyzer a = new MockAnalyzer(random()); FuzzySuggester suggester = new FuzzySuggester(a, a, options, 256, -1, true, 1, true, 1, 3, false); suggester.build(new InputArrayIterator(keys)); // TODO: would be nice if "ab " would allow the test to // pass, and more generally if the analyzer can know // that the user's current query has ended at a word, // but, analyzers don't produce SEP tokens! List<LookupResult> r = suggester.lookup(TestUtil.stringToCharSequence("ab c", random()), false, 2); assertEquals(2, r.size()); // With no PRESERVE_SEPS specified, "ab c" should also // complete to "abcd", which has higher weight so should // appear first: assertEquals("abcd", r.get(0).key.toString()); }
/**
 * Indexes "a", "a " and " a" in shuffled order with a suggester whose fourth constructor
 * argument is 2, and checks that only the two highest-weighted entries are suggested for "a".
 */
public void testMaxSurfaceFormsPerAnalyzedForm() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  FuzzySuggester suggester =
      new FuzzySuggester(analyzer, analyzer, 0, 2, -1, true, 1, true, 1, 3, false);

  List<Input> entries =
      Arrays.asList(
          new Input("a", 40),
          new Input("a ", 50),
          new Input(" a", 60));

  // Shuffle so the outcome cannot depend on insertion order.
  Collections.shuffle(entries, random());
  suggester.build(new InputArrayIterator(entries));

  List<LookupResult> hits = suggester.lookup("a", false, 5);
  assertEquals(2, hits.size());

  LookupResult first = hits.get(0);
  assertEquals(" a", first.key);
  assertEquals(60, first.value);

  LookupResult second = hits.get(1);
  assertEquals("a ", second.key);
  assertEquals(50, second.value);
}
public void testNoSeps() throws Exception { Input[] keys = new Input[] { new Input("ab cd", 0), new Input("abcd", 1), }; int options = 0; Analyzer a = new MockAnalyzer(random()); AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, options, 256, -1, true); suggester.build(new InputArrayIterator(keys)); // TODO: would be nice if "ab " would allow the test to // pass, and more generally if the analyzer can know // that the user's current query has ended at a word, // but, analyzers don't produce SEP tokens! List<LookupResult> r = suggester.lookup(TestUtil.stringToCharSequence("ab c", random()), false, 2); assertEquals(2, r.size()); // With no PRESERVE_SEPS specified, "ab c" should also // complete to "abcd", which has higher weight so should // appear first: assertEquals("abcd", r.get(0).key.toString()); }
/**
 * With the constructor flag set to {@code true}, the exact match "x" (weight 2) must be
 * listed ahead of "x y" (weight 20) for every requested result count from 1 to 3.
 */
public void testExactFirst() throws Exception {
  WFSTCompletionLookup suggester = new WFSTCompletionLookup(true);

  Input[] entries = {
    new Input("x y", 20),
    new Input("x", 2),
  };
  suggester.build(new InputArrayIterator(entries));

  for (int topN = 1; topN <= 3; topN++) {
    List<LookupResult> hits = suggester.lookup("x", false, topN);

    // Only two entries exist, so at most two hits can come back.
    assertEquals(Math.min(topN, 2), hits.size());

    LookupResult first = hits.get(0);
    assertEquals("x", first.key);
    assertEquals(2, first.value);

    if (topN > 1) {
      LookupResult second = hits.get(1);
      assertEquals("x y", second.key);
      assertEquals(20, second.value);
    }
  }
}
/**
 * With the constructor flag set to {@code false}, hits are ordered by weight alone: "x y"
 * (weight 20) precedes the exact match "x" (weight 2) for result counts 1 to 3.
 */
public void testNonExactFirst() throws Exception {
  WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);

  Input[] entries = {
    new Input("x y", 20),
    new Input("x", 2),
  };
  suggester.build(new InputArrayIterator(entries));

  for (int topN = 1; topN <= 3; topN++) {
    List<LookupResult> hits = suggester.lookup("x", false, topN);

    // Only two entries exist, so at most two hits can come back.
    assertEquals(Math.min(topN, 2), hits.size());

    LookupResult first = hits.get(0);
    assertEquals("x y", first.key);
    assertEquals(20, first.value);

    if (topN > 1) {
      LookupResult second = hits.get(1);
      assertEquals("x", second.key);
      assertEquals(2, second.value);
    }
  }
}
public void testMultilingualInput() throws Exception { List<Input> input = LookupBenchmarkTest.readTop50KWiki(); FSTCompletionLookup lookup = new FSTCompletionLookup(); lookup.build(new InputArrayIterator(input)); assertEquals(input.size(), lookup.getCount()); for (Input tf : input) { assertNotNull("Not found: " + tf.term.toString(), lookup.get(TestUtil.bytesToCharSequence(tf.term, random()))); assertEquals(tf.term.utf8ToString(), lookup.lookup(TestUtil.bytesToCharSequence(tf.term, random()), true, 1).get(0).key.toString()); } List<LookupResult> result = lookup.lookup(stringToCharSequence("wit"), true, 5); assertEquals(5, result.size()); assertTrue(result.get(0).key.toString().equals("wit")); // exact match. assertTrue(result.get(1).key.toString().equals("with")); // highest count. }
/**
 * Builds an FSTCompletionLookup from a few thousand random numeric strings with random
 * weights and checks that every suggestion returned for every proper prefix of every term
 * actually starts with that prefix.
 */
public void testRandom() throws Exception {
  List<Input> freqs = new ArrayList<>();
  Random rnd = random();

  // Draw the entry count once. Evaluating rnd.nextInt(2500) in the loop condition re-rolled
  // the bound on every iteration, which skewed the count towards the low end of the range
  // and consumed one extra random value per pass.
  final int numEntries = 2500 + rnd.nextInt(2500);
  for (int i = 0; i < numEntries; i++) {
    int weight = rnd.nextInt(100);
    freqs.add(new Input("" + rnd.nextLong(), weight));
  }

  FSTCompletionLookup lookup = new FSTCompletionLookup();
  lookup.build(new InputArrayIterator(freqs.toArray(new Input[freqs.size()])));

  for (Input tf : freqs) {
    final String term = tf.term.utf8ToString();
    // Prefixes of length 1 .. term.length() - 1; the full term itself is not queried.
    for (int i = 1; i < term.length(); i++) {
      String prefix = term.substring(0, i);
      for (LookupResult lr : lookup.lookup(stringToCharSequence(prefix), true, 10)) {
        assertTrue(lr.key.toString().startsWith(prefix));
      }
    }
  }
}
/**
 * Looks up suggestions for the text of {@code currentToken}.
 *
 * @param options supplies the suggest mode and the number of suggestions to request
 * @param currentToken the token whose character buffer is used as the lookup key
 * @return the suggestions, or {@code null} when the lookup returned {@code null} or an empty list
 * @throws IOException propagated from the underlying lookup
 */
private List<LookupResult> getLookupResults(SpellingOptions options, Token currentToken) throws IOException {
  // View the token's buffer directly (no copy), limited to the token's length.
  CharsRef key = new CharsRef();
  key.chars = currentToken.buffer();
  key.offset = 0;
  key.length = currentToken.length();

  // "More popular only" is never requested from WFSTCompletionLookup or AnalyzingSuggester
  // instances, whatever the suggest mode says.
  boolean wantsMorePopular = options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR;
  boolean excludedLookup =
      lookup instanceof WFSTCompletionLookup || lookup instanceof AnalyzingSuggester;
  boolean onlyMorePopular = wantsMorePopular && !excludedLookup;

  List<LookupResult> suggestions = lookup.lookup(key, onlyMorePopular, options.count);
  return (suggestions == null || suggestions.isEmpty()) ? null : suggestions;
}
/**
 * Indexes many random "boo..." terms plus "foo bar boo far" in a default-configured
 * FuzzySuggester, then checks that randomly edited variants of "foo bar boo" always yield
 * exactly that one suggestion with weight 12.
 */
public void testRandomEdits() throws IOException {
  List<TermFreq> entries = new ArrayList<TermFreq>();
  int noiseCount = atLeast(100);
  for (int n = 0; n < noiseCount; n++) {
    String term = "boo" + _TestUtil.randomSimpleString(random());
    int weight = 1 + random().nextInt(100);
    entries.add(new TermFreq(term, weight));
  }
  entries.add(new TermFreq("foo bar boo far", 12));

  MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
  FuzzySuggester suggester = new FuzzySuggester(analyzer);
  suggester.build(new TermFreqArrayIterator(entries));

  int rounds = atLeast(10);
  for (int round = 0; round < rounds; round++) {
    String edited = addRandomEdit("foo bar boo", FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX);
    List<LookupResult> hits =
        suggester.lookup(_TestUtil.stringToCharSequence(edited, random()), false, 2);
    // The edited query doubles as the failure message.
    assertEquals(edited, 1, hits.size());
    LookupResult only = hits.get(0);
    assertEquals("foo bar boo far", only.key.toString());
    assertEquals(12, only.value, 0.01F);
  }
}
public void testNoSeps() throws Exception { TermFreq[] keys = new TermFreq[] { new TermFreq("ab cd", 0), new TermFreq("abcd", 1), }; int options = 0; Analyzer a = new MockAnalyzer(random()); FuzzySuggester suggester = new FuzzySuggester(a, a, options, 256, -1, 1, true, 1, 3); suggester.build(new TermFreqArrayIterator(keys)); // TODO: would be nice if "ab " would allow the test to // pass, and more generally if the analyzer can know // that the user's current query has ended at a word, // but, analyzers don't produce SEP tokens! List<LookupResult> r = suggester.lookup(_TestUtil.stringToCharSequence("ab c", random()), false, 2); assertEquals(2, r.size()); // With no PRESERVE_SEPS specified, "ab c" should also // complete to "abcd", which has higher weight so should // appear first: assertEquals("abcd", r.get(0).key.toString()); }
/**
 * Indexes "a", "a " and " a" in shuffled order with a suggester whose fourth constructor
 * argument is 2, and checks that only the two highest-weighted entries are suggested for "a".
 */
public void testMaxSurfaceFormsPerAnalyzedForm() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  FuzzySuggester suggester = new FuzzySuggester(analyzer, analyzer, 0, 2, -1, 1, true, 1, 3);

  List<TermFreq> entries =
      Arrays.asList(
          new TermFreq("a", 40),
          new TermFreq("a ", 50),
          new TermFreq(" a", 60));

  // Shuffle so the outcome cannot depend on insertion order.
  Collections.shuffle(entries, random());
  suggester.build(new TermFreqArrayIterator(entries));

  List<LookupResult> hits = suggester.lookup("a", false, 5);
  assertEquals(2, hits.size());

  LookupResult first = hits.get(0);
  assertEquals(" a", first.key);
  assertEquals(60, first.value);

  LookupResult second = hits.get(1);
  assertEquals("a ", second.key);
  assertEquals(50, second.value);
}
public void testNoSeps() throws Exception { TermFreq[] keys = new TermFreq[] { new TermFreq("ab cd", 0), new TermFreq("abcd", 1), }; int options = 0; Analyzer a = new MockAnalyzer(random()); AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, options, 256, -1); suggester.build(new TermFreqArrayIterator(keys)); // TODO: would be nice if "ab " would allow the test to // pass, and more generally if the analyzer can know // that the user's current query has ended at a word, // but, analyzers don't produce SEP tokens! List<LookupResult> r = suggester.lookup(_TestUtil.stringToCharSequence("ab c", random()), false, 2); assertEquals(2, r.size()); // With no PRESERVE_SEPS specified, "ab c" should also // complete to "abcd", which has higher weight so should // appear first: assertEquals("abcd", r.get(0).key.toString()); }
/**
 * Indexes "a", "a " and " a" in shuffled order with an AnalyzingSuggester whose fourth
 * constructor argument is 2, and checks that only the two highest-weighted entries are
 * suggested for "a".
 */
public void testMaxSurfaceFormsPerAnalyzedForm() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer, analyzer, 0, 2, -1);

  List<TermFreq> entries =
      Arrays.asList(
          new TermFreq("a", 40),
          new TermFreq("a ", 50),
          new TermFreq(" a", 60));

  // Shuffle so the outcome cannot depend on insertion order.
  Collections.shuffle(entries, random());
  suggester.build(new TermFreqArrayIterator(entries));

  List<LookupResult> hits = suggester.lookup("a", false, 5);
  assertEquals(2, hits.size());

  LookupResult first = hits.get(0);
  assertEquals(" a", first.key);
  assertEquals(60, first.value);

  LookupResult second = hits.get(1);
  assertEquals("a ", second.key);
  assertEquals(50, second.value);
}
/**
 * With the constructor flag set to {@code true}, the exact match "x" (weight 2) must be
 * listed ahead of "x y" (weight 20) for every requested result count from 1 to 3.
 */
public void testExactFirst() throws Exception {
  WFSTCompletionLookup suggester = new WFSTCompletionLookup(true);

  TermFreq[] entries = {
    new TermFreq("x y", 20),
    new TermFreq("x", 2),
  };
  suggester.build(new TermFreqArrayIterator(entries));

  for (int topN = 1; topN <= 3; topN++) {
    List<LookupResult> hits = suggester.lookup("x", false, topN);

    // Only two entries exist, so at most two hits can come back.
    assertEquals(Math.min(topN, 2), hits.size());

    LookupResult first = hits.get(0);
    assertEquals("x", first.key);
    assertEquals(2, first.value);

    if (topN > 1) {
      LookupResult second = hits.get(1);
      assertEquals("x y", second.key);
      assertEquals(20, second.value);
    }
  }
}
/**
 * With the constructor flag set to {@code false}, hits are ordered by weight alone: "x y"
 * (weight 20) precedes the exact match "x" (weight 2) for result counts 1 to 3.
 */
public void testNonExactFirst() throws Exception {
  WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);

  TermFreq[] entries = {
    new TermFreq("x y", 20),
    new TermFreq("x", 2),
  };
  suggester.build(new TermFreqArrayIterator(entries));

  for (int topN = 1; topN <= 3; topN++) {
    List<LookupResult> hits = suggester.lookup("x", false, topN);

    // Only two entries exist, so at most two hits can come back.
    assertEquals(Math.min(topN, 2), hits.size());

    LookupResult first = hits.get(0);
    assertEquals("x y", first.key);
    assertEquals(20, first.value);

    if (topN > 1) {
      LookupResult second = hits.get(1);
      assertEquals("x", second.key);
      assertEquals(2, second.value);
    }
  }
}
/**
 * Builds an FSTCompletionLookup from a few thousand random numeric strings with random
 * weights and checks that every suggestion returned for every proper prefix of every term
 * actually starts with that prefix.
 */
public void testRandom() throws Exception {
  List<TermFreq> freqs = new ArrayList<TermFreq>();
  Random rnd = random();

  // Draw the entry count once. Evaluating rnd.nextInt(2500) in the loop condition re-rolled
  // the bound on every iteration, which skewed the count towards the low end of the range
  // and consumed one extra random value per pass.
  final int numEntries = 2500 + rnd.nextInt(2500);
  for (int i = 0; i < numEntries; i++) {
    int weight = rnd.nextInt(100);
    freqs.add(new TermFreq("" + rnd.nextLong(), weight));
  }

  FSTCompletionLookup lookup = new FSTCompletionLookup();
  lookup.build(new TermFreqArrayIterator(freqs.toArray(new TermFreq[freqs.size()])));

  for (TermFreq tf : freqs) {
    final String term = tf.term.utf8ToString();
    // Prefixes of length 1 .. term.length() - 1; the full term itself is not queried.
    for (int i = 1; i < term.length(); i++) {
      String prefix = term.substring(0, i);
      for (LookupResult lr : lookup.lookup(stringToCharSequence(prefix), true, 10)) {
        assertTrue(lr.key.toString().startsWith(prefix));
      }
    }
  }
}
/**
 * Looks up "penn" with highlighting on and checks that the returned key wraps both matching
 * prefixes in {@code <b>} tags. The suggester's {@code getDirectory} is overridden to return
 * {@code newDirectory()} instead of using the supplied path.
 */
public void testHighlight() throws Exception {
  Input[] entries = {
    new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
  };

  File indexPath = _TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);

  AnalyzingInfixSuggester suggester =
      new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, indexPath, analyzer, analyzer, 3) {
        @Override
        protected Directory getDirectory(File path) {
          return newDirectory();
        }
      };
  suggester.build(new InputArrayIterator(entries));

  List<LookupResult> hits =
      suggester.lookup(_TestUtil.stringToCharSequence("penn", random()), 10, true, true);
  assertEquals(1, hits.size());
  assertEquals("a <b>penn</b>y saved is a <b>penn</b>y earned", hits.get(0).key);
  suggester.close();
}
/**
 * Indexes many random "boo..." terms plus "foo bar boo far", then checks that randomly edited
 * variants of "foo bar boo" always yield exactly that one suggestion with weight 12.
 */
public void testRandomEdits() throws IOException {
  List<Input> entries = new ArrayList<Input>();
  int noiseCount = atLeast(100);
  for (int n = 0; n < noiseCount; n++) {
    String term = "boo" + _TestUtil.randomSimpleString(random());
    int weight = 1 + random().nextInt(100);
    entries.add(new Input(term, weight));
  }
  entries.add(new Input("foo bar boo far", 12));

  MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
  int options = FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP;
  FuzzySuggester suggester =
      new FuzzySuggester(
          analyzer,
          analyzer,
          options,
          256,
          -1,
          true,
          FuzzySuggester.DEFAULT_MAX_EDITS,
          FuzzySuggester.DEFAULT_TRANSPOSITIONS,
          0,
          FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH,
          FuzzySuggester.DEFAULT_UNICODE_AWARE);
  suggester.build(new InputArrayIterator(entries));

  int rounds = atLeast(10);
  for (int round = 0; round < rounds; round++) {
    String edited = addRandomEdit("foo bar boo", FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX);
    List<LookupResult> hits =
        suggester.lookup(_TestUtil.stringToCharSequence(edited, random()), false, 2);
    // The edited query doubles as the failure message.
    assertEquals(edited, 1, hits.size());
    LookupResult only = hits.get(0);
    assertEquals("foo bar boo far", only.key.toString());
    assertEquals(12, only.value, 0.01F);
  }
}
/**
 * Cyrillic variant of the random-edit test: indexes many random "буу..." terms plus
 * "фуу бар буу фар" and checks that edited variants of "фуу бар буу" yield exactly that one
 * suggestion with weight 12. The last constructor argument is passed as {@code true} here.
 */
public void testNonLatinRandomEdits() throws IOException {
  List<Input> entries = new ArrayList<Input>();
  int noiseCount = atLeast(100);
  for (int n = 0; n < noiseCount; n++) {
    String term = "буу" + _TestUtil.randomSimpleString(random());
    int weight = 1 + random().nextInt(100);
    entries.add(new Input(term, weight));
  }
  entries.add(new Input("фуу бар буу фар", 12));

  MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
  int options = FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP;
  FuzzySuggester suggester =
      new FuzzySuggester(
          analyzer,
          analyzer,
          options,
          256,
          -1,
          true,
          FuzzySuggester.DEFAULT_MAX_EDITS,
          FuzzySuggester.DEFAULT_TRANSPOSITIONS,
          0,
          FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH,
          true);
  suggester.build(new InputArrayIterator(entries));

  int rounds = atLeast(10);
  for (int round = 0; round < rounds; round++) {
    String edited = addRandomEdit("фуу бар буу", 0);
    List<LookupResult> hits =
        suggester.lookup(_TestUtil.stringToCharSequence(edited, random()), false, 2);
    // The edited query doubles as the failure message.
    assertEquals(edited, 1, hits.size());
    LookupResult only = hits.get(0);
    assertEquals("фуу бар буу фар", only.key.toString());
    assertEquals(12, only.value, 0.01F);
  }
}
public void testNoSeps() throws Exception { Input[] keys = new Input[] { new Input("ab cd", 0), new Input("abcd", 1), }; int options = 0; Analyzer a = new MockAnalyzer(random()); FuzzySuggester suggester = new FuzzySuggester(a, a, options, 256, -1, true, 1, true, 1, 3, false); suggester.build(new InputArrayIterator(keys)); // TODO: would be nice if "ab " would allow the test to // pass, and more generally if the analyzer can know // that the user's current query has ended at a word, // but, analyzers don't produce SEP tokens! List<LookupResult> r = suggester.lookup(_TestUtil.stringToCharSequence("ab c", random()), false, 2); assertEquals(2, r.size()); // With no PRESERVE_SEPS specified, "ab c" should also // complete to "abcd", which has higher weight so should // appear first: assertEquals("abcd", r.get(0).key.toString()); }
public void testNoSeps() throws Exception { Input[] keys = new Input[] { new Input("ab cd", 0), new Input("abcd", 1), }; int options = 0; Analyzer a = new MockAnalyzer(random()); AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, options, 256, -1, true); suggester.build(new InputArrayIterator(keys)); // TODO: would be nice if "ab " would allow the test to // pass, and more generally if the analyzer can know // that the user's current query has ended at a word, // but, analyzers don't produce SEP tokens! List<LookupResult> r = suggester.lookup(_TestUtil.stringToCharSequence("ab c", random()), false, 2); assertEquals(2, r.size()); // With no PRESERVE_SEPS specified, "ab c" should also // complete to "abcd", which has higher weight so should // appear first: assertEquals("abcd", r.get(0).key.toString()); }
/**
 * Builds an FSTCompletionLookup from a few thousand random numeric strings with random
 * weights and checks that every suggestion returned for every proper prefix of every term
 * actually starts with that prefix.
 */
public void testRandom() throws Exception {
  List<Input> freqs = new ArrayList<Input>();
  Random rnd = random();

  // Draw the entry count once. Evaluating rnd.nextInt(2500) in the loop condition re-rolled
  // the bound on every iteration, which skewed the count towards the low end of the range
  // and consumed one extra random value per pass.
  final int numEntries = 2500 + rnd.nextInt(2500);
  for (int i = 0; i < numEntries; i++) {
    int weight = rnd.nextInt(100);
    freqs.add(new Input("" + rnd.nextLong(), weight));
  }

  FSTCompletionLookup lookup = new FSTCompletionLookup();
  lookup.build(new InputArrayIterator(freqs.toArray(new Input[freqs.size()])));

  for (Input tf : freqs) {
    final String term = tf.term.utf8ToString();
    // Prefixes of length 1 .. term.length() - 1; the full term itself is not queried.
    for (int i = 1; i < term.length(); i++) {
      String prefix = term.substring(0, i);
      for (LookupResult lr : lookup.lookup(stringToCharSequence(prefix), true, 10)) {
        assertTrue(lr.key.toString().startsWith(prefix));
      }
    }
  }
}
@Override public int compare(LookupResult a, LookupResult b) { if (a.value > b.value) { return -1; } else if (a.value < b.value) { return 1; } else { // Tie break by UTF16 sort order: return ((String) a.key).compareTo((String) b.key); } }
/**
 * Renders results as space-separated {@code key/score} pairs, where score is the result's
 * value divided by {@code Long.MAX_VALUE}, formatted with two decimals in the root locale.
 *
 * @param results the results to render
 * @return the rendered pairs with leading and trailing whitespace trimmed
 */
private static String toString(List<LookupResult> results) {
  StringBuilder out = new StringBuilder();
  for (LookupResult hit : results) {
    double scaled = ((double) hit.value) / Long.MAX_VALUE;
    out.append(' ')
        .append(hit.key)
        .append('/')
        .append(String.format(Locale.ROOT, "%.2f", scaled));
  }
  // Every pair was written with a leading space; trim() removes the first one.
  return out.toString().trim();
}
/**
 * Looks up "penn" with highlighting on and checks that {@code key} holds the plain text while
 * {@code highlightKey} wraps both matching prefixes in {@code <b>} tags.
 */
public void testHighlight() throws Exception {
  Input[] entries = {
    new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
  };

  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  AnalyzingInfixSuggester suggester =
      new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), analyzer, analyzer, 3);
  suggester.build(new InputArrayIterator(entries));

  List<LookupResult> hits =
      suggester.lookup(TestUtil.stringToCharSequence("penn", random()), 10, true, true);
  assertEquals(1, hits.size());

  LookupResult only = hits.get(0);
  assertEquals("a penny saved is a penny earned", only.key);
  assertEquals("a <b>penn</b>y saved is a <b>penn</b>y earned", only.highlightKey);
  suggester.close();
}
public void testHighlightCaseChange() throws Exception { Input keys[] = new Input[] { new Input("a Penny saved is a penny earned", 10, new BytesRef("foobaz")), }; Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true); AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3); suggester.build(new InputArrayIterator(keys)); List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("penn", random()), 10, true, true); assertEquals(1, results.size()); assertEquals("a Penny saved is a penny earned", results.get(0).key); assertEquals("a <b>Penn</b>y saved is a <b>penn</b>y earned", results.get(0).highlightKey); suggester.close(); // Try again, but overriding addPrefixMatch to highlight // the entire hit: suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), a, a, 3) { @Override protected void addPrefixMatch(StringBuilder sb, String surface, String analyzed, String prefixToken) { sb.append("<b>"); sb.append(surface); sb.append("</b>"); } }; suggester.build(new InputArrayIterator(keys)); results = suggester.lookup(TestUtil.stringToCharSequence("penn", random()), 10, true, true); assertEquals(1, results.size()); assertEquals("a Penny saved is a penny earned", results.get(0).key); assertEquals("a <b>Penny</b> saved is a <b>penny</b> earned", results.get(0).highlightKey); suggester.close(); }
/**
 * Adds one entry after an empty build and looks up "pen p": the result must highlight the
 * whole word "pen" and the leading "p" of "pretty", and carry the stored weight and payload.
 */
public void testBothExactAndPrefix() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  AnalyzingInfixSuggester suggester =
      new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, newDirectory(), analyzer, analyzer, 3);

  // Build with no entries, then add the single entry and refresh before searching.
  suggester.build(new InputArrayIterator(new Input[0]));
  suggester.add(new BytesRef("the pen is pretty"), null, 10, new BytesRef("foobaz"));
  suggester.refresh();

  List<LookupResult> hits =
      suggester.lookup(TestUtil.stringToCharSequence("pen p", random()), 10, true, true);
  assertEquals(1, hits.size());

  LookupResult only = hits.get(0);
  assertEquals("the pen is pretty", only.key);
  assertEquals("the <b>pen</b> is <b>p</b>retty", only.highlightKey);
  assertEquals(10, only.value);
  assertEquals(new BytesRef("foobaz"), only.payload);
  suggester.close();
}
/** * basic "standardanalyzer" test with stopword removal */ public void testStandard() throws Exception { Input keys[] = new Input[] { new Input("the ghost of christmas past", 50), }; Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET); FuzzySuggester suggester = new FuzzySuggester(standard, standard, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, false, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS, FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, FuzzySuggester.DEFAULT_UNICODE_AWARE); suggester.build(new InputArrayIterator(keys)); List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("the ghost of chris", random()), false, 1); assertEquals(1, results.size()); assertEquals("the ghost of christmas past", results.get(0).key.toString()); assertEquals(50, results.get(0).value, 0.01F); // omit the 'the' since its a stopword, its suggested anyway results = suggester.lookup(TestUtil.stringToCharSequence("ghost of chris", random()), false, 1); assertEquals(1, results.size()); assertEquals("the ghost of christmas past", results.get(0).key.toString()); assertEquals(50, results.get(0).value, 0.01F); // omit the 'the' and 'of' since they are stopwords, its suggested anyway results = suggester.lookup(TestUtil.stringToCharSequence("ghost chris", random()), false, 1); assertEquals(1, results.size()); assertEquals("the ghost of christmas past", results.get(0).key.toString()); assertEquals(50, results.get(0).value, 0.01F); }
/** A FuzzySuggester built from no entries must return an empty result list for "a". */
public void testEmpty() throws Exception {
  MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
  FuzzySuggester suggester = new FuzzySuggester(analyzer);

  Input[] noEntries = new Input[0];
  suggester.build(new InputArrayIterator(noEntries));

  List<LookupResult> hits = suggester.lookup("a", false, 20);
  assertTrue(hits.isEmpty());
}
public void testExactFirst() throws Exception { Analyzer a = getUnusualAnalyzer(); FuzzySuggester suggester = new FuzzySuggester(a, a, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, true, 1, true, 1, 3, false); suggester.build(new InputArrayIterator(new Input[] { new Input("x y", 1), new Input("x y z", 3), new Input("x", 2), new Input("z z z", 20), })); //System.out.println("ALL: " + suggester.lookup("x y", false, 6)); for(int topN=1;topN<6;topN++) { List<LookupResult> results = suggester.lookup("x y", false, topN); //System.out.println("topN=" + topN + " " + results); assertEquals(Math.min(topN, 4), results.size()); assertEquals("x y", results.get(0).key); assertEquals(1, results.get(0).value); if (topN > 1) { assertEquals("z z z", results.get(1).key); assertEquals(20, results.get(1).value); if (topN > 2) { assertEquals("x y z", results.get(2).key); assertEquals(3, results.get(2).value); if (topN > 3) { assertEquals("x", results.get(3).key); assertEquals(2, results.get(3).value); } } } } }
/**
 * Without EXACT_FIRST, the query "p" must return the entries purely in descending weight
 * order for result counts 1 to 5.
 */
public void testNonExactFirst() throws Exception {
  Analyzer analyzer = getUnusualAnalyzer();
  FuzzySuggester suggester =
      new FuzzySuggester(
          analyzer,
          analyzer,
          AnalyzingSuggester.PRESERVE_SEP,
          256,
          -1,
          true,
          1,
          true,
          1,
          3,
          false);

  Input[] entries = {
    new Input("x y", 1),
    new Input("x y z", 3),
    new Input("x", 2),
    new Input("z z z", 20),
  };
  suggester.build(new InputArrayIterator(entries));

  for (int topN = 1; topN <= 5; topN++) {
    List<LookupResult> hits = suggester.lookup("p", false, topN);

    // Four entries exist in total, which caps the number of hits.
    assertEquals(Math.min(topN, 4), hits.size());

    assertEquals("z z z", hits.get(0).key);
    assertEquals(20, hits.get(0).value);

    if (topN > 1) {
      assertEquals("x y z", hits.get(1).key);
      assertEquals(3, hits.get(1).value);
    }
    if (topN > 2) {
      assertEquals("x", hits.get(2).key);
      assertEquals(2, hits.get(2).value);
    }
    if (topN > 3) {
      assertEquals("x y", hits.get(3).key);
      assertEquals(1, hits.get(3).value);
    }
  }
}
/**
 * Orders results by descending {@code value}; results with equal values are ordered by
 * {@code CHARSEQUENCE_COMPARATOR} on their keys. With assertions enabled, two results with
 * equal value and equal key trigger an assertion failure.
 *
 * @return a negative number if {@code a} sorts first, a positive number if {@code b} does
 */
@Override
public int compare(LookupResult a, LookupResult b) {
  // The larger value sorts first.
  if (a.value > b.value) {
    return -1;
  }
  if (a.value < b.value) {
    return 1;
  }
  // Equal values: fall back to key order, which is asserted never to be a tie.
  final int byKey = CHARSEQUENCE_COMPARATOR.compare(a.key, b.key);
  assert byKey != 0 : "term=" + a.key;
  return byKey;
}
/** * basic "standardanalyzer" test with stopword removal */ public void testStandard() throws Exception { Input keys[] = new Input[] { new Input("the ghost of christmas past", 50), }; Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET); AnalyzingSuggester suggester = new AnalyzingSuggester(standard, standard, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, false); suggester.build(new InputArrayIterator(keys)); List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("the ghost of chris", random()), false, 1); assertEquals(1, results.size()); assertEquals("the ghost of christmas past", results.get(0).key.toString()); assertEquals(50, results.get(0).value, 0.01F); // omit the 'the' since its a stopword, its suggested anyway results = suggester.lookup(TestUtil.stringToCharSequence("ghost of chris", random()), false, 1); assertEquals(1, results.size()); assertEquals("the ghost of christmas past", results.get(0).key.toString()); assertEquals(50, results.get(0).value, 0.01F); // omit the 'the' and 'of' since they are stopwords, its suggested anyway results = suggester.lookup(TestUtil.stringToCharSequence("ghost chris", random()), false, 1); assertEquals(1, results.size()); assertEquals("the ghost of christmas past", results.get(0).key.toString()); assertEquals(50, results.get(0).value, 0.01F); }