/** checks condition of the concatenation of two strings */
// note: this is pretty stupid, we really should subtract strip from the condition up front and just check the stem
// but this is a little bit more complicated.
private boolean checkCondition(int condition, char c1[], int c1off, int c1len, char c2[], int c2off, int c2len) {
  if (condition != 0) {
    CharacterRunAutomaton pattern = dictionary.patterns.get(condition);
    int state = pattern.getInitialState();
    for (int i = c1off; i < c1off + c1len; i++) {
      state = pattern.step(state, c1[i]);
      if (state == -1) {
        return false;
      }
    }
    for (int i = c2off; i < c2off + c2len; i++) {
      state = pattern.step(state, c2[i]);
      if (state == -1) {
        return false;
      }
    }
    return pattern.isAccept(state);
  }
  return true;
}
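// A minimal, hedged sketch (a hypothetical helper, not part of the original class) of the same
// step()/isAccept() idiom that checkCondition uses, applied to a single CharSequence. It relies
// only on CharacterRunAutomaton methods already used above (getInitialState, step, isAccept, run).
private static boolean matchesWhole(CharacterRunAutomaton pattern, CharSequence s) {
  int state = pattern.getInitialState();
  for (int i = 0; i < s.length(); i++) {
    state = pattern.step(state, s.charAt(i));
    if (state == -1) {
      return false; // no transition for this character: the prefix already fails the pattern
    }
  }
  // accept only if the automaton ends in an accept state;
  // for whole strings this is equivalent to pattern.run(s.toString())
  return pattern.isAccept(state);
}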
public void testMaxSizeEndHighlight() throws Exception {
  TestHighlightRunner helper = new TestHighlightRunner() {
    @Override
    public void run() throws Exception {
      CharacterRunAutomaton stopWords = new CharacterRunAutomaton(new RegExp("i[nt]").toAutomaton());
      TermQuery query = new TermQuery(new Term("text", "searchterm"));
      String text = "this is a text with searchterm in it";
      SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
      Highlighter hg = getHighlighter(query, "text", fm);
      hg.setTextFragmenter(new NullFragmenter());
      hg.setMaxDocCharsToAnalyze(36);
      String match = hg.getBestFragment(
          new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords), "text", text);
      assertTrue(
          "Matched text should contain remainder of text after highlighted query ",
          match.endsWith("in it"));
    }
  };
  helper.start();
}
@Override
public void setUp() throws Exception {
  super.setUp();
  Automaton single = new Automaton();
  int initial = single.createState();
  int accept = single.createState();
  single.setAccept(accept, true);
  // build an automaton matching this jvm's letter definition
  for (int i = 0; i <= 0x10FFFF; i++) {
    if (Character.isLetter(i)) {
      single.addTransition(initial, accept, i);
    }
  }
  Automaton repeat = Operations.repeat(single);
  jvmLetter = new CharacterRunAutomaton(repeat);
}
public void testBoost() throws Exception {
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(Automata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);
  PrecedenceQueryParser qp = new PrecedenceQueryParser();
  qp.setAnalyzer(oneStopAnalyzer);

  Query q = qp.parse("on^1.0", "field");
  assertNotNull(q);
  q = qp.parse("\"hello\"^2.0", "field");
  assertNotNull(q);
  assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
  q = qp.parse("hello^2.0", "field");
  assertNotNull(q);
  assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
  q = qp.parse("\"on\"^1.0", "field");
  assertNotNull(q);

  q = getParser(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET))
      .parse("the^3", "field");
  assertNotNull(q);
}
public void testBoost() throws Exception {
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(Automata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);
  StandardQueryParser qp = new StandardQueryParser();
  qp.setAnalyzer(oneStopAnalyzer);

  Query q = qp.parse("on^1.0", "field");
  assertNotNull(q);
  q = qp.parse("\"hello\"^2.0", "field");
  assertNotNull(q);
  assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
  q = qp.parse("hello^2.0", "field");
  assertNotNull(q);
  assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
  q = qp.parse("\"on\"^1.0", "field");
  assertNotNull(q);

  StandardQueryParser qp2 = new StandardQueryParser();
  qp2.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET));
  q = qp2.parse("the^3", "field");
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertEquals("", q.toString());
  assertEquals(1.0f, q.getBoost(), 0.01f);
}
public void testStopwords() throws Exception {
  StandardQueryParser qp = new StandardQueryParser();
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
  qp.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet));

  Query result = qp.parse("a:the OR a:foo", "a");
  assertNotNull("result is null and it shouldn't be", result);
  assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
  assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 0,
      ((BooleanQuery) result).clauses().size() == 0);

  result = qp.parse("a:woo OR a:the", "a");
  assertNotNull("result is null and it shouldn't be", result);
  assertTrue("result is not a TermQuery", result instanceof TermQuery);

  result = qp.parse("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)", "a");
  assertNotNull("result is null and it shouldn't be", result);
  assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
  if (VERBOSE) System.out.println("Result: " + result);
  assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 2,
      ((BooleanQuery) result).clauses().size() == 2);
}
public void testBoost() throws Exception {
  CharacterRunAutomaton stopWords = new CharacterRunAutomaton(Automata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords);
  CommonQueryParserConfiguration qp = getParserConfig(oneStopAnalyzer);

  Query q = getQuery("on^1.0", qp);
  assertNotNull(q);
  q = getQuery("\"hello\"^2.0", qp);
  assertNotNull(q);
  assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
  q = getQuery("hello^2.0", qp);
  assertNotNull(q);
  assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
  q = getQuery("\"on\"^1.0", qp);
  assertNotNull(q);

  Analyzer a2 = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
  CommonQueryParserConfiguration qp2 = getParserConfig(a2);
  q = getQuery("the^3", qp2);
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertEquals("", q.toString());
  assertEquals(1.0f, q.getBoost(), 0.01f);
}
public void testStopwords() throws Exception {
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
  CommonQueryParserConfiguration qp =
      getParserConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet));

  Query result = getQuery("field:the OR field:foo", qp);
  assertNotNull("result is null and it shouldn't be", result);
  assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
  assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 0,
      ((BooleanQuery) result).clauses().size() == 0);

  result = getQuery("field:woo OR field:the", qp);
  assertNotNull("result is null and it shouldn't be", result);
  assertTrue("result is not a TermQuery", result instanceof TermQuery);

  result = getQuery("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)", qp);
  assertNotNull("result is null and it shouldn't be", result);
  assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
  if (VERBOSE) System.out.println("Result: " + result);
  assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 2,
      ((BooleanQuery) result).clauses().size() == 2);
}
/** Test a configuration where two characters make a term */
public void testTwoChars() throws Exception {
  CharacterRunAutomaton single = new CharacterRunAutomaton(new RegExp("..").toAutomaton());
  Analyzer a = new MockAnalyzer(random(), single, false);
  assertAnalyzesTo(a, "foobar",
      new String[] { "fo", "ob", "ar" },
      new int[] { 0, 2, 4 },
      new int[] { 2, 4, 6 });
  // make sure that when the last term is a "partial" match, end() is correct
  assertTokenStreamContents(a.tokenStream("bogus", "fooba"),
      new String[] { "fo", "ob" },
      new int[] { 0, 2 },
      new int[] { 2, 4 },
      new int[] { 1, 1 },
      new Integer(5));
  checkRandomData(random(), a, 100);
}
/** Test a configuration where three characters make a term */
public void testThreeChars() throws Exception {
  CharacterRunAutomaton single = new CharacterRunAutomaton(new RegExp("...").toAutomaton());
  Analyzer a = new MockAnalyzer(random(), single, false);
  assertAnalyzesTo(a, "foobar",
      new String[] { "foo", "bar" },
      new int[] { 0, 3 },
      new int[] { 3, 6 });
  // make sure that when the last term is a "partial" match, end() is correct
  assertTokenStreamContents(a.tokenStream("bogus", "fooba"),
      new String[] { "foo" },
      new int[] { 0 },
      new int[] { 3 },
      new int[] { 1 },
      new Integer(5));
  checkRandomData(random(), a, 100);
}
/** Test a configuration where a word starts with one uppercase letter */
public void testUppercase() throws Exception {
  CharacterRunAutomaton single = new CharacterRunAutomaton(new RegExp("[A-Z][a-z]*").toAutomaton());
  Analyzer a = new MockAnalyzer(random(), single, false);
  assertAnalyzesTo(a, "FooBarBAZ",
      new String[] { "Foo", "Bar", "B", "A", "Z" },
      new int[] { 0, 3, 6, 7, 8 },
      new int[] { 3, 6, 7, 8, 9 });
  assertAnalyzesTo(a, "aFooBar",
      new String[] { "Foo", "Bar" },
      new int[] { 1, 4 },
      new int[] { 4, 7 });
  checkRandomData(random(), a, 100);
}
/** blast some random strings through differently configured tokenizers */
public void testRandomRegexps() throws Exception {
  int iters = atLeast(30);
  for (int i = 0; i < iters; i++) {
    final CharacterRunAutomaton dfa =
        new CharacterRunAutomaton(AutomatonTestUtil.randomAutomaton(random()), Integer.MAX_VALUE);
    final boolean lowercase = random().nextBoolean();
    final int limit = TestUtil.nextInt(random(), 0, 500);
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer t = new MockTokenizer(reader, dfa, lowercase, limit);
        return new TokenStreamComponents(t, t);
      }
    };
    checkRandomData(random(), a, 100);
    a.close();
  }
}
@Test
public void testBoost() throws Exception {
  CharacterRunAutomaton stopWords = new CharacterRunAutomaton(Automata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords);
  QueryParser qp = getParserConfig(oneStopAnalyzer);

  Query q = getQuery("on^1.0", qp);
  Assert.assertNotNull(q);
  q = getQuery("\"hello\"^2.0", qp);
  Assert.assertNotNull(q);
  Assert.assertEquals(((BoostQuery) q).getBoost(), (float) 2.0, (float) 0.5);
  q = getQuery("hello^2.0", qp);
  Assert.assertNotNull(q);
  Assert.assertEquals(((BoostQuery) q).getBoost(), (float) 2.0, (float) 0.5);
  q = getQuery("\"on\"^1.0", qp);
  Assert.assertNotNull(q);

  Analyzer a2 = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
  QueryParser qp2 = getParserConfig(a2);
  q = getQuery("the^3", qp2);
  // "the" is a stop word so the result is an empty query:
  Assert.assertNotNull(q);
  assertMatchNoDocsQuery(q);
  Assert.assertFalse(q instanceof BoostQuery);
}
@Test
public void testStopwords() throws Exception {
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
  QueryParser qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet));

  Query result = getQuery("field:the OR field:foo", qp);
  Assert.assertNotNull("result is null and it shouldn't be", result);
  Assert.assertTrue("result is not a BooleanQuery or MatchNoDocsQuery",
      result instanceof BooleanQuery || result instanceof MatchNoDocsQuery);
  if (result instanceof BooleanQuery) {
    Assert.assertEquals(0, ((BooleanQuery) result).clauses().size());
  }

  result = getQuery("field:woo OR field:the", qp);
  Assert.assertNotNull("result is null and it shouldn't be", result);
  Assert.assertTrue("result is not a TermQuery", result instanceof TermQuery);

  result = getQuery("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)", qp);
  Assert.assertNotNull("result is null and it shouldn't be", result);
  Assert.assertTrue("result is not a BoostQuery", result instanceof BoostQuery);
  result = ((BoostQuery) result).getQuery();
  Assert.assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
  if (VERBOSE) System.out.println("Result: " + result);
  Assert.assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 2,
      ((BooleanQuery) result).clauses().size() == 2);
}
public void testMaxSizeEndHighlight() throws Exception {
  TestHighlightRunner helper = new TestHighlightRunner() {
    @Override
    public void run() throws Exception {
      CharacterRunAutomaton stopWords = new CharacterRunAutomaton(new RegExp("i[nt]").toAutomaton());
      TermQuery query = new TermQuery(new Term("text", "searchterm"));
      String text = "this is a text with searchterm in it";
      SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
      Highlighter hg = getHighlighter(query, "text", fm);
      hg.setTextFragmenter(new NullFragmenter());
      hg.setMaxDocCharsToAnalyze(36);
      String match = hg.getBestFragment(
          new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords, true), "text", text);
      assertTrue(
          "Matched text should contain remainder of text after highlighted query ",
          match.endsWith("in it"));
    }
  };
  helper.start();
}
@Override
public void setUp() throws Exception {
  super.setUp();
  // build an automaton matching this jvm's letter definition
  State initial = new State();
  State accept = new State();
  accept.setAccept(true);
  for (int i = 0; i <= 0x10FFFF; i++) {
    if (Character.isLetter(i)) {
      initial.addTransition(new Transition(i, i, accept));
    }
  }
  Automaton single = new Automaton(initial);
  single.reduce();
  Automaton repeat = BasicOperations.repeat(single);
  jvmLetter = new CharacterRunAutomaton(repeat);
}
public void testBoost() throws Exception {
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet, true);
  PrecedenceQueryParser qp = new PrecedenceQueryParser();
  qp.setAnalyzer(oneStopAnalyzer);

  Query q = qp.parse("on^1.0", "field");
  assertNotNull(q);
  q = qp.parse("\"hello\"^2.0", "field");
  assertNotNull(q);
  assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
  q = qp.parse("hello^2.0", "field");
  assertNotNull(q);
  assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
  q = qp.parse("\"on\"^1.0", "field");
  assertNotNull(q);

  q = getParser(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true))
      .parse("the^3", "field");
  assertNotNull(q);
}
public void testBoost() throws Exception {
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet, true);
  StandardQueryParser qp = new StandardQueryParser();
  qp.setAnalyzer(oneStopAnalyzer);

  Query q = qp.parse("on^1.0", "field");
  assertNotNull(q);
  q = qp.parse("\"hello\"^2.0", "field");
  assertNotNull(q);
  assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
  q = qp.parse("hello^2.0", "field");
  assertNotNull(q);
  assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
  q = qp.parse("\"on\"^1.0", "field");
  assertNotNull(q);

  StandardQueryParser qp2 = new StandardQueryParser();
  qp2.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true));
  q = qp2.parse("the^3", "field");
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertEquals("", q.toString());
  assertEquals(1.0f, q.getBoost(), 0.01f);
}
public void testStopwords() throws Exception {
  StandardQueryParser qp = new StandardQueryParser();
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
  qp.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet, true));

  Query result = qp.parse("a:the OR a:foo", "a");
  assertNotNull("result is null and it shouldn't be", result);
  assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
  assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 0,
      ((BooleanQuery) result).clauses().size() == 0);

  result = qp.parse("a:woo OR a:the", "a");
  assertNotNull("result is null and it shouldn't be", result);
  assertTrue("result is not a TermQuery", result instanceof TermQuery);

  result = qp.parse("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)", "a");
  assertNotNull("result is null and it shouldn't be", result);
  assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
  if (VERBOSE) System.out.println("Result: " + result);
  assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 2,
      ((BooleanQuery) result).clauses().size() == 2);
}
public void testBoost() throws Exception {
  CharacterRunAutomaton stopWords = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords, true);
  CommonQueryParserConfiguration qp = getParserConfig(oneStopAnalyzer);

  Query q = getQuery("on^1.0", qp);
  assertNotNull(q);
  q = getQuery("\"hello\"^2.0", qp);
  assertNotNull(q);
  assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
  q = getQuery("hello^2.0", qp);
  assertNotNull(q);
  assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
  q = getQuery("\"on\"^1.0", qp);
  assertNotNull(q);

  Analyzer a2 = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true);
  CommonQueryParserConfiguration qp2 = getParserConfig(a2);
  q = getQuery("the^3", qp2);
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertEquals("", q.toString());
  assertEquals(1.0f, q.getBoost(), 0.01f);
}
public void testStopwords() throws Exception {
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
  CommonQueryParserConfiguration qp =
      getParserConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet, true));

  Query result = getQuery("field:the OR field:foo", qp);
  assertNotNull("result is null and it shouldn't be", result);
  assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
  assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 0,
      ((BooleanQuery) result).clauses().size() == 0);

  result = getQuery("field:woo OR field:the", qp);
  assertNotNull("result is null and it shouldn't be", result);
  assertTrue("result is not a TermQuery", result instanceof TermQuery);

  result = getQuery("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)", qp);
  assertNotNull("result is null and it shouldn't be", result);
  assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
  if (VERBOSE) System.out.println("Result: " + result);
  assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 2,
      ((BooleanQuery) result).clauses().size() == 2);
}
public void testBoost() throws Exception {
  CharacterRunAutomaton stopWords = new CharacterRunAutomaton(Automata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords);
  CommonQueryParserConfiguration qp = getParserConfig(oneStopAnalyzer);

  Query q = getQuery("on^1.0", qp);
  assertNotNull(q);
  q = getQuery("\"hello\"^2.0", qp);
  assertNotNull(q);
  assertEquals(getBoost(q), (float) 2.0, (float) 0.5);
  q = getQuery("hello^2.0", qp);
  assertNotNull(q);
  assertEquals(((BoostQuery) q).getBoost(), (float) 2.0, (float) 0.5);
  q = getQuery("\"on\"^1.0", qp);
  assertNotNull(q);

  Analyzer a2 = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
  CommonQueryParserConfiguration qp2 = getParserConfig(a2);
  q = getQuery("the^3", qp2);
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertEmpty(q);
  assertEquals(1.0f, getBoost(q), 0.01f);
}
public void testStopwords() throws Exception {
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
  CommonQueryParserConfiguration qp =
      getParserConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet));

  Query result = getQuery("field:the OR field:foo", qp);
  assertNotNull("result is null and it shouldn't be", result);
  System.out.println(result.getClass());
  assertTrue("result is not a SpanOrQuery, BooleanQuery or MatchNoDocsQuery",
      result instanceof SpanOrQuery || result instanceof BooleanQuery || result instanceof MatchNoDocsQuery);
  if (result instanceof BooleanQuery) {
    assertEquals(0, ((BooleanQuery) result).clauses().size());
  }

  result = getQuery("field:woo OR field:the", qp);
  assertNotNull("result is null and it shouldn't be", result);
  assertTrue("result is not a TermQuery", result instanceof TermQuery);

  result = getQuery("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)", qp);
  assertNotNull("result is null and it shouldn't be", result);
  assertTrue("result is not a BoostQuery", result instanceof BoostQuery);
  result = ((BoostQuery) result).getQuery();
  assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
  if (VERBOSE) System.out.println("Result: " + result);
  assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 2,
      ((BooleanQuery) result).clauses().size() == 2);
}
public void testBoost() throws Exception {
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);
  PrecedenceQueryParser qp = new PrecedenceQueryParser();
  qp.setAnalyzer(oneStopAnalyzer);

  Query q = qp.parse("on^1.0", "field");
  assertNotNull(q);
  q = qp.parse("\"hello\"^2.0", "field");
  assertNotNull(q);
  assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
  q = qp.parse("hello^2.0", "field");
  assertNotNull(q);
  assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
  q = qp.parse("\"on\"^1.0", "field");
  assertNotNull(q);

  q = getParser(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET))
      .parse("the^3", "field");
  assertNotNull(q);
}
public void testBoost() throws Exception {
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);
  StandardQueryParser qp = new StandardQueryParser();
  qp.setAnalyzer(oneStopAnalyzer);

  Query q = qp.parse("on^1.0", "field");
  assertNotNull(q);
  q = qp.parse("\"hello\"^2.0", "field");
  assertNotNull(q);
  assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
  q = qp.parse("hello^2.0", "field");
  assertNotNull(q);
  assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
  q = qp.parse("\"on\"^1.0", "field");
  assertNotNull(q);

  StandardQueryParser qp2 = new StandardQueryParser();
  qp2.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET));
  q = qp2.parse("the^3", "field");
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertEquals("", q.toString());
  assertEquals(1.0f, q.getBoost(), 0.01f);
}
public void testBoost() throws Exception {
  CharacterRunAutomaton stopWords = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords);
  CommonQueryParserConfiguration qp = getParserConfig(oneStopAnalyzer);

  Query q = getQuery("on^1.0", qp);
  assertNotNull(q);
  q = getQuery("\"hello\"^2.0", qp);
  assertNotNull(q);
  assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
  q = getQuery("hello^2.0", qp);
  assertNotNull(q);
  assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
  q = getQuery("\"on\"^1.0", qp);
  assertNotNull(q);

  Analyzer a2 = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
  CommonQueryParserConfiguration qp2 = getParserConfig(a2);
  q = getQuery("the^3", qp2);
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertEquals("", q.toString());
  assertEquals(1.0f, q.getBoost(), 0.01f);
}
/** blast some random strings through differently configured tokenizers */
public void testRandomRegexps() throws Exception {
  int iters = atLeast(30);
  for (int i = 0; i < iters; i++) {
    final CharacterRunAutomaton dfa = new CharacterRunAutomaton(AutomatonTestUtil.randomAutomaton(random()));
    final boolean lowercase = random().nextBoolean();
    final int limit = _TestUtil.nextInt(random(), 0, 500);
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer t = new MockTokenizer(reader, dfa, lowercase, limit);
        return new TokenStreamComponents(t, t);
      }
    };
    checkRandomData(random(), a, 100);
    a.close();
  }
}
static void checkRemoteWhitelist(CharacterRunAutomaton whitelist, RemoteInfo remoteInfo) {
  if (remoteInfo == null) {
    return;
  }
  String check = remoteInfo.getHost() + ':' + remoteInfo.getPort();
  if (whitelist.run(check)) {
    return;
  }
  throw new IllegalArgumentException(
      '[' + check + "] not whitelisted in " + REMOTE_CLUSTER_WHITELIST.getKey());
}
/**
 * Build the {@link CharacterRunAutomaton} that represents the reindex-from-remote whitelist and
 * make sure that it doesn't whitelist the world.
 */
static CharacterRunAutomaton buildRemoteWhitelist(List<String> whitelist) {
  if (whitelist.isEmpty()) {
    return new CharacterRunAutomaton(Automata.makeEmpty());
  }
  Automaton automaton = Regex.simpleMatchToAutomaton(whitelist.toArray(Strings.EMPTY_ARRAY));
  automaton = MinimizationOperations.minimize(automaton, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
  if (Operations.isTotal(automaton)) {
    throw new IllegalArgumentException("Refusing to start because whitelist " + whitelist + " accepts all addresses. "
        + "This would allow users to reindex-from-remote any URL they like effectively having Elasticsearch make HTTP GETs "
        + "for them.");
  }
  return new CharacterRunAutomaton(automaton);
}
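// A hedged usage sketch with illustrative values only (requires java.util.Arrays): the whitelist
// automaton is built once from the configured patterns and each remote "host:port" string is then
// checked with CharacterRunAutomaton.run(), which is what checkRemoteWhitelist does above.
CharacterRunAutomaton whitelist = buildRemoteWhitelist(Arrays.asList("localhost:*", "otherhost:9200"));
assert whitelist.run("localhost:9200");                  // matched by the "localhost:*" pattern
assert whitelist.run("otherhost:9200");                  // exact match
assert whitelist.run("evil.example.com:9200") == false;  // not whitelisted; checkRemoteWhitelist would throw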
public MockTokenizer(AttributeFactory factory, CharacterRunAutomaton runAutomaton, boolean lowerCase, int maxTokenLength) {
  super(factory);
  this.runAutomaton = runAutomaton;
  this.lowerCase = lowerCase;
  this.state = runAutomaton.getInitialState();
  this.maxTokenLength = maxTokenLength;
}
public MockTokenizer(AttributeFactory factory, CharacterRunAutomaton runAutomaton, boolean lowerCase, int maxTokenLength) {
  super(factory);
  this.runAutomaton = runAutomaton;
  this.lowerCase = lowerCase;
  this.state = runAutomaton.getInitialState();
  this.maxTokenLength = maxTokenLength;
  termAtt = addAttribute(CharTermAttribute.class);
  offsetAtt = addAttribute(OffsetAttribute.class);
}