public void testBoost() throws Exception {
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet, true);
  PrecedenceQueryParser qp = new PrecedenceQueryParser();
  qp.setAnalyzer(oneStopAnalyzer);

  Query q = qp.parse("on^1.0", "field");
  assertNotNull(q);

  q = qp.parse("\"hello\"^2.0", "field");
  assertNotNull(q);
  assertEquals(2.0f, q.getBoost(), 0.5f);

  q = qp.parse("hello^2.0", "field");
  assertNotNull(q);
  assertEquals(2.0f, q.getBoost(), 0.5f);

  q = qp.parse("\"on\"^1.0", "field");
  assertNotNull(q);

  q = getParser(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true,
      MockTokenFilter.ENGLISH_STOPSET, true)).parse("the^3", "field");
  assertNotNull(q);
}
public void testBoost() throws Exception {
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet, true);
  StandardQueryParser qp = new StandardQueryParser();
  qp.setAnalyzer(oneStopAnalyzer);

  Query q = qp.parse("on^1.0", "field");
  assertNotNull(q);

  q = qp.parse("\"hello\"^2.0", "field");
  assertNotNull(q);
  assertEquals(2.0f, q.getBoost(), 0.5f);

  q = qp.parse("hello^2.0", "field");
  assertNotNull(q);
  assertEquals(2.0f, q.getBoost(), 0.5f);

  q = qp.parse("\"on\"^1.0", "field");
  assertNotNull(q);

  StandardQueryParser qp2 = new StandardQueryParser();
  qp2.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true,
      MockTokenFilter.ENGLISH_STOPSET, true));
  q = qp2.parse("the^3", "field");
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertEquals("", q.toString());
  assertEquals(1.0f, q.getBoost(), 0.01f);
}
public void testBoost() throws Exception {
  CharacterRunAutomaton stopWords = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords, true);
  CommonQueryParserConfiguration qp = getParserConfig(oneStopAnalyzer);

  Query q = getQuery("on^1.0", qp);
  assertNotNull(q);

  q = getQuery("\"hello\"^2.0", qp);
  assertNotNull(q);
  assertEquals(2.0f, q.getBoost(), 0.5f);

  q = getQuery("hello^2.0", qp);
  assertNotNull(q);
  assertEquals(2.0f, q.getBoost(), 0.5f);

  q = getQuery("\"on\"^1.0", qp);
  assertNotNull(q);

  Analyzer a2 = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true,
      MockTokenFilter.ENGLISH_STOPSET, true);
  CommonQueryParserConfiguration qp2 = getParserConfig(a2);
  q = getQuery("the^3", qp2);
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertEquals("", q.toString());
  assertEquals(1.0f, q.getBoost(), 0.01f);
}
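// Editor's sketch (hypothetical test, not part of the original suite): the
// single-word stopset pattern used in the three variants above works because
// CharacterRunAutomaton accepts exactly the language of its automaton, so
// makeString("on") stops only the literal token "on":
public void demoSingleWordStopset() {
  CharacterRunAutomaton oneWord = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
  assertTrue(oneWord.run("on"));    // "on" is dropped as a stop word
  assertFalse(oneWord.run("onto")); // "onto" passes through unchanged
}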
/** initialize levenshtein DFAs up to maxDistance, if possible */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> runAutomata = dfaAtt.automata();
  //System.out.println("cached automata size: " + runAutomata.size());
  if (runAutomata.size() <= maxDistance
      && maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    LevenshteinAutomata builder = new LevenshteinAutomata(
        UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength),
        transpositions);
    for (int i = runAutomata.size(); i <= maxDistance; i++) {
      Automaton a = builder.toAutomaton(i);
      //System.out.println("compute automaton n=" + i);
      // constant prefix
      if (realPrefixLength > 0) {
        Automaton prefix = BasicAutomata.makeString(
            UnicodeUtil.newString(termText, 0, realPrefixLength));
        a = BasicOperations.concatenate(prefix, a);
      }
      runAutomata.add(new CompiledAutomaton(a, true, false));
    }
  }
  return runAutomata;
}
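// Editor's sketch (hypothetical values, not from the original source): the
// same prefix + Levenshtein construction for a fixed term "lucene" with a
// two-character constant prefix:
public void demoPrefixedLevenshtein() {
  Automaton lev = new LevenshteinAutomata("cene", false).toAutomaton(1);
  Automaton prefixed = BasicOperations.concatenate(BasicAutomata.makeString("lu"), lev);
  CharacterRunAutomaton matcher = new CharacterRunAutomaton(prefixed);
  assertTrue(matcher.run("lucane"));  // one substitution after the "lu" prefix
  assertFalse(matcher.run("xucene")); // edits inside the constant prefix never match
}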
public void testCustomProvider() throws IOException {
  AutomatonProvider myProvider = new AutomatonProvider() {
    // automaton that matches quick, brown, or bob
    private Automaton quickBrownAutomaton = BasicOperations.union(Arrays.asList(
        BasicAutomata.makeString("quick"),
        BasicAutomata.makeString("brown"),
        BasicAutomata.makeString("bob")));

    @Override
    public Automaton getAutomaton(String name) {
      if (name.equals("quickBrown")) return quickBrownAutomaton;
      else return null;
    }
  };
  RegexpQuery query = new RegexpQuery(newTerm("<quickBrown>"), RegExp.ALL, myProvider);
  assertEquals(1, searcher.search(query, 5).totalHits);
}
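// For comparison, an editor's sketch of the same match without a provider,
// written as an explicit union regexp (field name supplied by the test's
// newTerm helper):
RegexpQuery inline = new RegexpQuery(newTerm("quick|brown|bob"));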
/**
 * Test some very simple automata.
 */
public void testBasicAutomata() throws IOException {
  assertAutomatonHits(0, BasicAutomata.makeEmpty());
  assertAutomatonHits(0, BasicAutomata.makeEmptyString());
  assertAutomatonHits(2, BasicAutomata.makeAnyChar());
  assertAutomatonHits(3, BasicAutomata.makeAnyString());
  assertAutomatonHits(2, BasicAutomata.makeString("doc"));
  assertAutomatonHits(1, BasicAutomata.makeChar('a'));
  assertAutomatonHits(2, BasicAutomata.makeCharRange('a', 'b'));
  assertAutomatonHits(2, BasicAutomata.makeInterval(1233, 2346, 0));
  assertAutomatonHits(1, BasicAutomata.makeInterval(0, 2000, 0));
  assertAutomatonHits(2, BasicOperations.union(BasicAutomata.makeChar('a'),
      BasicAutomata.makeChar('b')));
  assertAutomatonHits(0, BasicOperations.intersection(BasicAutomata.makeChar('a'),
      BasicAutomata.makeChar('b')));
  assertAutomatonHits(1, BasicOperations.minus(BasicAutomata.makeCharRange('a', 'b'),
      BasicAutomata.makeChar('a')));
}
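// Editor's note on the interval assertions above: makeInterval(min, max,
// digits) accepts decimal strings in [min, max], and digits == 0 means no
// fixed width or zero padding is required. A quick sketch (hypothetical
// method, not in the original suite):
public void demoMakeInterval() {
  CharacterRunAutomaton interval =
      new CharacterRunAutomaton(BasicAutomata.makeInterval(1233, 2346, 0));
  assertTrue(interval.run("2000")); // in range
  assertFalse(interval.run("999")); // below the interval
}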
public void testOverlappedTokensLattice() throws Exception {
  final TokenStream ts = new CannedTokenStream(new Token[] {
      token("abc", 1, 1),
      token("xyz", 0, 2),
      token("def", 1, 1),
  });
  final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
  final Automaton a1 = BasicAutomata.makeString("xyz");
  final Automaton a2 = join("abc", "def");
  final Automaton expected = BasicOperations.union(a1, a2);
  //toDot(actual);
  assertTrue(BasicOperations.sameLanguage(expected, actual));
}
public void testSynOverHole() throws Exception {
  final TokenStream ts = new CannedTokenStream(new Token[] {
      token("a", 1, 1),
      token("X", 0, 2),
      token("b", 2, 1),
  });
  final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
  final Automaton a1 = BasicOperations.union(
      join(s2a("a"), SEP_A, HOLE_A),
      BasicAutomata.makeString("X"));
  final Automaton expected = BasicOperations.concatenate(a1, join(SEP_A, s2a("b")));
  //toDot(actual);
  assertTrue(BasicOperations.sameLanguage(expected, actual));
}
public void testOverlappedTokensLattice2() throws Exception {
  final TokenStream ts = new CannedTokenStream(new Token[] {
      token("abc", 1, 1),
      token("xyz", 0, 3),
      token("def", 1, 1),
      token("ghi", 1, 1),
  });
  final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
  final Automaton a1 = BasicAutomata.makeString("xyz");
  final Automaton a2 = join("abc", "def", "ghi");
  final Automaton expected = BasicOperations.union(a1, a2);
  //toDot(actual);
  assertTrue(BasicOperations.sameLanguage(expected, actual));
}
public void testBoost() throws Exception {
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);
  PrecedenceQueryParser qp = new PrecedenceQueryParser();
  qp.setAnalyzer(oneStopAnalyzer);

  Query q = qp.parse("on^1.0", "field");
  assertNotNull(q);

  q = qp.parse("\"hello\"^2.0", "field");
  assertNotNull(q);
  assertEquals(2.0f, q.getBoost(), 0.5f);

  q = qp.parse("hello^2.0", "field");
  assertNotNull(q);
  assertEquals(2.0f, q.getBoost(), 0.5f);

  q = qp.parse("\"on\"^1.0", "field");
  assertNotNull(q);

  q = getParser(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true,
      MockTokenFilter.ENGLISH_STOPSET)).parse("the^3", "field");
  assertNotNull(q);
}
public void testBoost() throws Exception {
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);
  StandardQueryParser qp = new StandardQueryParser();
  qp.setAnalyzer(oneStopAnalyzer);

  Query q = qp.parse("on^1.0", "field");
  assertNotNull(q);

  q = qp.parse("\"hello\"^2.0", "field");
  assertNotNull(q);
  assertEquals(2.0f, q.getBoost(), 0.5f);

  q = qp.parse("hello^2.0", "field");
  assertNotNull(q);
  assertEquals(2.0f, q.getBoost(), 0.5f);

  q = qp.parse("\"on\"^1.0", "field");
  assertNotNull(q);

  StandardQueryParser qp2 = new StandardQueryParser();
  qp2.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true,
      MockTokenFilter.ENGLISH_STOPSET));
  q = qp2.parse("the^3", "field");
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertEquals("", q.toString());
  assertEquals(1.0f, q.getBoost(), 0.01f);
}
public void testBoost() throws Exception {
  CharacterRunAutomaton stopWords = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords);
  CommonQueryParserConfiguration qp = getParserConfig(oneStopAnalyzer);

  Query q = getQuery("on^1.0", qp);
  assertNotNull(q);

  q = getQuery("\"hello\"^2.0", qp);
  assertNotNull(q);
  assertEquals(2.0f, q.getBoost(), 0.5f);

  q = getQuery("hello^2.0", qp);
  assertNotNull(q);
  assertEquals(2.0f, q.getBoost(), 0.5f);

  q = getQuery("\"on\"^1.0", qp);
  assertNotNull(q);

  Analyzer a2 = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true,
      MockTokenFilter.ENGLISH_STOPSET);
  CommonQueryParserConfiguration qp2 = getParserConfig(a2);
  q = getQuery("the^3", qp2);
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertEquals("", q.toString());
  assertEquals(1.0f, q.getBoost(), 0.01f);
}
@BeforeClass
public static void beforeClass() throws Exception {
  Random random = random();
  directory = newDirectory();
  stopword = "" + randomChar();
  CharacterRunAutomaton stopset = new CharacterRunAutomaton(BasicAutomata.makeString(stopword));
  analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false, stopset, true);
  RandomIndexWriter iw = new RandomIndexWriter(random, directory, analyzer);
  Document doc = new Document();
  Field id = new StringField("id", "", Field.Store.NO);
  Field field = new TextField("field", "", Field.Store.NO);
  doc.add(id);
  doc.add(field);

  // index some docs
  int numDocs = atLeast(1000);
  for (int i = 0; i < numDocs; i++) {
    id.setStringValue(Integer.toString(i));
    field.setStringValue(randomFieldContents());
    iw.addDocument(doc);
  }

  // delete some docs
  int numDeletes = numDocs / 20;
  for (int i = 0; i < numDeletes; i++) {
    Term toDelete = new Term("id", Integer.toString(random.nextInt(numDocs)));
    if (random.nextBoolean()) {
      iw.deleteDocuments(toDelete);
    } else {
      iw.deleteDocuments(new TermQuery(toDelete));
    }
  }

  reader = iw.getReader();
  s1 = newSearcher(reader);
  s2 = newSearcher(reader);
  iw.close();
}
/**
 * Test that a nondeterministic automaton works correctly. (It will be
 * determinized.)
 */
public void testNFA() throws IOException {
  // accept "this" or "three"; the union is an NFA (two transitions for 't'
  // from the initial state)
  Automaton nfa = BasicOperations.union(BasicAutomata.makeString("this"),
      BasicAutomata.makeString("three"));
  assertAutomatonHits(2, nfa);
}
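// Editor's sketch: determinization can also be forced by hand; in this
// automaton API, BasicOperations.determinize mutates its argument in place
// (hypothetical method, not in the original suite):
public void demoExplicitDeterminize() {
  Automaton nfa = BasicOperations.union(BasicAutomata.makeString("this"),
      BasicAutomata.makeString("three"));
  BasicOperations.determinize(nfa);
  assertTrue(nfa.isDeterministic());
}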
public void testEquals() {
  AutomatonQuery a1 = new AutomatonQuery(newTerm("foobar"),
      BasicAutomata.makeString("foobar"));
  // reference to a1
  AutomatonQuery a2 = a1;
  // same as a1 (accepts the same language, same term)
  AutomatonQuery a3 = new AutomatonQuery(newTerm("foobar"),
      BasicOperations.concatenate(BasicAutomata.makeString("foo"),
          BasicAutomata.makeString("bar")));
  // different than a1 (same term, but different language)
  AutomatonQuery a4 = new AutomatonQuery(newTerm("foobar"),
      BasicAutomata.makeString("different"));
  // different than a1 (different term, same language)
  AutomatonQuery a5 = new AutomatonQuery(newTerm("blah"),
      BasicAutomata.makeString("foobar"));

  assertEquals(a1, a2);
  assertEquals(a1, a3);

  // different class
  AutomatonQuery w1 = new WildcardQuery(newTerm("foobar"));
  // different class
  AutomatonQuery w2 = new RegexpQuery(newTerm("foobar"));

  assertFalse(a1.equals(w1));
  assertFalse(a1.equals(w2));
  assertFalse(w1.equals(w2));
  assertFalse(a1.equals(a4));
  assertFalse(a1.equals(a5));
  assertFalse(a1.equals(null));
}
/**
 * Test that rewriting to a single term works as expected, preserves
 * MultiTermQuery semantics.
 */
public void testRewriteSingleTerm() throws IOException {
  AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), BasicAutomata.makeString("piece"));
  Terms terms = MultiFields.getTerms(searcher.getIndexReader(), FN);
  assertTrue(aq.getTermsEnum(terms) instanceof SingleTermsEnum);
  assertEquals(1, automatonQueryNrHits(aq));
}
/**
 * Test that rewriting to a prefix query works as expected, preserves
 * MultiTermQuery semantics.
 */
public void testRewritePrefix() throws IOException {
  Automaton pfx = BasicAutomata.makeString("do");
  pfx.expandSingleton(); // expand singleton representation for testing
  Automaton prefixAutomaton = BasicOperations.concatenate(pfx, BasicAutomata.makeAnyString());
  AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), prefixAutomaton);
  Terms terms = MultiFields.getTerms(searcher.getIndexReader(), FN);
  assertTrue(aq.getTermsEnum(terms) instanceof PrefixTermsEnum);
  assertEquals(3, automatonQueryNrHits(aq));
}
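// Editor's sketch: the concatenated automaton above accepts exactly the terms
// of an ordinary prefix query over the same field (via the test's newTerm
// helper), i.e. it is equivalent to:
PrefixQuery equivalent = new PrefixQuery(newTerm("do"));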
/**
 * Test handling of the empty language
 */
public void testEmptyOptimization() throws IOException {
  AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), BasicAutomata.makeEmpty());
  // not yet available: assertTrue(aq.getEnum(searcher.getIndexReader())
  // instanceof EmptyTermEnum);
  Terms terms = MultiFields.getTerms(searcher.getIndexReader(), FN);
  assertSame(TermsEnum.EMPTY, aq.getTermsEnum(terms));
  assertEquals(0, automatonQueryNrHits(aq));
}
/** Test a configuration that behaves a lot like KeepWordFilter */
public void testKeep() throws Exception {
  CharacterRunAutomaton keepWords = new CharacterRunAutomaton(
      BasicOperations.complement(
          BasicOperations.union(Arrays.asList(
              BasicAutomata.makeString("foo"),
              BasicAutomata.makeString("bar")))));
  Analyzer a = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, keepWords, true);
  assertAnalyzesTo(a, "quick foo brown bar bar fox foo",
      new String[] { "foo", "bar", "bar", "foo" },
      new int[] { 2, 2, 1, 2 });
}
public void testSingleToken() throws Exception {
  final TokenStream ts = new CannedTokenStream(new Token[] {
      token("abc", 1, 1),
  });
  final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
  final Automaton expected = BasicAutomata.makeString("abc");
  assertTrue(BasicOperations.sameLanguage(expected, actual));
}
private Automaton join(String... strings) {
  List<Automaton> as = new ArrayList<Automaton>();
  for (String s : strings) {
    as.add(BasicAutomata.makeString(s));
    as.add(SEP_A);
  }
  as.remove(as.size() - 1); // drop the trailing separator
  return BasicOperations.concatenate(as);
}
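// Usage sketch (editor's note): join("abc", "def") accepts the label sequence
// abc SEP def, mirroring what TokenStreamToAutomaton emits between two
// consecutive positions. SEP_A and HOLE_A, used throughout these tests, are
// assumed to be the single-label automata over its separator and hole symbols:
//
//   Automaton SEP_A  = BasicAutomata.makeChar(TokenStreamToAutomaton.POS_SEP);
//   Automaton HOLE_A = BasicAutomata.makeChar(TokenStreamToAutomaton.HOLE);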
public void testOverlappedTokensSausage() throws Exception {
  // Two tokens on top of each other (sausage):
  final TokenStream ts = new CannedTokenStream(new Token[] {
      token("abc", 1, 1),
      token("xyz", 0, 1)
  });
  final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
  final Automaton a1 = BasicAutomata.makeString("abc");
  final Automaton a2 = BasicAutomata.makeString("xyz");
  final Automaton expected = BasicOperations.union(a1, a2);
  assertTrue(BasicOperations.sameLanguage(expected, actual));
}
public void testSynOverHole2() throws Exception {
  final TokenStream ts = new CannedTokenStream(new Token[] {
      token("xyz", 1, 1),
      token("abc", 0, 3),
      token("def", 2, 1),
  });
  final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
  final Automaton expected = BasicOperations.union(
      join(s2a("xyz"), SEP_A, HOLE_A, SEP_A, s2a("def")),
      BasicAutomata.makeString("abc"));
  assertTrue(BasicOperations.sameLanguage(expected, actual));
}
public void testSynHangingOverEnd() throws Exception {
  final TokenStream ts = new CannedTokenStream(new Token[] {
      token("a", 1, 1),
      token("X", 0, 10),
  });
  final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
  final Automaton expected = BasicOperations.union(
      BasicAutomata.makeString("a"),
      BasicAutomata.makeString("X"));
  assertTrue(BasicOperations.sameLanguage(expected, actual));
}
@BeforeClass
public static void beforeClass() throws Exception {
  Random random = random();
  directory = newDirectory();
  stopword = "" + randomChar();
  CharacterRunAutomaton stopset = new CharacterRunAutomaton(BasicAutomata.makeString(stopword));
  analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false, stopset);
  RandomIndexWriter iw = new RandomIndexWriter(random, directory, analyzer);
  Document doc = new Document();
  Field id = new StringField("id", "", Field.Store.NO);
  Field field = new TextField("field", "", Field.Store.NO);
  doc.add(id);
  doc.add(field);

  // index some docs
  int numDocs = atLeast(1000);
  for (int i = 0; i < numDocs; i++) {
    id.setStringValue(Integer.toString(i));
    field.setStringValue(randomFieldContents());
    iw.addDocument(doc);
  }

  // delete some docs
  int numDeletes = numDocs / 20;
  for (int i = 0; i < numDeletes; i++) {
    Term toDelete = new Term("id", Integer.toString(random.nextInt(numDocs)));
    if (random.nextBoolean()) {
      iw.deleteDocuments(toDelete);
    } else {
      iw.deleteDocuments(new TermQuery(toDelete));
    }
  }

  reader = iw.getReader();
  s1 = newSearcher(reader);
  s2 = newSearcher(reader);
  iw.close();
}
public void testStopwordsPosIncHole2() throws Exception {
  // use two stopfilters for testing here
  Directory dir = newDirectory();
  final Automaton secondSet = BasicAutomata.makeString("foobar");
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader);
      TokenStream stream = new MockTokenFilter(tokenizer, MockTokenFilter.ENGLISH_STOPSET);
      stream = new MockTokenFilter(stream, new CharacterRunAutomaton(secondSet));
      return new TokenStreamComponents(tokenizer, stream);
    }
  };
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, a);
  Document doc = new Document();
  doc.add(new TextField("body", "just a foobar", Field.Store.NO));
  doc.add(new TextField("body", "test of gaps", Field.Store.NO));
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher is = newSearcher(ir);
  PhraseQuery pq = new PhraseQuery();
  pq.add(new Term("body", "just"), 0);
  pq.add(new Term("body", "test"), 3);
  // body:"just ? ? test"
  assertEquals(1, is.search(pq, 5).totalHits);
  ir.close();
  dir.close();
}
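// Position arithmetic for the test above (editor's note): the first field
// value analyzes to just(0) with holes at positions 1 ("a") and 2 ("foobar"),
// and the analyzer's default position increment gap is 0, so "test" lands at
// position 3, exactly the 0/3 offsets the PhraseQuery asks for.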
/** Test a configuration that behaves a lot like KeepWordFilter */
public void testKeep() throws Exception {
  CharacterRunAutomaton keepWords = new CharacterRunAutomaton(
      BasicOperations.complement(
          BasicOperations.union(Arrays.asList(
              BasicAutomata.makeString("foo"),
              BasicAutomata.makeString("bar")))));
  Analyzer a = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, keepWords);
  assertAnalyzesTo(a, "quick foo brown bar bar fox foo",
      new String[] { "foo", "bar", "bar", "foo" },
      new int[] { 2, 2, 1, 2 });
}