@Override
public Query parse(QueryParseContext parseContext) throws IOException, QueryParsingException {
    XContentParser parser = parseContext.parser();
    Token token = parser.nextToken();
    if (!MATCH_NAME.equals(parser.currentName()) || token != XContentParser.Token.FIELD_NAME) {
        throw new QueryParsingException(parseContext,
                "spanMultiTerm must have [" + MATCH_NAME + "] multi term query clause");
    }
    token = parser.nextToken();
    if (token != XContentParser.Token.START_OBJECT) {
        throw new QueryParsingException(parseContext,
                "spanMultiTerm must have [" + MATCH_NAME + "] multi term query clause");
    }
    Query subQuery = parseContext.parseInnerQuery();
    if (!(subQuery instanceof MultiTermQuery)) {
        throw new QueryParsingException(parseContext,
                "spanMultiTerm [" + MATCH_NAME + "] must be of type multi term query");
    }
    parser.nextToken();
    return new SpanMultiTermQueryWrapper<>((MultiTermQuery) subQuery);
}
@Test
public void testWildcardStarRewritten () throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc1());
    ki.commit();

    // meine* /+w1:2,s0 &Erfahrung
    // rewritten into meine.*
    RegexpQuery wcquery = new RegexpQuery(new Term("tokens", "s:meine.*"));
    SpanMultiTermQueryWrapper<RegexpQuery> mtq =
            new SpanMultiTermQueryWrapper<RegexpQuery>(wcquery);

    SpanMultipleDistanceQuery mdsq = new SpanMultipleDistanceQuery(
            new SpanClassQuery(mtq, (byte) 129),
            new SpanClassQuery(sq, (byte) 129), constraints, true, true);

    kr = ki.search(mdsq, (short) 10);
    assertEquals(4, kr.getMatches().size());
}
@Test
public void testWildcardQuestionMarkRewritten () throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc1());
    ki.commit();

    // meine? /+w1:2,s0 &Erfahrung
    // meine? rewritten into meine.
    SpanMultiTermQueryWrapper<RegexpQuery> mtq = new SpanMultiTermQueryWrapper<RegexpQuery>(
            new RegexpQuery(new Term("tokens", "s:meine.")));

    SpanMultipleDistanceQuery mdsq = new SpanMultipleDistanceQuery(
            new SpanClassQuery(mtq, (byte) 129),
            new SpanClassQuery(sq, (byte) 129), constraints, true, true);

    kr = ki.search(mdsq, (short) 10);
    assertEquals(3, kr.getMatches().size());
}
@Test
public void testWildcardPlusRewritten () throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc1());
    ki.commit();

    // C2 meine+ /+w1:2,s0 &Erfahrung
    // meine+ rewritten into meine.?
    SpanMultiTermQueryWrapper<RegexpQuery> mtq = new SpanMultiTermQueryWrapper<RegexpQuery>(
            new RegexpQuery(new Term("tokens", "s:meine.?")));

    SpanMultipleDistanceQuery mdsq = new SpanMultipleDistanceQuery(
            new SpanClassQuery(mtq, (byte) 129),
            new SpanClassQuery(sq, (byte) 129), constraints, true, true);

    kr = ki.search(mdsq, (short) 10);
    assertEquals(4, kr.getMatches().size());
}
@Test
public void testWildcardPlusRewritten2 () throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc1());
    ki.commit();

    // C2 mein+ /+w1:2,s0 &Erfahrung
    // mein+ rewritten into mein.?
    SpanMultiTermQueryWrapper<RegexpQuery> mtq = new SpanMultiTermQueryWrapper<RegexpQuery>(
            new RegexpQuery(new Term("tokens", "s:mein.?")));

    SpanMultipleDistanceQuery mdsq = new SpanMultipleDistanceQuery(
            new SpanClassQuery(mtq, (byte) 129),
            new SpanClassQuery(sq, (byte) 129), constraints, true, true);

    kr = ki.search(mdsq, (short) 10);
    assertEquals(2, kr.getMatches().size());
}
@Test
public void testWildcardStarWithCollection () throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc1());
    ki.commit();

    // meine*
    WildcardQuery wcquery = new WildcardQuery(new Term("tokens", "s:meine*"));
    SpanMultiTermQueryWrapper<WildcardQuery> mtq =
            new SpanMultiTermQueryWrapper<WildcardQuery>(wcquery);

    // meine* /+w1:2,s0 &Erfahrung
    SpanQuery mdsq = new SpanMultipleDistanceQuery(
            new SpanClassQuery(mtq, (byte) 129),
            new SpanClassQuery(sq, (byte) 129), constraints, true, true);

    kr = ki.search(mdsq, (short) 10);
    assertEquals(4, kr.getMatches().size());
}
@Test
public void testWildcardQuestionMark1 () throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc1());
    ki.commit();

    // Wildcard ? means regex . (expects exactly one character)
    SpanMultiTermQueryWrapper<WildcardQuery> mtq = new SpanMultiTermQueryWrapper<WildcardQuery>(
            new WildcardQuery(new Term("tokens", "s:meine?")));

    SpanMultipleDistanceQuery mdsq = new SpanMultipleDistanceQuery(
            new SpanClassQuery(mtq, (byte) 129),
            new SpanClassQuery(sq, (byte) 129), constraints, true, true);

    kr = ki.search(mdsq, (short) 10);
    assertEquals(3, kr.getMatches().size());
}
@Test
public void testWildcardQuestionMark2 () throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc1());
    ki.commit();

    // Wildcard ? means regex . (expects exactly one character)
    SpanMultiTermQueryWrapper<WildcardQuery> mtq = new SpanMultiTermQueryWrapper<WildcardQuery>(
            new WildcardQuery(new Term("tokens", "s:mein?")));

    SpanMultipleDistanceQuery mdsq = new SpanMultipleDistanceQuery(
            new SpanClassQuery(mtq, (byte) 129),
            new SpanClassQuery(sq, (byte) 129), constraints, true, true);

    kr = ki.search(mdsq, (short) 10);
    assertEquals(1, kr.getMatches().size());
}
@Test
public void testRelationWithRegex () throws IOException {
    ki.addDoc(createFieldDoc0());
    ki.addDoc(createFieldDoc3());
    ki.commit();

    SpanQuery sq;
    sq = new SpanRelationQuery(
            new SpanMultiTermQueryWrapper<RegexQuery>(
                    new RegexQuery(new Term("base", ">:xip/.*"))),
            true, RelationDirection.RIGHT);
    kr = ki.search(sq, (short) 10);
    assertEquals((long) 7, kr.getTotalResults());

    sq = new SpanRelationQuery(
            new SpanMultiTermQueryWrapper<RegexQuery>(
                    new RegexQuery(new Term("base", "<:xip/.*"))),
            true, RelationDirection.LEFT);
    kr = ki.search(sq, (short) 10);
    assertEquals((long) 7, kr.getTotalResults());
}
private SpanQuery wrapWildcardTerms(org.apache.lucene.index.Term term) {
    String termText = term.text();
    SpanQuery nextSpanQuery;
    if ((termText != null) && (termText.contains("*") || termText.contains("?"))) {
        org.apache.lucene.search.WildcardQuery wildQuery =
                new org.apache.lucene.search.WildcardQuery(term);
        SpanMultiTermQueryWrapper wrapper = new SpanMultiTermQueryWrapper<>(wildQuery);
        wrapper.setRewriteMethod(
                new TopTermsSpanBooleanQueryRewrite(topTermSpanRewriteLimit));
        nextSpanQuery = wrapper;
    } else {
        nextSpanQuery = new SpanTermQuery(term);
    }
    return nextSpanQuery;
}
@SuppressWarnings("rawtypes") @Override public void assertInstanceOf(Query q, Class other) { if (q instanceof SpanMultiTermQueryWrapper) { q = ((SpanMultiTermQueryWrapper)q).getWrappedQuery(); } else if (q instanceof SpanTermQuery && other.equals(TermQuery.class)) { assertTrue("termquery", true); return; } else if (q instanceof SpanNearQuery && other.equals(PhraseQuery.class)) { assertTrue("spannear/phrase", true); return; } else if (q instanceof SpanOrQuery && other.equals(BooleanQuery.class)) { assertTrue("spanor/boolean", true); return; } super.assertInstanceOf(q, other); }
private SpanQuery asSpanQuery(BooleanClause query) {
    if (query.getQuery() instanceof MultiTermQuery) {
        return new SpanMultiTermQueryWrapper<>((MultiTermQuery) query.getQuery());
    } else {
        Set<Term> terms = new HashSet<>();
        try {
            indexSearcher.createWeight(query.getQuery(), false).extractTerms(terms);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        if (terms.size() != 1) {
            throw new RuntimeException("Expected term set of size 1: " + terms);
        }
        return new SpanTermQuery(terms.iterator().next());
    }
}
private Query spanFilter(SpanQuery query) {
    if (query instanceof SpanNearQuery) {
        return spanNearFilter((SpanNearQuery) query);
    } else if (query instanceof SpanNotQuery) {
        return spanNotFilter((SpanNotQuery) query);
    } else if (query instanceof SpanOrQuery) {
        return spanOrFilter((SpanOrQuery) query);
    } else if (query instanceof SpanTermQuery) {
        return new TermQuery(((SpanTermQuery) query).getTerm());
    } else if (query instanceof SpanMultiTermQueryWrapper) {
        return ((SpanMultiTermQueryWrapper) query).getWrappedQuery();
    } else {
        return new QueryWrapperFilter(query);
    }
}
public SpanRegexQueryWrapper (String field, String re, int flags, boolean caseinsensitive) {
    if (caseinsensitive) {
        if (re.startsWith("s:")) {
            re = re.replaceFirst("s:", "i:");
        };
        re = re.toLowerCase();
    };
    RegexpQuery requery = new RegexpQuery(new Term(field, re), flags);
    query = new SpanMultiTermQueryWrapper<RegexpQuery>(requery);
}
public SpanWildcardQueryWrapper (String field, String wc, boolean caseinsensitive) {
    if (caseinsensitive) {
        if (wc.startsWith("s:")) {
            wc = wc.replaceFirst("s:", "i:");
        };
        wc = wc.toLowerCase();
    };
    WildcardQuery wcquery = new WildcardQuery(new Term(field, wc));
    query = new SpanMultiTermQueryWrapper<WildcardQuery>(wcquery);
}
@Test
public void testWildcardPlusWithCollection () throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc1());
    ki.commit();

    // mein+ /+w1:2,s0 &Erfahrung
    SpanMultiTermQueryWrapper<WildcardQuery> mtq = new SpanMultiTermQueryWrapper<WildcardQuery>(
            new WildcardQuery(new Term("tokens", "s:mein+")));

    // Just to make sure, Lucene internal queries treat SpanOr([]) correctly
    SpanQuery soq = new SpanNearQuery(new SpanQuery[] { mtq, sq }, 1, true);
    kr = ki.search(soq, (short) 10);

    // As described in http://korap.github.io/Koral/, '+' is not a valid wildcard
    assertEquals(0, kr.getMatches().size());

    // Check the reported classed query
    SpanMultipleDistanceQuery mdsq = new SpanMultipleDistanceQuery(
            new SpanClassQuery(mtq, (byte) 129),
            new SpanClassQuery(sq, (byte) 129), constraints, true, true);
    kr = ki.search(mdsq, (short) 10);
    assertEquals(0, kr.getMatches().size());

    // Check multiple distance query
    mdsq = new SpanMultipleDistanceQuery(mtq, sq, constraints, true, true);
    kr = ki.search(mdsq, (short) 10);
    assertEquals(0, kr.getMatches().size());
}
public void testSpanWildcard() throws Exception {
    Directory dir = newDirectory();
    // use simpleanalyzer for more natural tokenization (else "test." is a token)
    final Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    Field body = new Field("body", "", offsetsType);
    Document doc = new Document();
    doc.add(body);

    body.setStringValue("This is a test.");
    iw.addDocument(doc);
    body.setStringValue("Test a one sentence document.");
    iw.addDocument(doc);

    IndexReader ir = iw.getReader();
    iw.close();

    IndexSearcher searcher = newSearcher(ir);
    PostingsHighlighter highlighter = new PostingsHighlighter() {
        @Override
        protected Analyzer getIndexAnalyzer(String field) {
            return analyzer;
        }
    };
    Query query = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*")));
    TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.totalHits);
    String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
    assertEquals(2, snippets.length);
    assertEquals("This is a <b>test</b>.", snippets[0]);
    assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

    ir.close();
    dir.close();
}
public void testNullPointerException() throws IOException {
    RegexpQuery regex = new RegexpQuery(new Term("field", "worl."));
    SpanQuery wrappedquery = new SpanMultiTermQueryWrapper<>(regex);

    MemoryIndex mindex = new MemoryIndex(random().nextBoolean(),
            random().nextInt(50) * 1024 * 1024);
    mindex.addField("field", new MockAnalyzer(random()).tokenStream("field", "hello there"));

    // This throws an NPE
    assertEquals(0, mindex.search(wrappedquery), 0.00001f);
}
public void testPassesIfWrapped() throws IOException {
    RegexpQuery regex = new RegexpQuery(new Term("field", "worl."));
    SpanQuery wrappedquery = new SpanOrQuery(new SpanMultiTermQueryWrapper<>(regex));

    MemoryIndex mindex = new MemoryIndex(random().nextBoolean(),
            random().nextInt(50) * 1024 * 1024);
    mindex.addField("field", new MockAnalyzer(random()).tokenStream("field", "hello there"));

    // This passes though
    assertEquals(0, mindex.search(wrappedquery), 0.00001f);
}
public void testSpanRegex() throws Exception {
    Directory directory = newDirectory();
    IndexWriter writer = new IndexWriter(directory,
            newIndexWriterConfig(new MockAnalyzer(random())));
    Document doc = new Document();
    // doc.add(newField("field", "the quick brown fox jumps over the lazy dog",
    //         Field.Store.NO, Field.Index.ANALYZED));
    // writer.addDocument(doc);
    // doc = new Document();
    doc.add(newTextField("field", "auto update", Field.Store.NO));
    writer.addDocument(doc);
    doc = new Document();
    doc.add(newTextField("field", "first auto update", Field.Store.NO));
    writer.addDocument(doc);
    writer.forceMerge(1);
    writer.close();

    IndexReader reader = DirectoryReader.open(directory);
    IndexSearcher searcher = newSearcher(reader);
    SpanQuery srq = new SpanMultiTermQueryWrapper<>(new RegexQuery(new Term("field", "aut.*")));
    SpanFirstQuery sfq = new SpanFirstQuery(srq, 1);
    // SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {srq, stq}, 6,
    //         true);
    int numHits = searcher.search(sfq, null, 1000).totalHits;
    assertEquals(1, numHits);
    reader.close();
    directory.close();
}
private int spanRegexQueryNrHits(String regex1, String regex2, int slop,
        boolean ordered) throws Exception {
    SpanQuery srq1 = new SpanMultiTermQueryWrapper<>(new RegexQuery(newTerm(regex1)));
    SpanQuery srq2 = new SpanMultiTermQueryWrapper<>(new RegexQuery(newTerm(regex2)));
    SpanNearQuery query = new SpanNearQuery(new SpanQuery[]{srq1, srq2}, slop, ordered);

    return searcher.search(query, null, 1000).totalHits;
}
public void testNullPointerException() throws IOException {
    RegexpQuery regex = new RegexpQuery(new Term("field", "worl."));
    SpanQuery wrappedquery = new SpanMultiTermQueryWrapper<RegexpQuery>(regex);

    MemoryIndex mindex = new MemoryIndex(random().nextBoolean(),
            random().nextInt(50) * 1024 * 1024);
    mindex.addField("field", new MockAnalyzer(random()).tokenStream("field",
            new StringReader("hello there")));

    // This throws an NPE
    assertEquals(0, mindex.search(wrappedquery), 0.00001f);
}
public void testPassesIfWrapped() throws IOException {
    RegexpQuery regex = new RegexpQuery(new Term("field", "worl."));
    SpanQuery wrappedquery = new SpanOrQuery(new SpanMultiTermQueryWrapper<RegexpQuery>(regex));

    MemoryIndex mindex = new MemoryIndex(random().nextBoolean(),
            random().nextInt(50) * 1024 * 1024);
    mindex.addField("field", new MockAnalyzer(random()).tokenStream("field",
            new StringReader("hello there")));

    // This passes though
    assertEquals(0, mindex.search(wrappedquery), 0.00001f);
}
public void testSpanRegex() throws Exception {
    Directory directory = newDirectory();
    IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(
            TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    Document doc = new Document();
    // doc.add(newField("field", "the quick brown fox jumps over the lazy dog",
    //         Field.Store.NO, Field.Index.ANALYZED));
    // writer.addDocument(doc);
    // doc = new Document();
    doc.add(newTextField("field", "auto update", Field.Store.NO));
    writer.addDocument(doc);
    doc = new Document();
    doc.add(newTextField("field", "first auto update", Field.Store.NO));
    writer.addDocument(doc);
    writer.forceMerge(1);
    writer.close();

    IndexReader reader = DirectoryReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(reader);
    SpanQuery srq = new SpanMultiTermQueryWrapper<RegexQuery>(new RegexQuery(new Term("field", "aut.*")));
    SpanFirstQuery sfq = new SpanFirstQuery(srq, 1);
    // SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {srq, stq}, 6,
    //         true);
    int numHits = searcher.search(sfq, null, 1000).totalHits;
    assertEquals(1, numHits);
    reader.close();
    directory.close();
}
private int spanRegexQueryNrHits(String regex1, String regex2, int slop,
        boolean ordered) throws Exception {
    SpanQuery srq1 = new SpanMultiTermQueryWrapper<RegexQuery>(new RegexQuery(newTerm(regex1)));
    SpanQuery srq2 = new SpanMultiTermQueryWrapper<RegexQuery>(new RegexQuery(newTerm(regex2)));
    SpanNearQuery query = new SpanNearQuery(new SpanQuery[]{srq1, srq2}, slop, ordered);

    return searcher.search(query, null, 1000).totalHits;
}
@Override
public void assertQueryEquals(CommonQueryParserConfiguration cqpC, String field,
        String query, String result) throws Exception {
    Query q = getQuery(query, cqpC);
    if (q instanceof SpanMultiTermQueryWrapper) {
        @SuppressWarnings("rawtypes")
        Query tmp = ((SpanMultiTermQueryWrapper) q).getWrappedQuery();
        //TODO: we used to set boost here
        q = tmp;
    }
    assertEquals(result, q.toString(field));
}
@Override
public void assertQueryEquals(String query, Analyzer a, String result) throws Exception {
    Query q = getQuery(query, a);
    if (q instanceof SpanMultiTermQueryWrapper) {
        @SuppressWarnings("rawtypes")
        Query tmp = ((SpanMultiTermQueryWrapper) q).getWrappedQuery();
        //TODO: we used to set boost here
        q = tmp;
    } else if (q instanceof SpanOrQuery) {
        if (((SpanOrQuery) q).getClauses().length == 0) {
            q = new BooleanQuery.Builder().build();
        }
    }
    assertEquals(result, q.toString("field"));
}
public void assertQueryEqualsCMP(String query, Analyzer a, String result) throws Exception {
    Query q = getQuery(query, a);
    if (q instanceof SpanMultiTermQueryWrapper) {
        @SuppressWarnings("rawtypes")
        Query tmp = ((SpanMultiTermQueryWrapper) q).getWrappedQuery();
        //TODO we used to set boost here
        q = tmp;
    } else if (q instanceof SpanOrQuery) {
        if (((SpanOrQuery) q).getClauses().length == 0) {
            q = new BooleanQuery.Builder().build();
        }
    }
    assertEquals(result, q.toString("field"));
}
@Test
public void testRewrites() throws Exception {
    //test to make sure that queries are rewritten
    //first test straight prefix queries
    String[] docs = new String[]{
            "aa ba ca aa ba ca",
            "ca ba aa ca ba aa da ea za",
            "ca ba aa ca ba aa ea aa ba ca za"};
    Analyzer analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET);
    Directory directory = getDirectory(analyzer, docs);
    IndexReader reader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(reader);
    ConcordanceSearcher searcher = new ConcordanceSearcher(
            new WindowBuilder(10, 10, analyzer.getOffsetGap(FIELD)));
    BooleanQuery q = new BooleanQuery.Builder()
            .add(new PrefixQuery(new Term(FIELD, "a")), Occur.MUST)
            .add(new PrefixQuery(new Term(FIELD, "d")), Occur.MUST_NOT).build();

    //now test straight and span wrapper
    ConcordanceWindowCollector collector = new ConcordanceWindowCollector(10);
    searcher.search(indexSearcher, FIELD, q,
            new PrefixQuery(new Term(FIELD, "z")), analyzer, collector);
    // shouldn't include document with "da", but must include one with za
    assertEquals(3, collector.size());

    collector = new ConcordanceWindowCollector(10);
    searcher.search(indexSearcher, FIELD, q,
            new SpanMultiTermQueryWrapper<>(new PrefixQuery(new Term(FIELD, "z"))),
            analyzer, collector);
    // shouldn't include document with "da", but must include one with za
    assertEquals(3, collector.size());

    reader.close();
    directory.close();
}
public void testNullPointerException() throws IOException {
    RegexpQuery regex = new RegexpQuery(new Term("field", "worl."));
    SpanQuery wrappedquery = new SpanMultiTermQueryWrapper<RegexpQuery>(regex);

    MemoryIndex mindex = new MemoryIndex(random().nextBoolean(),
            random().nextInt(50) * 1024 * 1024);
    mindex.addField("field", new MockAnalyzer(random()).tokenStream("field", "hello there"));

    // This throws an NPE
    assertEquals(0, mindex.search(wrappedquery), 0.00001f);
}
public void testPassesIfWrapped() throws IOException {
    RegexpQuery regex = new RegexpQuery(new Term("field", "worl."));
    SpanQuery wrappedquery = new SpanOrQuery(new SpanMultiTermQueryWrapper<RegexpQuery>(regex));

    MemoryIndex mindex = new MemoryIndex(random().nextBoolean(),
            random().nextInt(50) * 1024 * 1024);
    mindex.addField("field", new MockAnalyzer(random()).tokenStream("field", "hello there"));

    // This passes though
    assertEquals(0, mindex.search(wrappedquery), 0.00001f);
}
public void testSpanRegex() throws Exception {
    Directory directory = newDirectory();
    IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(
            TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    Document doc = new Document();
    // doc.add(newField("field", "the quick brown fox jumps over the lazy dog",
    //         Field.Store.NO, Field.Index.ANALYZED));
    // writer.addDocument(doc);
    // doc = new Document();
    doc.add(newTextField("field", "auto update", Field.Store.NO));
    writer.addDocument(doc);
    doc = new Document();
    doc.add(newTextField("field", "first auto update", Field.Store.NO));
    writer.addDocument(doc);
    writer.forceMerge(1);
    writer.close();

    IndexReader reader = DirectoryReader.open(directory);
    IndexSearcher searcher = newSearcher(reader);
    SpanQuery srq = new SpanMultiTermQueryWrapper<RegexQuery>(new RegexQuery(new Term("field", "aut.*")));
    SpanFirstQuery sfq = new SpanFirstQuery(srq, 1);
    // SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {srq, stq}, 6,
    //         true);
    int numHits = searcher.search(sfq, null, 1000).totalHits;
    assertEquals(1, numHits);
    reader.close();
    directory.close();
}
/**
 * Translates custom queries into queries that are supported by the unified highlighter.
 */
private Collection<Query> rewriteCustomQuery(Query query) {
    if (query instanceof MultiPhrasePrefixQuery) {
        MultiPhrasePrefixQuery mpq = (MultiPhrasePrefixQuery) query;
        Term[][] terms = mpq.getTerms();
        int[] positions = mpq.getPositions();
        SpanQuery[] positionSpanQueries = new SpanQuery[positions.length];
        int sizeMinus1 = terms.length - 1;
        for (int i = 0; i < positions.length; i++) {
            SpanQuery[] innerQueries = new SpanQuery[terms[i].length];
            for (int j = 0; j < terms[i].length; j++) {
                if (i == sizeMinus1) {
                    innerQueries[j] = new SpanMultiTermQueryWrapper(new PrefixQuery(terms[i][j]));
                } else {
                    innerQueries[j] = new SpanTermQuery(terms[i][j]);
                }
            }
            if (innerQueries.length > 1) {
                positionSpanQueries[i] = new SpanOrQuery(innerQueries);
            } else {
                positionSpanQueries[i] = innerQueries[0];
            }
        }

        // sum position increments beyond 1
        int positionGaps = 0;
        if (positions.length >= 2) {
            // positions are in increasing order. max(0,...) is just a safeguard.
            positionGaps = Math.max(0,
                    positions[positions.length - 1] - positions[0] - positions.length + 1);
        }

        // if original slop is 0 then require inOrder
        boolean inorder = (mpq.getSlop() == 0);
        return Collections.singletonList(new SpanNearQuery(positionSpanQueries,
                mpq.getSlop() + positionGaps, inorder));
    } else if (query instanceof CommonTermsQuery) {
        CommonTermsQuery ctq = (CommonTermsQuery) query;
        List<Query> tqs = new ArrayList<>();
        for (Term term : ctq.getTerms()) {
            tqs.add(new TermQuery(term));
        }
        return tqs;
    } else if (query instanceof AllTermQuery) {
        AllTermQuery atq = (AllTermQuery) query;
        return Collections.singletonList(new TermQuery(atq.getTerm()));
    } else if (query instanceof FunctionScoreQuery) {
        return Collections.singletonList(((FunctionScoreQuery) query).getSubQuery());
    } else if (query instanceof FiltersFunctionScoreQuery) {
        return Collections.singletonList(((FiltersFunctionScoreQuery) query).getSubQuery());
    } else {
        return null;
    }
}
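A worked example of the positionGaps arithmetic above may help; the values are hypothetical and only illustrate the formula used in rewriteCustomQuery, they are not taken from the original source.

// Hypothetical illustration of the gap computation in rewriteCustomQuery:
// a MultiPhrasePrefixQuery whose terms sit at positions 0, 2 and 3 (one position
// increment swallowed by a removed stopword) yields
int[] positions = { 0, 2, 3 };
int positionGaps = Math.max(0, positions[positions.length - 1] - positions[0] - positions.length + 1);
// positionGaps == Math.max(0, 3 - 0 - 3 + 1) == 1, so the resulting SpanNearQuery is
// built with slop = mpq.getSlop() + 1, and in-order matching is only required when
// the original slop was 0.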
@Test
public void testMultipleDistanceWithWildcards () throws IOException, QueryException {
    WildcardQuery wcquery = new WildcardQuery(new Term("tokens", "s:meine*"));
    SpanMultiTermQueryWrapper<WildcardQuery> mtq =
            new SpanMultiTermQueryWrapper<WildcardQuery>(wcquery);

    // meine* /+w1:2 &Erfahrung
    SpanQuery tdq = new SpanDistanceQuery(mtq, sq, TestMultipleDistanceIndex
            .createConstraint("w", 1, 2, true, false), true);

    kr = sample.search(tdq, (short) 10);
    assertEquals(4, kr.getMatches().size());
    assertEquals(107, kr.getMatch(0).getStartPos());
    assertEquals(109, kr.getMatch(0).getEndPos());
    assertEquals(132566, kr.getMatch(1).getStartPos());
    assertEquals(132569, kr.getMatch(1).getEndPos());
    assertEquals(161393, kr.getMatch(2).getStartPos());
    assertEquals(161396, kr.getMatch(2).getEndPos());
    assertEquals(10298, kr.getMatch(3).getStartPos());
    assertEquals(10301, kr.getMatch(3).getEndPos());

    // meine* /+s0 &Erfahrung
    SpanQuery edq = new SpanDistanceQuery(mtq, sq, TestMultipleDistanceIndex
            .createConstraint("tokens", "base/s:s", 0, 0, true, false), true);
    kr = sample.search(edq, (short) 20);
    assertEquals(18, kr.getMatches().size());

    // meine* /+w1:2,s0 &Erfahrung
    SpanQuery mdsq = new SpanMultipleDistanceQuery(
            new SpanClassQuery(mtq, (byte) 129),
            new SpanClassQuery(sq, (byte) 129), constraints, true, true);
    kr = sample.search(mdsq, (short) 10);
    assertEquals(4, kr.getMatches().size());

    // check SpanQueryWrapper generated query
    SpanQueryWrapper sqwi = getJSONQuery(
            getClass().getResource("/queries/bugs/cosmas_wildcards.jsonld")
                    .getFile());
    SpanQuery jsq = sqwi.toQuery();
    assertEquals(mdsq.toString(), jsq.toString());
}
@Test
public void testWildcardStarWithCollection () throws IOException {
    // meine*
    WildcardQuery wcquery = new WildcardQuery(new Term("tokens", "s:meine*"));
    SpanMultiTermQueryWrapper<WildcardQuery> mtq =
            new SpanMultiTermQueryWrapper<WildcardQuery>(wcquery);

    // meine* /+w1:2,s0 &Erfahrung
    SpanQuery mdsq = new SpanMultipleDistanceQuery(
            new SpanClassQuery(mtq, (byte) 129),
            new SpanClassQuery(sq, (byte) 129), constraints, true, true);

    krillAvailabilityAll.setSpanQuery(mdsq);
    kr = sample.search(krillAvailabilityAll);

    assertEquals(4, kr.getMatches().size());

    assertEquals("match-GOE/AGI/04846-p107-109", kr.getMatch(0).getID());
    assertEquals("QAO-NC-LOC:ids", kr.getMatch(0).getAvailability());
    assertEquals("... gelesen und erzählt hat, ich in "
            + "[[meine Erfahrungen]] hätte mit aufnehmen sollen. "
            + "heute jedoch ...",
            kr.getMatch(0).getSnippetBrackets());

    assertEquals("match-GOE/AGD/00000-p132566-132569", kr.getMatch(1).getID());
    assertEquals("QAO-NC-LOC:ids-NU:1", kr.getMatch(1).getAvailability());
    assertEquals("... Mannes umständlich beibringen und solches "
            + "durch [[meine eigne Erfahrung]] bekräftigen: das "
            + "alles sollte nicht gelten ...",
            kr.getMatch(1).getSnippetBrackets());

    assertEquals("match-GOE/AGD/00000-p161393-161396", kr.getMatch(2).getID());
    assertEquals("QAO-NC-LOC:ids-NU:1", kr.getMatch(2).getAvailability());
    assertEquals("... lassen, bis er sich zuletzt an "
            + "[[meine sämtlichen Erfahrungen]] und Überzeugungen "
            + "anschloß, in welchem Sinne ...",
            kr.getMatch(2).getSnippetBrackets());

    assertEquals("match-GOE/AGD/06345-p10298-10301", kr.getMatch(3).getID());
    assertEquals("QAO-NC", kr.getMatch(3).getAvailability());
    assertEquals("... bis aufs Äußerste verfolgte, und, über "
            + "[[meine enge Erfahrung]] hinaus, nach ähnlichen Fällen "
            + "in der ...",
            kr.getMatch(3).getSnippetBrackets());
}
@Test
public void testQueryWithWildCard () throws IOException {
    // meine* /+w1:2,s0 &Erfahrung
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc5());
    ki.commit();

    // Check simple rewriting
    WildcardQuery wcquery = new WildcardQuery(new Term("tokens", "s:Meine*"));
    SpanMultiTermQueryWrapper<WildcardQuery> mtq =
            new SpanMultiTermQueryWrapper<WildcardQuery>(wcquery);
    assertEquals(wcquery.toString(), "tokens:s:Meine*");

    kr = ki.search(mtq, (short) 10);
    assertEquals(4, kr.getMatches().size());
    assertEquals(0, kr.getMatch(0).getStartPos());
    assertEquals(1, kr.getMatch(0).getEndPos());

    // Check rewriting in multidistance query
    SpanQuery sq = new SpanTermQuery(new Term("tokens", "l:Erfahrung"));
    kr = ki.search(sq, (short) 10);
    assertEquals(4, kr.getMatches().size());

    List<DistanceConstraint> constraints = new ArrayList<DistanceConstraint>();
    constraints.add(createConstraint("w", 1, 2, true, false));
    constraints.add(createConstraint("tokens", "s", 0, 0, true, false));

    SpanQuery mdsq = new SpanMultipleDistanceQuery(mtq, sq, constraints, true, true);
    assertEquals(mdsq.toString(),
            "spanMultipleDistance(SpanMultiTermQueryWrapper(tokens:s:Meine*), "
                    + "tokens:l:Erfahrung, [(w[1:2], ordered, notExcluded), (s[0:0], "
                    + "ordered, notExcluded)])");

    kr = ki.search(mdsq, (short) 10);
    assertEquals(3, kr.getMatches().size());
    assertEquals(0, kr.getMatch(0).getStartPos());
    assertEquals(2, kr.getMatch(0).getEndPos());

    // Check skipping with multiple documents
    ki.addDoc(createFieldDoc6());
    ki.addDoc(createFieldDoc7());
    ki.addDoc(createFieldDoc8());
    ki.commit();

    kr = ki.search(mdsq, (short) 10);
    assertEquals(6, kr.getMatches().size());
}
/**
 * Query rewrite bug
 *
 * Warning: This is not armoured by <base/s=t>!
 *
 * @throws IOException
 */
@Test
public void testQueryRewriteBug () throws IOException {
    KrillIndex ki = new KrillIndex();
    ki.addDoc(createFieldDoc0()); // ceccecdeec
    /*
     * ki.addDoc(createFieldDoc1()); // bbccdd || only not clause
     * ki.addDoc(createFieldDoc2()); // beccea | only main clause
     */
    ki.commit();

    // See /queries/bugs/repetition_group_rewrite
    RegexpQuery requery = new RegexpQuery(new Term("base", "s:[ac]"), RegExp.ALL);
    SpanMultiTermQueryWrapper<RegexpQuery> query =
            new SpanMultiTermQueryWrapper<RegexpQuery>(requery);
    SpanExpansionQuery seq = new SpanExpansionQuery(query, 1, 1, 1, true);
    SpanRepetitionQuery rep = new SpanRepetitionQuery(seq, 2, 2, true);

    // spanRepetition(
    //     spanExpansion(
    //         SpanMultiTermQueryWrapper(base:/s:[ac]/),
    //         []{1, 1},
    //         right
    //     ){2,2}
    // )

    kr = ki.search(query, (short) 20);
    assertEquals(5, kr.getTotalResults());
    assertEquals(0, kr.getMatch(0).getStartPos());
    assertEquals(1, kr.getMatch(0).getEndPos());
    assertEquals(2, kr.getMatch(1).getStartPos());
    assertEquals(3, kr.getMatch(1).getEndPos());
    assertEquals(3, kr.getMatch(2).getStartPos());
    assertEquals(4, kr.getMatch(2).getEndPos());
    assertEquals(5, kr.getMatch(3).getStartPos());
    assertEquals(6, kr.getMatch(3).getEndPos());
    assertEquals(9, kr.getMatch(4).getStartPos());
    assertEquals(10, kr.getMatch(4).getEndPos());

    kr = ki.search(seq, (short) 20);
    assertEquals(5, kr.getTotalResults());
    assertEquals(0, kr.getMatch(0).getStartPos());
    assertEquals(2, kr.getMatch(0).getEndPos());
    assertEquals(2, kr.getMatch(1).getStartPos());
    assertEquals(4, kr.getMatch(1).getEndPos());
    assertEquals(3, kr.getMatch(2).getStartPos());
    assertEquals(5, kr.getMatch(2).getEndPos());
    assertEquals(5, kr.getMatch(3).getStartPos());
    assertEquals(7, kr.getMatch(3).getEndPos());
    assertEquals(9, kr.getMatch(4).getStartPos());
    assertEquals(11, kr.getMatch(4).getEndPos());

    kr = ki.search(rep, (short) 20);

    // for (Match km : kr.getMatches()){
    //     System.out.println(
    //         km.getStartPos() + "," + km.getEndPos() + " " + km.getSnippetBrackets()
    //     );
    // };

    assertEquals("[[cecc]]ecdeec", kr.getMatch(0).getSnippetBrackets());
    assertEquals("cec[[cecd]]eec", kr.getMatch(1).getSnippetBrackets());
    assertEquals((long) 2, kr.getTotalResults());
}
public void testSpanOr() throws Exception {
    Directory dir = newDirectory();
    // use simpleanalyzer for more natural tokenization (else "test." is a token)
    final Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    Field body = new Field("body", "", offsetsType);
    Document doc = new Document();
    doc.add(body);

    body.setStringValue("This is a test.");
    iw.addDocument(doc);
    body.setStringValue("Test a one sentence document.");
    iw.addDocument(doc);

    IndexReader ir = iw.getReader();
    iw.close();

    IndexSearcher searcher = newSearcher(ir);
    PostingsHighlighter highlighter = new PostingsHighlighter() {
        @Override
        protected Analyzer getIndexAnalyzer(String field) {
            return analyzer;
        }
    };
    SpanQuery childQuery = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*")));
    Query query = new SpanOrQuery(new SpanQuery[] { childQuery });
    TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.totalHits);
    String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
    assertEquals(2, snippets.length);
    assertEquals("This is a <b>test</b>.", snippets[0]);
    assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

    ir.close();
    dir.close();
}
public void testSpanNear() throws Exception {
    Directory dir = newDirectory();
    // use simpleanalyzer for more natural tokenization (else "test." is a token)
    final Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    Field body = new Field("body", "", offsetsType);
    Document doc = new Document();
    doc.add(body);

    body.setStringValue("This is a test.");
    iw.addDocument(doc);
    body.setStringValue("Test a one sentence document.");
    iw.addDocument(doc);

    IndexReader ir = iw.getReader();
    iw.close();

    IndexSearcher searcher = newSearcher(ir);
    PostingsHighlighter highlighter = new PostingsHighlighter() {
        @Override
        protected Analyzer getIndexAnalyzer(String field) {
            return analyzer;
        }
    };
    SpanQuery childQuery = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*")));
    Query query = new SpanNearQuery(new SpanQuery[] { childQuery }, 0, true);
    TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.totalHits);
    String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
    assertEquals(2, snippets.length);
    assertEquals("This is a <b>test</b>.", snippets[0]);
    assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

    ir.close();
    dir.close();
}