/**
 * Applies the slop value carried by a {@link SlopQueryNode} to the query that was already
 * built for its child node.
 *
 * @param queryNode the node to build; it is cast to {@link SlopQueryNode} without a type check
 * @return the child's query (read from the child's {@code QUERY_TREE_BUILDER_TAGID} tag) after
 *         its slop has been set
 * @throws QueryNodeException declared by the overridden method; not thrown directly here
 */
@Override
public Query build(QueryNode queryNode) throws QueryNodeException {
  final SlopQueryNode slopNode = (SlopQueryNode) queryNode;
  final Query childQuery =
      (Query) slopNode.getChild().getTag(QueryTreeBuilder.QUERY_TREE_BUILDER_TAGID);

  if (childQuery instanceof PhraseQuery) {
    ((PhraseQuery) childQuery).setSlop(slopNode.getValue());
    return childQuery;
  }

  // Anything that is not a PhraseQuery is cast to MultiPhraseQuery unchecked;
  // a different query type fails with a ClassCastException.
  ((MultiPhraseQuery) childQuery).setSlop(slopNode.getValue());
  return childQuery;
}
/**
 * Unwraps or converts query types that the parent implementation is not handed directly,
 * then delegates everything else to {@code super.flatten}.
 *
 * @param sourceQuery the query to flatten
 * @param reader      reader used when a query has to be rewritten first
 * @param flatQueries collection passed through to the recursive / parent calls
 * @param boost       boost passed through to the recursive / parent calls
 * @throws IOException if rewriting or a delegated call fails
 */
@Override
void flatten(Query sourceQuery, IndexReader reader, Collection<Query> flatQueries, float boost) throws IOException {
  if (sourceQuery instanceof SpanTermQuery) {
    // A span term is handed to the parent as a plain TermQuery on the same term.
    super.flatten(new TermQuery(((SpanTermQuery) sourceQuery).getTerm()), reader, flatQueries, boost);
    return;
  }
  if (sourceQuery instanceof ConstantScoreQuery) {
    flatten(((ConstantScoreQuery) sourceQuery).getQuery(), reader, flatQueries, boost);
    return;
  }
  if (sourceQuery instanceof FunctionScoreQuery) {
    flatten(((FunctionScoreQuery) sourceQuery).getSubQuery(), reader, flatQueries, boost);
    return;
  }
  if (sourceQuery instanceof MultiPhrasePrefixQuery) {
    // Rewritten against the reader first, then flattened again.
    flatten(sourceQuery.rewrite(reader), reader, flatQueries, boost);
    return;
  }
  if (sourceQuery instanceof FiltersFunctionScoreQuery) {
    flatten(((FiltersFunctionScoreQuery) sourceQuery).getSubQuery(), reader, flatQueries, boost);
    return;
  }
  if (sourceQuery instanceof MultiPhraseQuery) {
    // Note: boost is not forwarded on this path.
    final MultiPhraseQuery multiPhrase = (MultiPhraseQuery) sourceQuery;
    convertMultiPhraseQuery(
        0,
        new int[multiPhrase.getTermArrays().size()],
        multiPhrase,
        multiPhrase.getTermArrays(),
        multiPhrase.getPositions(),
        reader,
        flatQueries);
    return;
  }
  if (sourceQuery instanceof BlendedTermQuery) {
    final BlendedTermQuery blended = (BlendedTermQuery) sourceQuery;
    flatten(blended.rewrite(reader), reader, flatQueries, boost);
    return;
  }
  super.flatten(sourceQuery, reader, flatQueries, boost);
}
/**
 * Returns a query equivalent to {@code q} with the given slop applied.
 *
 * <p>A {@link PhraseQuery} is rebuilt through {@link PhraseQuery.Builder} with the same terms,
 * positions and boost; a {@link MultiPhraseQuery} is mutated in place; any other query is
 * returned untouched.
 *
 * @param q    the query to adjust
 * @param slop the slop to apply
 * @return the rebuilt phrase query, or {@code q} itself for every other case
 */
private Query applySlop(Query q, int slop) {
  if (q instanceof MultiPhraseQuery) {
    ((MultiPhraseQuery) q).setSlop(slop);
    return q;
  }
  if (!(q instanceof PhraseQuery)) {
    return q;
  }

  final PhraseQuery source = (PhraseQuery) q;
  final PhraseQuery.Builder rebuilt = new PhraseQuery.Builder();
  rebuilt.setSlop(slop);

  final Term[] sourceTerms = source.getTerms();
  final int[] sourcePositions = source.getPositions();
  int idx = 0;
  for (Term term : sourceTerms) {
    rebuilt.add(term, sourcePositions[idx]);
    idx++;
  }

  final PhraseQuery result = rebuilt.build();
  // The builder does not carry the boost over, so copy it explicitly.
  result.setBoost(q.getBoost());
  return result;
}
/**
 * Indexes one document whose "field" text is empty, parses the quoted phrase {@code "a"} with a
 * {@link StandardQueryParser} backed by a {@code CannedAnalyzer}, and asserts that the result is
 * a {@link MultiPhraseQuery} matching exactly one hit.
 */
public void testMultiPhraseQuery() throws Exception {
  Directory directory = newDirectory();
  IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(new CannedAnalyzer()));

  // The stored text is empty; whatever gets indexed comes from the analyzer
  // (presumably CannedAnalyzer emits fixed tokens regardless of input - confirm).
  Document document = new Document();
  document.add(newTextField("field", "", Field.Store.NO));
  writer.addDocument(document);

  IndexReader reader = DirectoryReader.open(writer, true);
  IndexSearcher searcher = newSearcher(reader);

  StandardQueryParser parser = new StandardQueryParser(new CannedAnalyzer());
  Query parsed = parser.parse("\"a\"", "field");

  assertTrue(parsed instanceof MultiPhraseQuery);
  assertEquals(1, searcher.search(parsed, 10).totalHits);

  reader.close();
  writer.close();
  directory.close();
}
/**
 * Parses a quoted phrase through a {@code CannedAnalyzer} fed with a fixed token list and
 * asserts that the parser produces the expected {@link MultiPhraseQuery}, with term groups at
 * positions -1, 0 and 1.
 */
public void testMultiPhraseQueryParsing() throws Exception {
  TokenAndPos[] cannedTokens = new TokenAndPos[] {
    new TokenAndPos("a", 0),
    new TokenAndPos("1", 0),
    new TokenAndPos("b", 1),
    new TokenAndPos("1", 1),
    new TokenAndPos("c", 2)
  };

  QueryParser parser = new QueryParser("field", new CannedAnalyzer(cannedTokens));
  Query parsed = parser.parse("\"this text is acually ignored\"");
  assertTrue("wrong query type!", parsed instanceof MultiPhraseQuery);

  Term[] firstGroup = new Term[] { new Term("field", "a"), new Term("field", "1") };
  Term[] secondGroup = new Term[] { new Term("field", "b"), new Term("field", "1") };
  Term[] thirdGroup = new Term[] { new Term("field", "c") };

  MultiPhraseQuery expected = new MultiPhraseQuery();
  expected.add(firstGroup, -1);
  expected.add(secondGroup, 0);
  expected.add(thirdGroup, 1);

  assertEquals(expected, parsed);
}
/** * Base implementation delegates to {@link #getFieldQuery(String,String,boolean)}. * This method may be overridden, for example, to return * a SpanNearQuery instead of a PhraseQuery. * */ protected Query getFieldQuery(String field, String queryText, int slop) throws SyntaxError { Query query = getFieldQuery(field, queryText, true); // only set slop of the phrase query was a result of this parser // and not a sub-parser. if (subQParser == null) { if (query instanceof PhraseQuery) { ((PhraseQuery) query).setSlop(slop); } if (query instanceof MultiPhraseQuery) { ((MultiPhraseQuery) query).setSlop(slop); } } return query; }
/**
 * Base implementation delegates to
 * {@link #getFieldQuery(String,String,boolean)} and then applies the slop.
 * This method may be overridden, for example, to return a SpanNearQuery
 * instead of a PhraseQuery.
 *
 * <p>A {@link PhraseQuery} is passed through {@code addSlopToPhrase}; a
 * {@link MultiPhraseQuery} is copied via its builder only when its current
 * slop differs from the requested one. Other query types are returned as-is.
 *
 * @exception org.apache.lucene.queryparser.classic.ParseException
 *              throw in overridden method to disallow
 */
protected Query getFieldQuery(String field, String queryText, int slop) throws ParseException {
  final Query parsed = getFieldQuery(field, queryText, true);

  if (parsed instanceof PhraseQuery) {
    return addSlopToPhrase((PhraseQuery) parsed, slop);
  }

  if (parsed instanceof MultiPhraseQuery) {
    final MultiPhraseQuery multiPhrase = (MultiPhraseQuery) parsed;
    if (multiPhrase.getSlop() != slop) {
      return new MultiPhraseQuery.Builder(multiPhrase).setSlop(slop).build();
    }
  }

  return parsed;
}
/**
 * Indexes one document whose "field" text is empty, parses the quoted phrase {@code "a"} with a
 * {@link StandardQueryParser} backed by a {@code CannedAnalyzer}, and asserts that the result is
 * a {@link MultiPhraseQuery} matching exactly one hit.
 */
public void testMultiPhraseQuery() throws Exception {
  Directory directory = newDirectory();
  IndexWriter writer =
      new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new CannedAnalyzer()));

  // The stored text is empty; whatever gets indexed comes from the analyzer
  // (presumably CannedAnalyzer emits fixed tokens regardless of input - confirm).
  Document document = new Document();
  document.add(newTextField("field", "", Field.Store.NO));
  writer.addDocument(document);

  IndexReader reader = DirectoryReader.open(writer, true);
  IndexSearcher searcher = newSearcher(reader);

  StandardQueryParser parser = new StandardQueryParser(new CannedAnalyzer());
  Query parsed = parser.parse("\"a\"", "field");

  assertTrue(parsed instanceof MultiPhraseQuery);
  assertEquals(1, searcher.search(parsed, 10).totalHits);

  reader.close();
  writer.close();
  directory.close();
}
/**
 * Parses a quoted phrase through a {@code CannedAnalyzer} fed with a fixed token list and
 * asserts that the parser produces the expected {@link MultiPhraseQuery}, with term groups at
 * positions -1, 0 and 1.
 */
public void testMultiPhraseQueryParsing() throws Exception {
  TokenAndPos[] cannedTokens = new TokenAndPos[] {
    new TokenAndPos("a", 0),
    new TokenAndPos("1", 0),
    new TokenAndPos("b", 1),
    new TokenAndPos("1", 1),
    new TokenAndPos("c", 2)
  };

  QueryParser parser =
      new QueryParser(TEST_VERSION_CURRENT, "field", new CannedAnalyzer(cannedTokens));
  Query parsed = parser.parse("\"this text is acually ignored\"");
  assertTrue("wrong query type!", parsed instanceof MultiPhraseQuery);

  Term[] firstGroup = new Term[] { new Term("field", "a"), new Term("field", "1") };
  Term[] secondGroup = new Term[] { new Term("field", "b"), new Term("field", "1") };
  Term[] thirdGroup = new Term[] { new Term("field", "c") };

  MultiPhraseQuery expected = new MultiPhraseQuery();
  expected.add(firstGroup, -1);
  expected.add(secondGroup, 0);
  expected.add(thirdGroup, 1);

  assertEquals(expected, parsed);
}
/**
 * Returns a query equivalent to {@code q} with the given slop applied, without mutating
 * {@code q}.
 *
 * <p>A {@link PhraseQuery} is always rebuilt with the same terms and positions; a
 * {@link MultiPhraseQuery} is copied through its builder only when its slop differs from
 * {@code slop}. Any other query is returned unchanged.
 *
 * @param q    the query to adjust
 * @param slop the slop to apply
 * @return the new query, or {@code q} when nothing needed to change
 */
private Query applySlop(Query q, int slop) {
  if (q instanceof MultiPhraseQuery) {
    final MultiPhraseQuery multiPhrase = (MultiPhraseQuery) q;
    if (multiPhrase.getSlop() == slop) {
      return q;
    }
    return new MultiPhraseQuery.Builder(multiPhrase).setSlop(slop).build();
  }

  if (!(q instanceof PhraseQuery)) {
    return q;
  }

  final PhraseQuery.Builder rebuilt = new PhraseQuery.Builder();
  rebuilt.setSlop(slop);
  final PhraseQuery source = (PhraseQuery) q;
  final org.apache.lucene.index.Term[] sourceTerms = source.getTerms();
  final int[] sourcePositions = source.getPositions();
  int idx = 0;
  for (org.apache.lucene.index.Term term : sourceTerms) {
    rebuilt.add(term, sourcePositions[idx]);
    idx++;
  }
  return rebuilt.build();
}
public void testBasic() throws Exception { MultiPhraseQuery query = new MultiPhraseQuery(); query.add(new Term[] { // #A new Term("field", "quick"), // #A new Term("field", "fast") // #A }); query.add(new Term("field", "fox")); // #B LOGGER.info(query); TopDocs hits = searcher.search(query, 10); assertEquals("fast fox match", 1, hits.totalHits); query.setSlop(1); hits = searcher.search(query, 10); assertEquals("both match", 2, hits.totalHits); }
/**
 * Parses the phrase {@code "quick fox"} with a {@code SynonymAnalyzer} whose engine maps
 * "quick" to "fast", and asserts both the string form of the parsed query and that it is a
 * {@link MultiPhraseQuery}.
 */
public void testQueryParser() throws Exception {
  // Stub engine: "quick" has the single synonym "fast"; every other word has none (null).
  SynonymEngine quickToFast = new SynonymEngine() {
    @Override
    public String[] getSynonyms(String s) {
      return s.equals("quick") ? new String[] {"fast"} : null;
    }
  };

  QueryParser parser =
      new QueryParser(Version.LUCENE_41, "field", new SynonymAnalyzer(quickToFast));
  Query parsed = parser.parse("\"quick fox\"");

  assertEquals("analyzed", "field:\"(quick fast) fox\"", parsed.toString());
  assertTrue("parsed as MultiPhraseQuery", parsed instanceof MultiPhraseQuery);
}
/**
 * Returns the extraction function for {@link MultiPhraseQuery}.
 *
 * <p>The function casts its argument to {@link MultiPhraseQuery}. With no term arrays it yields
 * a verified {@code Result} with an empty term set. Otherwise it folds every term array through
 * {@code selectTermListWithTheLongestShortestTerm} and yields the chosen set as an unverified
 * {@code Result}.
 *
 * @return a function mapping a multi-phrase query to its extraction {@code Result}
 */
static Function<Query, Result> multiPhraseQuery() {
  return query -> {
    final Term[][] termArrays = ((MultiPhraseQuery) query).getTermArrays();
    if (termArrays.length == 0) {
      return new Result(true, Collections.emptySet());
    }

    Set<Term> selected = null;
    for (int i = 0; i < termArrays.length; i++) {
      final Set<Term> candidate = new HashSet<>(Arrays.asList(termArrays[i]));
      selected = selectTermListWithTheLongestShortestTerm(selected, candidate);
    }
    return new Result(false, selected);
  };
}
/**
 * Builds a four-position {@link MultiPhraseQuery}, runs it through {@code analyze}, and asserts
 * that the result is unverified and contains exactly the term {@code _field:_very_long_term}.
 */
public void testExtractQueryMetadata_multiPhraseQuery() {
  MultiPhraseQuery.Builder builder = new MultiPhraseQuery.Builder();
  builder.add(new Term("_field", "_long_term"));
  builder.add(new Term[] {new Term("_field", "_long_term"), new Term("_field", "_term")});
  builder.add(new Term[] {new Term("_field", "_long_term"), new Term("_field", "_very_long_term")});
  builder.add(new Term[] {new Term("_field", "_very_long_term")});
  MultiPhraseQuery multiPhraseQuery = builder.build();

  Result result = analyze(multiPhraseQuery);
  assertThat(result.verified, is(false));

  List<Term> extracted = new ArrayList<>(result.terms);
  assertThat(extracted.size(), equalTo(1));

  Term onlyTerm = extracted.get(0);
  assertThat(onlyTerm.field(), equalTo("_field"));
  assertThat(onlyTerm.bytes().utf8ToString(), equalTo("_very_long_term"));
}
@Override public Query rewrite(IndexReader reader) throws IOException { Query rewritten = super.rewrite(reader); if (rewritten != this) { return rewritten; } if (termArrays.isEmpty()) { return new MatchNoDocsQuery(); } MultiPhraseQuery.Builder query = new MultiPhraseQuery.Builder(); query.setSlop(slop); int sizeMinus1 = termArrays.size() - 1; for (int i = 0; i < sizeMinus1; i++) { query.add(termArrays.get(i), positions.get(i)); } Term[] suffixTerms = termArrays.get(sizeMinus1); int position = positions.get(sizeMinus1); ObjectHashSet<Term> terms = new ObjectHashSet<>(); for (Term term : suffixTerms) { getPrefixTerms(terms, term, reader); if (terms.size() > maxExpansions) { break; } } if (terms.isEmpty()) { // if the terms does not exist we could return a MatchNoDocsQuery but this would break the unified highlighter // which rewrites query with an empty reader. return new BooleanQuery.Builder() .add(query.build(), BooleanClause.Occur.MUST) .add(Queries.newMatchNoDocsQuery("No terms supplied for " + MultiPhrasePrefixQuery.class.getName()), BooleanClause.Occur.MUST).build(); } query.add(terms.toArray(Term.class), position); return query.build(); }
/**
 * Creates a {@link MultiPhraseQuery} whose {@code rewrite} returns itself.
 *
 * <p>This stops the MultiPhraseQuery from being re-written as a BooleanQuery, which
 * could potentially hit the BooleanQuery.maxClauseCount of 1024.
 *
 * @return a new, empty, rewrite-suppressing multi-phrase query
 */
private MultiPhraseQuery getMultiPhrase() {
  return new MultiPhraseQuery() {
    private static final long serialVersionUID = 1L;

    @Override
    public Query rewrite(IndexReader reader) {
      // Deliberately a no-op: keep the query in its multi-phrase form.
      return this;
    }
  };
}
/**
 * Adds one position to {@code query} for the last term of the input.
 *
 * <p>The term is expanded via {@code expand} on the autocomplete field. If the expansion yields
 * terms, they are added as the alternatives; otherwise the literal {@code lastTerm} is added on
 * its own, unless it is empty, in which case nothing is added.
 *
 * @param reader   reader used for the expansion
 * @param query    the query to add the position to
 * @param lastTerm the term text to expand
 * @param count    the position passed to {@code query.add}
 * @throws IOException if the expansion fails
 */
private void addExpandedTerms(IndexReader reader, MultiPhraseQuery query, String lastTerm, int count) throws IOException {
  Term[] alternatives = expand(reader, FreeTextQuery.FIELD_NAME_AUTOCOMPLETE, lastTerm);

  if (alternatives.length == 0) {
    if (lastTerm.isEmpty()) {
      return;
    }
    // No expansion found: fall back to the literal term.
    alternatives = new Term[] {new Term(FreeTextQuery.FIELD_NAME_AUTOCOMPLETE, lastTerm)};
  }

  query.add(alternatives, count);
}
/**
 * Sets the slop on {@code q} in place when it is a {@link PhraseQuery} or a
 * {@link MultiPhraseQuery}; any other query type is left untouched.
 *
 * @param q    the query to mutate
 * @param slop the slop to set
 */
private void applySlop(Query q, int slop) {
  if (q instanceof PhraseQuery) {
    final PhraseQuery phrase = (PhraseQuery) q;
    phrase.setSlop(slop);
    return;
  }
  if (q instanceof MultiPhraseQuery) {
    final MultiPhraseQuery multiPhrase = (MultiPhraseQuery) q;
    multiPhrase.setSlop(slop);
  }
}
/**
 * Rewrites this prefix query into a {@link MultiPhraseQuery} whose last position holds the
 * terms produced by {@code getPrefixTerms} for each term of the final term array, and returns
 * that query's own rewrite.
 *
 * @param reader the reader used for prefix expansion and for the final rewrite
 * @return {@code super.rewrite(reader)} when the boost is not 1.0; a no-match query when there
 *         are no term arrays or no expanded terms; otherwise the rewritten multi-phrase query
 * @throws IOException if expansion or rewriting fails
 */
@Override
public Query rewrite(IndexReader reader) throws IOException {
  if (getBoost() != 1.0F) {
    return super.rewrite(reader);
  }
  if (termArrays.isEmpty()) {
    return new MatchNoDocsQuery();
  }

  final MultiPhraseQuery expandedQuery = new MultiPhraseQuery();
  expandedQuery.setSlop(slop);

  // Every position except the last is copied over verbatim.
  final int lastIndex = termArrays.size() - 1;
  for (int idx = 0; idx < lastIndex; idx++) {
    expandedQuery.add(termArrays.get(idx), positions.get(idx));
  }

  final Term[] prefixes = termArrays.get(lastIndex);
  final int lastPosition = positions.get(lastIndex);

  // Expand the prefixes, stopping once more than maxExpansions terms were collected.
  final ObjectHashSet<Term> expanded = new ObjectHashSet<>();
  for (Term prefix : prefixes) {
    getPrefixTerms(expanded, prefix, reader);
    if (expanded.size() > maxExpansions) {
      break;
    }
  }

  if (expanded.isEmpty()) {
    return Queries.newMatchNoDocsQuery();
  }

  expandedQuery.add(expanded.toArray(Term.class), lastPosition);
  // getBoost() was already checked against 1.0F at the top of this method.
  expandedQuery.setBoost(getBoost());
  return expandedQuery.rewrite(reader);
}
/**
 * Base implementation delegates to {@link #getFieldQuery(String,String)} and then sets the
 * slop in place on a resulting {@link PhraseQuery} or {@link MultiPhraseQuery}. This method may
 * be overridden, for example, to return a SpanNearQuery instead of a PhraseQuery.
 *
 * @param field     the field to query
 * @param queryText the phrase text
 * @param slop      the slop to set
 * @return the query produced by the delegate
 * @exception ParseException throw in overridden method to disallow
 */
protected Query getFieldQuery(String field, String queryText, int slop) throws ParseException {
  final Query parsed = getFieldQuery(field, queryText);

  if (parsed instanceof PhraseQuery) {
    ((PhraseQuery) parsed).setSlop(slop);
  } else if (parsed instanceof MultiPhraseQuery) {
    ((MultiPhraseQuery) parsed).setSlop(slop);
  }

  return parsed;
}
/**
 * Builds a {@link MultiPhraseQuery} with one position per producer and returns its rewrite.
 *
 * <p>Each producer supplies the terms for its position from the reader. As soon as one producer
 * yields no terms, the {@code matchNoDocsQuery} field is returned instead.
 *
 * @param reader the reader handed to every producer and to the final rewrite
 * @return the rewritten multi-phrase query, or {@code matchNoDocsQuery}
 * @throws IOException if a producer or the rewrite fails
 */
public Query rewrite(IndexReader reader) throws IOException {
  final MultiPhraseQuery assembled = new MultiPhraseQuery();

  int idx = 0;
  while (idx < producers.size()) {
    final Term[] produced = producers.get(idx).getTerms(reader);
    if (produced.length == 0) {
      // A position without any candidate term can never match.
      return matchNoDocsQuery;
    }
    assembled.add(produced, positions.get(idx));
    idx++;
  }

  return assembled.rewrite(reader);
}
/**
 * Forms a multiphrase query: the phrase "old dogs" parsed with {@code MockSynonymAnalyzer} must
 * equal a {@link MultiPhraseQuery} of "old" followed by ("dogs" | "dog"), with the default
 * operator, with {@code Operator.AND}, with a boost of 2 and with a slop of 3.
 */
public void testSynonymsPhrase() throws Exception {
  Term[] dogVariants = new Term[] {
    new Term("field", "dogs"),
    new Term("field", "dog")
  };
  MultiPhraseQuery expected = new MultiPhraseQuery();
  expected.add(new Term("field", "old"));
  expected.add(dogVariants);

  QueryParser parser = new QueryParser("field", new MockSynonymAnalyzer());
  assertEquals(expected, parser.parse("\"old dogs\""));

  // The default operator must not change how a quoted phrase is parsed.
  parser.setDefaultOperator(Operator.AND);
  assertEquals(expected, parser.parse("\"old dogs\""));

  // The expected query is mutated step by step: boost first, then slop on top.
  expected.setBoost(2.0f);
  assertEquals(expected, parser.parse("\"old dogs\"^2"));

  expected.setSlop(3);
  assertEquals(expected, parser.parse("\"old dogs\"~3^2"));
}
/**
 * Forms a multiphrase query: the phrase "中国" parsed with {@code MockCJKSynonymAnalyzer} under
 * {@code Operator.AND} must equal a {@link MultiPhraseQuery} of "中" followed by ("国" | "國"),
 * plain, with a boost of 2 and with a slop of 3.
 */
public void testCJKSynonymsPhrase() throws Exception {
  Term[] secondCharVariants = new Term[] {
    new Term("field", "国"),
    new Term("field", "國")
  };
  MultiPhraseQuery expected = new MultiPhraseQuery();
  expected.add(new Term("field", "中"));
  expected.add(secondCharVariants);

  QueryParser parser = new QueryParser("field", new MockCJKSynonymAnalyzer());
  parser.setDefaultOperator(Operator.AND);
  assertEquals(expected, parser.parse("\"中国\""));

  // The expected query is mutated step by step: boost first, then slop on top.
  expected.setBoost(2.0f);
  assertEquals(expected, parser.parse("\"中国\"^2"));

  expected.setSlop(3);
  assertEquals(expected, parser.parse("\"中国\"~3^2"));
}
/**
 * Forms a multiphrase query: {@code createPhraseQuery} on "old dogs" with
 * {@code MockSynonymAnalyzer} must equal a {@link MultiPhraseQuery} of "old" followed by
 * ("dogs" | "dog").
 */
public void testSynonymsPhrase() throws Exception {
  Term[] dogVariants = new Term[] {
    new Term("field", "dogs"),
    new Term("field", "dog")
  };
  MultiPhraseQuery expected = new MultiPhraseQuery();
  expected.add(new Term("field", "old"));
  expected.add(dogVariants);

  QueryBuilder queryBuilder = new QueryBuilder(new MockSynonymAnalyzer());
  Query actual = queryBuilder.createPhraseQuery("field", "old dogs");
  assertEquals(expected, actual);
}
/**
 * Forms a multiphrase query: {@code createPhraseQuery} on "中国" with
 * {@code MockCJKSynonymAnalyzer} must equal a {@link MultiPhraseQuery} of "中" followed by
 * ("国" | "國"), both without slop and with a slop of 3.
 */
public void testCJKSynonymsPhrase() throws Exception {
  Term[] secondCharVariants = new Term[] {
    new Term("field", "国"),
    new Term("field", "國")
  };
  MultiPhraseQuery expected = new MultiPhraseQuery();
  expected.add(new Term("field", "中"));
  expected.add(secondCharVariants);

  QueryBuilder queryBuilder = new QueryBuilder(new MockCJKSynonymAnalyzer());
  Query withoutSlop = queryBuilder.createPhraseQuery("field", "中国");
  assertEquals(expected, withoutSlop);

  expected.setSlop(3);
  Query withSlop = queryBuilder.createPhraseQuery("field", "中国", 3);
  assertEquals(expected, withSlop);
}
/**
 * Creates a complex phrase query from the cached token stream contents.
 *
 * <p>Tokens are grouped by position: a token with a position increment of zero joins the
 * current group, while a positive increment closes the group and starts a new one. Each group
 * becomes one position of the resulting multi-phrase query. Explicit positions are passed to
 * the builder only when {@code enablePositionIncrements} is set.
 *
 * @param field  the field every created {@link Term} belongs to
 * @param stream the token stream to consume; it is reset here but not closed
 * @param slop   the slop set on the builder
 * @return the built multi-phrase query
 * @throws IOException if reading from the stream fails
 */
protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException {
  MultiPhraseQuery.Builder builder = newMultiPhraseQueryBuilder();
  builder.setSlop(slop);

  TermToBytesRefAttribute termAttribute = stream.getAttribute(TermToBytesRefAttribute.class);
  PositionIncrementAttribute incrementAttribute = stream.getAttribute(PositionIncrementAttribute.class);

  int currentPosition = -1;
  List<Term> pendingGroup = new ArrayList<>();

  stream.reset();
  while (stream.incrementToken()) {
    int increment = incrementAttribute.getPositionIncrement();
    boolean startsNewGroup = increment > 0 && !pendingGroup.isEmpty();
    if (startsNewGroup) {
      // Flush the finished group at the position it was collected for.
      Term[] finished = pendingGroup.toArray(new Term[0]);
      if (enablePositionIncrements) {
        builder.add(finished, currentPosition);
      } else {
        builder.add(finished);
      }
      pendingGroup.clear();
    }
    currentPosition += increment;
    pendingGroup.add(new Term(field, termAttribute.getBytesRef()));
  }

  // Flush whatever was collected for the final position (added even if no token was seen).
  Term[] lastGroup = pendingGroup.toArray(new Term[0]);
  if (enablePositionIncrements) {
    builder.add(lastGroup, currentPosition);
  } else {
    builder.add(lastGroup);
  }

  return builder.build();
}