/**
 * Rewrites this query by configuring an {@link XMoreLikeThis} helper from the
 * query's settings and delegating the actual query construction to it.
 */
@Override
public Query rewrite(IndexReader reader) throws IOException {
    // Fall back to the default similarity when none was configured explicitly.
    XMoreLikeThis likeThis = new XMoreLikeThis(reader, similarity == null ? new DefaultSimilarity() : similarity);
    likeThis.setFieldNames(moreLikeFields);
    likeThis.setAnalyzer(analyzer);
    likeThis.setMinTermFreq(minTermFrequency);
    likeThis.setMinDocFreq(minDocFreq);
    likeThis.setMaxDocFreq(maxDocFreq);
    likeThis.setMaxQueryTerms(maxQueryTerms);
    likeThis.setMinWordLen(minWordLen);
    likeThis.setMaxWordLen(maxWordLen);
    likeThis.setStopWords(stopWords);
    likeThis.setBoost(boostTerms);
    likeThis.setBoostFactor(boostTermsFactor);
    // Optional "unlike" input: terms from that text/those fields are handled separately.
    if (unlikeText != null || unlikeFields != null) {
        handleUnlike(likeThis, unlikeText, unlikeFields);
    }
    return createQuery(likeThis);
}
private Query createPhraseQuery(EntityDisambiguationDPO dpo, EntityCentricKnowledgeBase kb) { LearnToRankQuery query = new LearnToRankQuery(); List<LearnToRankClause> features = new LinkedList<LearnToRankClause>(); DefaultSimilarity defaultSim = new DefaultSimilarity(); LTRBooleanQuery bq = new LTRBooleanQuery(); bq.add(LuceneFeatures.queryLabelTerm(dpo.getSelectedText(), "UniqueLabelString", defaultSim), Occur.SHOULD); bq.add(LuceneFeatures.queryLabelTerm(dpo.getSelectedText(), "Label", defaultSim), Occur.SHOULD); // Feature 1 features.add(query.add(bq, "Feature1", true)); // Feature 2 features.add(query.add( LuceneFeatures.querySensePrior(dpo.getSelectedText(), kb.getFeatureDefinition()), "Feature2", false)); features.get(0).setWeight(1f); features.get(1).setWeight(1f); return query; }
private Query createFuzzyQuery(EntityDisambiguationDPO dpo, EntityCentricKnowledgeBase kb) { LearnToRankQuery query = new LearnToRankQuery(); List<LearnToRankClause> features = new LinkedList<LearnToRankClause>(); DefaultSimilarity defaultSim = new DefaultSimilarity(); // Feature 1 features.add(query.add(LuceneFeatures.queryStringTerm( dpo.getSelectedText(), "Label", defaultSim, Occur.SHOULD, DisambiguationMainService.MAXCLAUSECOUNT), "Feature1", true)); // Feature 2 features.add(query.add( LuceneFeatures.querySensePrior(dpo.getSelectedText(), kb.getFeatureDefinition()), "Feature2", false)); features.get(0).setWeight(0.0915161f); features.get(1).setWeight(0.350994f); return query; }
/**
 * Looks up candidate entities for a surface form via the "UniqueLabel" field
 * and returns their "Mainlink" values. When more than five documents match,
 * an empty list is returned; I/O errors are reported and likewise yield an
 * empty result (best-effort lookup).
 */
private ArrayList<String> queryLucene(String surfaceForm) {
    ArrayList<String> mainLinks = new ArrayList<String>();
    final IndexSearcher searcher = eckb.getSearcher();
    final IndexReader reader = searcher.getIndexReader();
    LearnToRankQuery ltrQuery = new LearnToRankQuery();
    List<LearnToRankClause> clauses = new LinkedList<LearnToRankClause>();
    DefaultSimilarity sim = new DefaultSimilarity();
    clauses.add(ltrQuery.add(LuceneFeatures.queryLabelTerm(surfaceForm, "UniqueLabel", sim), "Feature1", true));
    try {
        final ScoreDoc[] hits = searcher.search(ltrQuery, 150).scoreDocs;
        // Highly ambiguous surface forms (more than five hits) produce no result at all.
        if (hits.length <= 5) {
            for (ScoreDoc hit : hits) {
                final Document doc = reader.document(hit.doc);
                mainLinks.add(doc.get("Mainlink"));
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return mainLinks;
}
public void testReallyNoNormsForDrillDown() throws Exception {
    Directory indexDir = newDirectory();
    Directory taxoDir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
    // The wrapper fails the test if any field other than the content field
    // "field" ever asks for a similarity: facet/drill-down fields must stay
    // unscored and norm-free.
    iwc.setSimilarity(new PerFieldSimilarityWrapper() {
      final Similarity sim = new DefaultSimilarity();

      @Override
      public Similarity get(String name) {
        assertEquals("field", name);
        return sim;
      }
    });
    TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
    RandomIndexWriter writer = new RandomIndexWriter(random(), indexDir, iwc);
    FacetsConfig config = new FacetsConfig();
    Document doc = new Document();
    doc.add(newTextField("field", "text", Field.Store.NO));
    doc.add(new FacetField("a", "path"));
    writer.addDocument(config.build(taxoWriter, doc));
    IOUtils.close(writer, taxoWriter, indexDir, taxoDir);
}
public void testCustomIDF() throws Exception {
    Directory indexStore = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), indexStore);
    add("This is a test", "object", writer);
    add("a note", "note", writer);
    IndexReader reader = writer.getReader();
    IndexSearcher searcher = newSearcher(reader);
    // Pin idf to a constant so the normalization value asserted below is predictable.
    searcher.setSimilarity(new DefaultSimilarity() {
      @Override
      public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
        return new Explanation(10f, "just a test");
      }
    });
    MultiPhraseQuery phraseQuery = new MultiPhraseQuery();
    phraseQuery.add(new Term[] { new Term("body", "this"), new Term("body", "that") });
    phraseQuery.add(new Term("body", "is"));
    Weight weight = phraseQuery.createWeight(searcher);
    // Expected value-for-normalization is (idf)^2 = 10 * 10.
    assertEquals(10f * 10f, weight.getValueForNormalization(), 0.001f);
    writer.close();
    reader.close();
    indexStore.close();
}
/** * Initializes the tests by adding documents to the index. */ @Override public void setUp() throws Exception { super.setUp(); // create test index final RandomIndexWriter writer = new RandomIndexWriter(random(), mDirectory, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)) .setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy()) .setSimilarity(new DefaultSimilarity())); addDocument(writer, "A", "Should we, could we, would we?"); addDocument(writer, "B", "It should. Should it?"); addDocument(writer, "C", "It shouldn't."); addDocument(writer, "D", "Should we, should we, should we."); reader2 = writer.getReader(); writer.close(); // re-open the searcher since we added more docs searcher2 = newSearcher(reader2); searcher2.setSimilarity(new DefaultSimilarity()); }
/** * Initializes the tests by adding 4 identical documents to the index. */ @Override public void setUp() throws Exception { super.setUp(); // create test index mDirectory = newDirectory(); final RandomIndexWriter writer = new RandomIndexWriter(random(), mDirectory, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)) .setMergePolicy(newLogMergePolicy()).setSimilarity(new DefaultSimilarity())); addDocument(writer, "1", "I think it should work."); addDocument(writer, "2", "I think it should work."); addDocument(writer, "3", "I think it should work."); addDocument(writer, "4", "I think it should work."); reader = writer.getReader(); writer.close(); searcher = newSearcher(reader); searcher.setSimilarity(new DefaultSimilarity()); }
public void testRewriteCoord1() throws Exception {
    final Similarity previous = s.getSimilarity();
    try {
        // A non-default coord() makes coord-sensitive rewrites observable in scores.
        s.setSimilarity(new DefaultSimilarity() {
          @Override
          public float coord(int overlap, int maxOverlap) {
            return overlap / ((float) maxOverlap + 1);
          }
        });
        // Same single SHOULD clause; the second query adds minimumNumberShouldMatch=1.
        BooleanQuery plain = new BooleanQuery();
        plain.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
        BooleanQuery withMinShould = new BooleanQuery();
        withMinShould.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
        withMinShould.setMinimumNumberShouldMatch(1);
        TopDocs plainHits = s.search(plain, null, 100);
        TopDocs minShouldHits = s.search(withMinShould, null, 100);
        assertSubsetOfSameScores(withMinShould, plainHits, minShouldHits);
    } finally {
        // Always restore the searcher's similarity for other tests.
        s.setSimilarity(previous);
    }
}
public void testRewriteNegate() throws Exception {
    final Similarity previous = s.getSimilarity();
    try {
        // A non-default coord() makes coord-sensitive rewrites observable in scores.
        s.setSimilarity(new DefaultSimilarity() {
          @Override
          public float coord(int overlap, int maxOverlap) {
            return overlap / ((float) maxOverlap + 1);
          }
        });
        // Same single SHOULD clause; the second query adds a MUST_NOT on a term ("Z")
        // that should not change the matching docs' scores.
        BooleanQuery plain = new BooleanQuery();
        plain.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
        BooleanQuery negated = new BooleanQuery();
        negated.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
        negated.add(new TermQuery(new Term("data", "Z")), BooleanClause.Occur.MUST_NOT);
        TopDocs plainHits = s.search(plain, null, 100);
        TopDocs negatedHits = s.search(negated, null, 100);
        assertSubsetOfSameScores(negated, plainHits, negatedHits);
    } finally {
        // Always restore the searcher's similarity for other tests.
        s.setSimilarity(previous);
    }
}
/** Indexes one stored text document per entry of {@code values} and opens a searcher. */
@Override
public void setUp() throws Exception {
    super.setUp();
    directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory,
        newIndexWriterConfig(new MockAnalyzer(random()))
            .setMergePolicy(newLogMergePolicy())
            .setSimilarity(new DefaultSimilarity()));
    for (int idx = 0; idx < values.length; idx++) {
        Document doc = new Document();
        doc.add(newTextField(FIELD, values[idx], Field.Store.YES));
        indexWriter.addDocument(doc);
    }
    // Tests require a single-segment (atomic) view of the index.
    indexReader = SlowCompositeReaderWrapper.wrap(indexWriter.getReader());
    indexWriter.close();
    indexSearcher = newSearcher(indexReader);
    indexSearcher.setSimilarity(new DefaultSimilarity());
}
@Test
public void testQueries10() throws Exception {
    // Three required terms plus one optional term.
    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.add(new TermQuery(new Term(field, "w3")), BooleanClause.Occur.MUST);
    booleanQuery.add(new TermQuery(new Term(field, "xx")), BooleanClause.Occur.MUST);
    booleanQuery.add(new TermQuery(new Term(field, "w2")), BooleanClause.Occur.MUST);
    booleanQuery.add(new TermQuery(new Term(field, "zz")), BooleanClause.Occur.SHOULD);
    int[] expectedDocNrs = {2, 3};
    Similarity previous = searcher.getSimilarity();
    try {
        // A non-default coord() exercises coord-dependent scoring paths.
        searcher.setSimilarity(new DefaultSimilarity() {
          @Override
          public float coord(int overlap, int maxOverlap) {
            return overlap / ((float) maxOverlap - 1);
          }
        });
        queriesTest(booleanQuery, expectedDocNrs);
    } finally {
        // Always restore the searcher's similarity for other tests.
        searcher.setSimilarity(previous);
    }
}
/**
 * Opens either the derived or the main index for searching, configures the
 * searcher, and prepares one query parser per searchable field. Any failure
 * is wrapped in a RuntimeException.
 */
public void openIndexForSearching(boolean useDerivedIndex) {
    try {
        String indexFolder = useDerivedIndex ? DERIVED_INDEX_FOLDER : MAIN_INDEX_FOLDER;
        reader = DirectoryReader.open(FSDirectory.open(new File(folder + "/" + indexFolder)));
        searcher = new IndexSearcher(reader);
        searcher.setSimilarity(new DefaultSimilarity());
        // Large generated disjunctions must not trip Lucene's clause limit.
        BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
        // Keyword analyzers: these fields are matched verbatim, not tokenized.
        QueryParser typeQueryParser = new QueryParser(Version.LUCENE_4_9, "TYPE", new KeywordAnalyzer());
        conceptQuery = typeQueryParser.parse(CONCEPT_TYPE_STRING);
        conceptIdQueryParser = new QueryParser(Version.LUCENE_4_9, "CONCEPT_ID", new KeywordAnalyzer());
        conceptClassQueryParser = new QueryParser(Version.LUCENE_4_9, "CONCEPT_CLASS_ID", new KeywordAnalyzer());
        vocabularyQueryParser = new QueryParser(Version.LUCENE_4_9, "VOCABULARY_ID", new KeywordAnalyzer());
        // Free-text terms go through the shared analyzer instead.
        keywordsQueryParser = new QueryParser(Version.LUCENE_4_9, "TERM", analyzer);
        domainQueryParser = new QueryParser(Version.LUCENE_4_9, "DOMAIN_ID", new KeywordAnalyzer());
        standardConceptQueryParser = new QueryParser(Version.LUCENE_4_9, "STANDARD_CONCEPT", new KeywordAnalyzer());
        termTypeQueryParser = new QueryParser(Version.LUCENE_4_9, "TERM_TYPE", new KeywordAnalyzer());
        numDocs = reader.numDocs();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
public void testReallyNoNormsForDrillDown() throws Exception {
    Directory indexDir = newDirectory();
    Directory taxoDir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    // The wrapper fails the test if any field other than the content field
    // "field" ever asks for a similarity: facet/drill-down fields must stay
    // unscored and norm-free.
    iwc.setSimilarity(new PerFieldSimilarityWrapper() {
      final Similarity sim = new DefaultSimilarity();

      @Override
      public Similarity get(String name) {
        assertEquals("field", name);
        return sim;
      }
    });
    RandomIndexWriter writer = new RandomIndexWriter(random(), indexDir, iwc);
    TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
    FacetFields facetFields = new FacetFields(taxoWriter);
    Document doc = new Document();
    doc.add(newTextField("field", "text", Field.Store.NO));
    facetFields.addFields(doc, Collections.singletonList(new CategoryPath("a/path", '/')));
    writer.addDocument(doc);
    writer.close();
    taxoWriter.close();
    indexDir.close();
    taxoDir.close();
}
/** * Initializes the tests by adding documents to the index. */ @Override public void setUp() throws Exception { super.setUp(); // create test index final RandomIndexWriter writer = new RandomIndexWriter(random(), mDirectory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)) .setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy()) .setSimilarity(new DefaultSimilarity())); addDocument(writer, "A", "Should we, could we, would we?"); addDocument(writer, "B", "It should. Should it?"); addDocument(writer, "C", "It shouldn't."); addDocument(writer, "D", "Should we, should we, should we."); reader2 = writer.getReader(); writer.close(); // re-open the searcher since we added more docs searcher2 = newSearcher(reader2); searcher2.setSimilarity(new DefaultSimilarity()); }
/** * Initializes the tests by adding 4 identical documents to the index. */ @Override public void setUp() throws Exception { super.setUp(); // create test index mDirectory = newDirectory(); final RandomIndexWriter writer = new RandomIndexWriter(random(), mDirectory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)) .setMergePolicy(newLogMergePolicy()).setSimilarity(new DefaultSimilarity())); addDocument(writer, "1", "I think it should work."); addDocument(writer, "2", "I think it should work."); addDocument(writer, "3", "I think it should work."); addDocument(writer, "4", "I think it should work."); reader = writer.getReader(); writer.close(); searcher = newSearcher(reader); searcher.setSimilarity(new DefaultSimilarity()); }
/** Indexes one stored text document per entry of {@code values} and opens a searcher. */
@Override
public void setUp() throws Exception {
    super.setUp();
    directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
            .setMergePolicy(newLogMergePolicy())
            .setSimilarity(new DefaultSimilarity()));
    for (int idx = 0; idx < values.length; idx++) {
        Document doc = new Document();
        doc.add(newTextField(FIELD, values[idx], Field.Store.YES));
        indexWriter.addDocument(doc);
    }
    // Tests require a single-segment (atomic) view of the index.
    indexReader = SlowCompositeReaderWrapper.wrap(indexWriter.getReader());
    indexWriter.close();
    indexSearcher = newSearcher(indexReader);
    indexSearcher.setSimilarity(new DefaultSimilarity());
}
/** * Initializes the tests by adding documents to the index. */ @Override public void setUp() throws Exception { super.setUp(); // create test index final RandomIndexWriter writer = new RandomIndexWriter(random(), mDirectory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)) .setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy()) .setSimilarity(new DefaultSimilarity())); addDocument(writer, "A", "Should we, could we, would we?"); addDocument(writer, "B", "It should. Should it?"); addDocument(writer, "C", "It shouldn't."); addDocument(writer, "D", "Should we, should we, should we."); reader2 = writer.getReader(); writer.close(); // re-open the searcher since we added more docs searcher2 = newSearcher(reader2); searcher2.setSimilarity(new DefaultSimilarity()); }
/** * Initializes the tests by adding 4 identical documents to the index. */ @Override public void setUp() throws Exception { super.setUp(); // create test index mDirectory = newDirectory(); final RandomIndexWriter writer = new RandomIndexWriter(random(), mDirectory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)) .setMergePolicy(newLogMergePolicy()).setSimilarity(new DefaultSimilarity())); addDocument(writer, "1", "I think it should work."); addDocument(writer, "2", "I think it should work."); addDocument(writer, "3", "I think it should work."); addDocument(writer, "4", "I think it should work."); reader = writer.getReader(); writer.close(); searcher = newSearcher(reader); searcher.setSimilarity(new DefaultSimilarity()); }
/**
 * Creates a filter over the given per-document term vectors.
 *
 * @param termVectorsByField per-document term vectors to filter
 * @param topLevelFields     top-level fields view of the index
 * @param selectedFields     names of the fields to consider
 * @param dfs                optional aggregated statistics; may be null
 */
public TermVectorsFilter(Fields termVectorsByField, Fields topLevelFields, Set<String> selectedFields, @Nullable AggregatedDfs dfs) {
    this.fields = termVectorsByField;
    this.topLevelFields = topLevelFields;
    this.selectedFields = selectedFields;
    this.dfs = dfs;
    // Terms are scored with the classic (TF-IDF) similarity.
    this.similarity = new DefaultSimilarity();
    this.scoreTerms = new HashMap<>();
    this.sizes = AtomicLongMap.create();
}
private Query createQuery(EntityDisambiguationDPO dpo) { LearnToRankQuery query = new LearnToRankQuery(); List<LearnToRankClause> features = new LinkedList<LearnToRankClause>(); DefaultSimilarity defaultSim = new DefaultSimilarity(); // Feature 1 features.add(query.add(LuceneFeatures.queryLabelTerm( dpo.getSelectedText(), "title", defaultSim), "Feature1", true)); // Feature 2 features.add(query.add(LuceneFeatures.queryLabelTerm( dpo.getSelectedText(), "abstract", defaultSim), "Feature2", true)); // Feature 3 features.add(query.add(LuceneFeatures.queryStringTerm(dpo.getContext(), "title", defaultSim, Occur.SHOULD, DisambiguationMainService.MAXCLAUSECOUNT), "Feature3", false)); // Feature 4 features.add(query.add(LuceneFeatures.queryStringTerm(dpo.getContext(), "abstract", defaultSim, Occur.SHOULD, DisambiguationMainService.MAXCLAUSECOUNT), "Feature4", false)); features.get(0).setWeight(0.0056836f); features.get(1).setWeight(0.0305069f); features.get(2).setWeight(0.117543f); features.get(3).setWeight(0.365259f); return query; }
/**
 * Creates an IndexSearcher over the given reader, randomly choosing between a
 * threaded and an unthreaded executor and (optionally) an asserting wrapper.
 * Any created executor is shut down when the reader closes.
 */
public static IndexSearcher newSearcher(IndexReader r, boolean wrapWithAssertions) {
    Random rnd = new Random();
    final ThreadPoolExecutor executor;
    if (rnd.nextBoolean()) {
        executor = null;
    } else {
        int threadCount = randomIntBetween(rnd, 1, 8);
        executor = new ThreadPoolExecutor(threadCount, threadCount, 0L, TimeUnit.MILLISECONDS,
            new LinkedBlockingQueue<Runnable>(), new NamedThreadFactory("LuceneTestCase"));
        // Tie the executor's lifetime to the reader's.
        r.addReaderClosedListener(new IndexReader.ReaderClosedListener() {
          @Override
          public void onClose(IndexReader reader) {
            shutdownExecutorService(executor);
          }
        });
    }
    final IndexSearcher searcher;
    if (wrapWithAssertions) {
        // Randomly construct from the reader or its context to cover both paths.
        searcher = rnd.nextBoolean()
            ? new AssertingIndexSearcher(rnd, r, executor)
            : new AssertingIndexSearcher(rnd, r.getContext(), executor);
    } else {
        searcher = rnd.nextBoolean()
            ? new IndexSearcher(r, executor)
            : new IndexSearcher(r.getContext(), executor);
    }
    searcher.setSimilarity(new DefaultSimilarity());
    return searcher;
}
public void testIDF() throws Exception {
    Similarity previous = searcher.getSimilarity();
    try {
        // Use the classic similarity so the expected idf values below are stable.
        searcher.setSimilarity(new DefaultSimilarity());
        assertHits(
            new FunctionQuery(new IDFValueSource("bogus", "bogus", "text", new BytesRef("test"))),
            new float[] { 0.5945349f, 0.5945349f });
    } finally {
        // Always restore the searcher's similarity for other tests.
        searcher.setSimilarity(previous);
    }
}
public void testNorm() throws Exception { Similarity saved = searcher.getSimilarity(); try { // no norm field (so agnostic to indexed similarity) searcher.setSimilarity(new DefaultSimilarity()); assertHits(new FunctionQuery( new NormValueSource("byte")), new float[] { 0f, 0f }); } finally { searcher.setSimilarity(saved); } }
public void testTF() throws Exception { Similarity saved = searcher.getSimilarity(); try { // no norm field (so agnostic to indexed similarity) searcher.setSimilarity(new DefaultSimilarity()); assertHits(new FunctionQuery( new TFValueSource("bogus", "bogus", "text", new BytesRef("test"))), new float[] { (float)Math.sqrt(3d), (float)Math.sqrt(1d) }); assertHits(new FunctionQuery( new TFValueSource("bogus", "bogus", "string", new BytesRef("bar"))), new float[] { 0f, 1f }); } finally { searcher.setSimilarity(saved); } }