SeqSpanScorer(SeqSpanWeight weight, PostingsAndFreq[] postings,
              Similarity.SimScorer docScorer, boolean needsScores,
              float matchCost) throws IOException {
  super(weight);
  this.selfWeight = weight;
  this.docScorer = docScorer;
  this.needsScores = needsScores;
  List<DocIdSetIterator> iterators = new ArrayList<>();
  List<PostingsAndPosition> postingsAndPositions = new ArrayList<>();
  for (PostingsAndFreq posting : postings) {
    iterators.add(posting.postings);
    postingsAndPositions.add(new PostingsAndPosition(posting.postings, posting.position));
  }
  conjunction = ConjunctionDISI.intersectIterators(iterators);
  this.postings = postingsAndPositions.toArray(new PostingsAndPosition[postingsAndPositions.size()]);
  this.matchCost = matchCost;
}
ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
                  Similarity.SimScorer docScorer) throws IOException {
  super(weight);
  this.docScorer = docScorer;
  chunkStates = new ChunkState[postings.length];
  endMinus1 = postings.length - 1;
  lead = postings[0].postings;
  // min(cost)
  cost = lead.cost();
  for (int i = 0; i < postings.length; i++) {
    chunkStates[i] = new ChunkState(postings[i].postings, -postings[i].position);
  }
}
protected void processPayload(Similarity similarity) throws IOException {
  if (termSpans.isPayloadAvailable()) {
    final DocsAndPositionsEnum postings = termSpans.getPostings();
    payload = postings.getPayload();
    if (payload != null) {
      payloadScore = function.currentScore(doc, term.field(), spans.start(), spans.end(),
          payloadsSeen, payloadScore,
          docScorer.computePayloadFactor(doc, spans.start(), spans.end(), payload));
    } else {
      payloadScore = function.currentScore(doc, term.field(), spans.start(), spans.end(),
          payloadsSeen, payloadScore, 1F);
    }
    payloadsSeen++;
  } else {
    // zero out the payload?
  }
}
@Override
public NumericDocValues getNormValues(String field) {
  FieldInfo fieldInfo = fieldInfos.get(field);
  if (fieldInfo == null || fieldInfo.omitsNorms()) return null;
  NumericDocValues norms = cachedNormValues;
  Similarity sim = getSimilarity();
  if (!field.equals(cachedFieldName) || sim != cachedSimilarity) { // not cached?
    Info info = getInfo(field);
    int numTokens = info != null ? info.numTokens : 0;
    int numOverlapTokens = info != null ? info.numOverlapTokens : 0;
    float boost = info != null ? info.getBoost() : 1.0f;
    FieldInvertState invertState = new FieldInvertState(field, 0, numTokens, numOverlapTokens, 0, boost);
    long value = sim.computeNorm(invertState);
    norms = new MemoryIndexNormDocValues(value);
    // cache it for future reuse
    cachedNormValues = norms;
    cachedFieldName = field;
    cachedSimilarity = sim;
    if (DEBUG) System.err.println("MemoryIndexReader.norms: " + field + ":" + value + ":" + numTokens);
  }
  return norms;
}
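A minimal usage sketch for the caching above (hedged: the field name and text are made up, and analyzer construction varies across Lucene versions; 'analyzer' is assumed to be in scope). The cached norm is recomputed only when the field name or the searcher's Similarity changes.

MemoryIndex index = new MemoryIndex();
index.addField("field", "some short text", analyzer);
IndexSearcher searcher = index.createSearcher();
searcher.setSimilarity(new DefaultSimilarity()); // a later norm lookup recomputes and re-caches
TopDocs hits = searcher.search(new TermQuery(new Term("field", "short")), 1);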
public void testReallyNoNormsForDrillDown() throws Exception {
  Directory dir = newDirectory();
  Directory taxoDir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setSimilarity(new PerFieldSimilarityWrapper() {
    final Similarity sim = new DefaultSimilarity();

    @Override
    public Similarity get(String name) {
      assertEquals("field", name);
      return sim;
    }
  });
  TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
  FacetsConfig config = new FacetsConfig();
  Document doc = new Document();
  doc.add(newTextField("field", "text", Field.Store.NO));
  doc.add(new FacetField("a", "path"));
  writer.addDocument(config.build(taxoWriter, doc));
  IOUtils.close(writer, taxoWriter, dir, taxoDir);
}
/**
 * Sets up a RAMDirectory and adds documents (using English.intToEnglish()) with two fields:
 * field and multiField, and analyzes them using the PayloadAnalyzer.
 *
 * @param similarity the Similarity class to use in the Searcher
 * @param numDocs the number of docs to add
 * @return an IndexSearcher
 */
// TODO: randomize
public IndexSearcher setUp(Random random, Similarity similarity, int numDocs) throws IOException {
  Directory directory = new MockDirectoryWrapper(random, new RAMDirectory());
  PayloadAnalyzer analyzer = new PayloadAnalyzer();

  // TODO: randomize this
  IndexWriter writer = new IndexWriter(directory,
      new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setSimilarity(similarity));
  // writer.infoStream = System.out;
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    doc.add(new TextField(FIELD, English.intToEnglish(i), Field.Store.YES));
    doc.add(new TextField(MULTI_FIELD, English.intToEnglish(i) + " " + English.intToEnglish(i), Field.Store.YES));
    doc.add(new TextField(NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES));
    writer.addDocument(doc);
  }
  reader = DirectoryReader.open(writer, true);
  writer.close();

  IndexSearcher searcher = LuceneTestCase.newSearcher(reader);
  searcher.setSimilarity(similarity);
  return searcher;
}
public void testRewriteCoord1() throws Exception {
  final Similarity oldSimilarity = s.getSimilarity();
  try {
    s.setSimilarity(new DefaultSimilarity() {
      @Override
      public float coord(int overlap, int maxOverlap) {
        return overlap / ((float) maxOverlap + 1);
      }
    });
    BooleanQuery q1 = new BooleanQuery();
    q1.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
    BooleanQuery q2 = new BooleanQuery();
    q2.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
    q2.setMinimumNumberShouldMatch(1);
    TopDocs top1 = s.search(q1, null, 100);
    TopDocs top2 = s.search(q2, null, 100);
    assertSubsetOfSameScores(q2, top1, top2);
  } finally {
    s.setSimilarity(oldSimilarity);
  }
}
public void testRewriteNegate() throws Exception {
  final Similarity oldSimilarity = s.getSimilarity();
  try {
    s.setSimilarity(new DefaultSimilarity() {
      @Override
      public float coord(int overlap, int maxOverlap) {
        return overlap / ((float) maxOverlap + 1);
      }
    });
    BooleanQuery q1 = new BooleanQuery();
    q1.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
    BooleanQuery q2 = new BooleanQuery();
    q2.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
    q2.add(new TermQuery(new Term("data", "Z")), BooleanClause.Occur.MUST_NOT);
    TopDocs top1 = s.search(q1, null, 100);
    TopDocs top2 = s.search(q2, null, 100);
    assertSubsetOfSameScores(q2, top1, top2);
  } finally {
    s.setSimilarity(oldSimilarity);
  }
}
@Test
public void testQueries10() throws Exception {
  BooleanQuery query = new BooleanQuery();
  query.add(new TermQuery(new Term(field, "w3")), BooleanClause.Occur.MUST);
  query.add(new TermQuery(new Term(field, "xx")), BooleanClause.Occur.MUST);
  query.add(new TermQuery(new Term(field, "w2")), BooleanClause.Occur.MUST);
  query.add(new TermQuery(new Term(field, "zz")), BooleanClause.Occur.SHOULD);
  int[] expDocNrs = {2, 3};
  Similarity oldSimilarity = searcher.getSimilarity();
  try {
    searcher.setSimilarity(new DefaultSimilarity() {
      @Override
      public float coord(int overlap, int maxOverlap) {
        return overlap / ((float) maxOverlap - 1);
      }
    });
    queriesTest(query, expDocNrs);
  } finally {
    searcher.setSimilarity(oldSimilarity);
  }
}
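The same override pattern used in the three tests above can disable coordination entirely. A hedged sketch (DefaultSimilarity and the coord() hook exist only in pre-6.x Lucene; 'searcher' is assumed in scope):

// A Similarity that ignores the coordination factor, so BooleanQuery scores
// do not scale with the number of matching optional clauses.
Similarity noCoord = new DefaultSimilarity() {
  @Override
  public float coord(int overlap, int maxOverlap) {
    return 1f; // the default would be overlap / (float) maxOverlap
  }
};
searcher.setSimilarity(noCoord);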
public void buildIndex(Directory dir) throws IOException {
  Random random = random();
  MockAnalyzer analyzer = new MockAnalyzer(random());
  analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
  IndexWriterConfig config = newIndexWriterConfig(analyzer);
  Similarity provider = new MySimProvider();
  config.setSimilarity(provider);
  RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
  final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
  int num = atLeast(100);
  for (int i = 0; i < num; i++) {
    Document doc = docs.nextDoc();
    int boost = random().nextInt(255);
    Field f = new TextField(byteTestField, "" + boost, Field.Store.YES);
    f.setBoost(boost);
    doc.add(f);
    writer.addDocument(doc);
    doc.removeField(byteTestField);
    if (rarely()) {
      writer.commit();
    }
  }
  writer.commit();
  writer.close();
  docs.close();
}
static SimilarityFactory readSimilarity(SolrResourceLoader loader, Node node) {
  if (node == null) {
    return null;
  } else {
    SimilarityFactory similarityFactory;
    final String classArg = ((Element) node).getAttribute(SimilarityFactory.CLASS_NAME);
    final Object obj = loader.newInstance(classArg, Object.class, "search.similarities.");
    if (obj instanceof SimilarityFactory) {
      // configure a factory, get a similarity back
      final NamedList<Object> namedList = DOMUtil.childNodesToNamedList(node);
      namedList.add(SimilarityFactory.CLASS_NAME, classArg);
      SolrParams params = SolrParams.toSolrParams(namedList);
      similarityFactory = (SimilarityFactory) obj;
      similarityFactory.init(params);
    } else {
      // just like always, assume it's a Similarity and get a ClassCastException - reasonable error handling
      similarityFactory = new SimilarityFactory() {
        @Override
        public Similarity getSimilarity() {
          return (Similarity) obj;
        }
      };
    }
    return similarityFactory;
  }
}
private void initialize(File indexPath, int kmerSize, int kmerSkips, boolean minStrandKmer,
                        double minShouldMatch, QueryGenerationAlgorithm queryGenerationAlgorithm,
                        Similarity similarity) throws Exception {
  if (!indexPath.exists() || !indexPath.isDirectory()) {
    throw new IllegalArgumentException("indexPath is not a directory or does not exist");
  }

  this.indexPath = indexPath;
  this.kmerSize = kmerSize;
  this.kmerSkips = kmerSkips;
  this.minStrandKmer = minStrandKmer;
  this.queryAnalyzer = new KmerQueryAnalyzer(this.kmerSize, this.kmerSkips, this.minStrandKmer);
  Directory dir = new MMapDirectory(this.indexPath.toPath());
  this.indexReader = DirectoryReader.open(dir);
  this.indexSearcher = new IndexSearcher(this.indexReader);
  if (similarity != null) {
    this.indexSearcher.setSimilarity(similarity);
  }
  this.minShouldMatch = minShouldMatch;
  this.queryGenerationAlgorithm = queryGenerationAlgorithm;
  BooleanQuery.setMaxClauseCount(10000);
}
LateParsingQuery(Query toQuery, Query innerQuery, int minChildren, int maxChildren,
                 String parentType, ScoreMode scoreMode,
                 ParentChildIndexFieldData parentChildIndexFieldData, Similarity similarity) {
  this.toQuery = toQuery;
  this.innerQuery = innerQuery;
  this.minChildren = minChildren;
  this.maxChildren = maxChildren;
  this.parentType = parentType;
  this.scoreMode = scoreMode;
  this.parentChildIndexFieldData = parentChildIndexFieldData;
  this.similarity = similarity;
}
/**
 * Creates a new {@link org.elasticsearch.index.engine.EngineConfig}
 */
public EngineConfig(OpenMode openMode, ShardId shardId, ThreadPool threadPool,
                    IndexSettings indexSettings, Engine.Warmer warmer, Store store,
                    SnapshotDeletionPolicy deletionPolicy, MergePolicy mergePolicy,
                    Analyzer analyzer, Similarity similarity, CodecService codecService,
                    Engine.EventListener eventListener, TranslogRecoveryPerformer translogRecoveryPerformer,
                    QueryCache queryCache, QueryCachingPolicy queryCachingPolicy,
                    TranslogConfig translogConfig, TimeValue flushMergesAfter,
                    ReferenceManager.RefreshListener refreshListeners, long maxUnsafeAutoIdTimestamp) {
  if (openMode == null) {
    throw new IllegalArgumentException("openMode must not be null");
  }
  this.shardId = shardId;
  this.indexSettings = indexSettings;
  this.threadPool = threadPool;
  this.warmer = warmer == null ? (a) -> {} : warmer;
  this.store = store;
  this.deletionPolicy = deletionPolicy;
  this.mergePolicy = mergePolicy;
  this.analyzer = analyzer;
  this.similarity = similarity;
  this.codecService = codecService;
  this.eventListener = eventListener;
  codecName = indexSettings.getValue(INDEX_CODEC_SETTING);
  // We give IndexWriter a "huge" (256 MB) buffer, so it won't flush on its own unless the ES indexing
  // buffer is also huge and/or there are not too many shards allocated to this node. Instead,
  // IndexingMemoryController periodically checks and refreshes the most heap-consuming shards when
  // total indexing heap usage across all shards is too high:
  indexingBufferSize = new ByteSizeValue(256, ByteSizeUnit.MB);
  this.translogRecoveryPerformer = translogRecoveryPerformer;
  this.queryCache = queryCache;
  this.queryCachingPolicy = queryCachingPolicy;
  this.translogConfig = translogConfig;
  this.flushMergesAfter = flushMergesAfter;
  this.openMode = openMode;
  this.refreshListeners = refreshListeners;
  assert maxUnsafeAutoIdTimestamp >= IndexRequest.UNSET_AUTO_GENERATED_TIMESTAMP :
      "maxUnsafeAutoIdTimestamp must be >= -1 but was " + maxUnsafeAutoIdTimestamp;
  this.maxUnsafeAutoIdTimestamp = maxUnsafeAutoIdTimestamp;
}
public void testAddSimilarity() throws IOException {
  Settings indexSettings = Settings.builder()
      .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
      .put("index.similarity.my_similarity.type", "test_similarity")
      .put("index.similarity.my_similarity.key", "there is a key")
      .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
      .build();
  IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings),
      new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));
  module.addSimilarity("test_similarity",
      (string, providerSettings, indexLevelSettings) -> new SimilarityProvider() {
        @Override
        public String name() {
          return string;
        }

        @Override
        public Similarity get() {
          return new TestSimilarity(providerSettings.get("key"));
        }
      });
  IndexService indexService = newIndexService(module);
  SimilarityService similarityService = indexService.similarityService();
  assertNotNull(similarityService.getSimilarity("my_similarity"));
  assertTrue(similarityService.getSimilarity("my_similarity").get() instanceof TestSimilarity);
  assertEquals("my_similarity", similarityService.getSimilarity("my_similarity").name());
  assertEquals("there is a key", ((TestSimilarity) similarityService.getSimilarity("my_similarity").get()).key);
  indexService.close("simon says", false);
}
public void testNonDefaultSimilarity() throws Exception {
  QueryShardContext shardContext = createShardContext();
  HasChildQueryBuilder hasChildQueryBuilder =
      QueryBuilders.hasChildQuery(CHILD_TYPE, new TermQueryBuilder("custom_string", "value"), ScoreMode.None);
  HasChildQueryBuilder.LateParsingQuery query =
      (HasChildQueryBuilder.LateParsingQuery) hasChildQueryBuilder.toQuery(shardContext);
  Similarity expected = SimilarityService.BUILT_IN.get(similarity)
      .apply(similarity, Settings.EMPTY,
          Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build())
      .get();
  assertThat(((PerFieldSimilarityWrapper) query.getSimilarity()).get("custom_string"),
      instanceOf(expected.getClass()));
}
protected SpanScorer(Spans spans, Weight weight, Similarity.SimScorer docScorer) throws IOException {
  super(weight);
  this.docScorer = docScorer;
  this.spans = spans;
  doc = -1;
  more = spans.next();
}
SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
                   int slop, Similarity.SimScorer docScorer) {
  super(weight);
  this.docScorer = docScorer;
  this.slop = slop;
  this.numPostings = postings == null ? 0 : postings.length;
  pq = new PhraseQueue(postings.length);
  // min(cost)
  cost = postings[0].postings.cost();
  // convert tps to a list of phrase positions.
  // note: phrase-position differs from term-position in that its position
  // reflects the phrase offset: pp.pos = tp.pos - offset.
  // this allows to easily identify a matching (exact) phrase
  // when all PhrasePositions have exactly the same position.
  if (postings.length > 0) {
    min = new PhrasePositions(postings[0].postings, postings[0].position, 0, postings[0].terms);
    max = min;
    max.doc = -1;
    for (int i = 1; i < postings.length; i++) {
      PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
      max.next = pp;
      max = pp;
      max.doc = -1;
    }
    max.next = min; // make it cyclic for easier manipulation
  }
}
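To make the phrase-position remark in the comment above concrete, here is a tiny self-contained illustration (the positions are made up for the example):

// Terms of the phrase "quick brown fox" occurring at absolute positions 5, 6, 7.
int[] termPositions = {5, 6, 7};
int[] phraseOffsets = {0, 1, 2}; // offset of each term within the phrase
for (int i = 0; i < termPositions.length; i++) {
  int phrasePosition = termPositions[i] - phraseOffsets[i];
  System.out.println(phrasePosition); // prints 5 three times: all equal, so an exact phrase match
}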
/**
 * Expert: set the {@link Similarity} implementation used by this IndexWriter.
 * <p>
 * <b>NOTE:</b> the similarity cannot be null.
 *
 * <p>Only takes effect when IndexWriter is first created.
 */
public IndexWriterConfig setSimilarity(Similarity similarity) {
  if (similarity == null) {
    throw new IllegalArgumentException("similarity must not be null");
  }
  this.similarity = similarity;
  return this;
}
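A hedged usage sketch: the same Similarity should normally be set both at index time (via IndexWriterConfig) and at search time (via IndexSearcher), since norms written under one model are decoded by the other. BM25Similarity is an illustrative choice; 'analyzer' and 'directory' are assumed to be in scope, and the single-argument IndexWriterConfig constructor is the Lucene 5.x+ form.

Similarity sim = new BM25Similarity();
IndexWriterConfig iwc = new IndexWriterConfig(analyzer).setSimilarity(sim);
try (IndexWriter writer = new IndexWriter(directory, iwc)) {
  // ... add documents ...
}
IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(directory));
searcher.setSimilarity(sim); // keep query-time scoring consistent with the indexed norms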
@Inject
public SimilarityService(Index index, IndexSettingsService indexSettingsService,
                         final SimilarityLookupService similarityLookupService,
                         final MapperService mapperService) {
  super(index, indexSettingsService.getSettings());
  this.similarityLookupService = similarityLookupService;
  this.mapperService = mapperService;

  Similarity defaultSimilarity = similarityLookupService.similarity(SimilarityLookupService.DEFAULT_SIMILARITY).get();
  // Expert users can configure the base type as being different to default, but out-of-box we use default.
  Similarity baseSimilarity = (similarityLookupService.similarity("base") != null)
      ? similarityLookupService.similarity("base").get()
      : defaultSimilarity;
  this.perFieldSimilarity = (mapperService != null)
      ? new PerFieldSimilarity(defaultSimilarity, baseSimilarity, mapperService)
      : defaultSimilarity;
}
public static Query joinUtilHelper(String parentType, ParentChildIndexFieldData parentChildIndexFieldData,
                                   Similarity similarity, Query toQuery, ScoreType scoreType,
                                   Query innerQuery, int minChildren, int maxChildren) throws IOException {
  ScoreMode scoreMode;
  // TODO: move entirely over from ScoreType to org.apache.lucene.join.ScoreMode, when we drop the 1.x parent child code.
  switch (scoreType) {
    case NONE:
      scoreMode = ScoreMode.None;
      break;
    case MIN:
      scoreMode = ScoreMode.Min;
      break;
    case MAX:
      scoreMode = ScoreMode.Max;
      break;
    case SUM:
      scoreMode = ScoreMode.Total;
      break;
    case AVG:
      scoreMode = ScoreMode.Avg;
      break;
    default:
      throw new UnsupportedOperationException("score type [" + scoreType + "] not supported");
  }
  // 0 in pre 2.x p/c impl means unbounded
  if (maxChildren == 0) {
    maxChildren = Integer.MAX_VALUE;
  }
  return new LateParsingQuery(toQuery, innerQuery, minChildren, maxChildren,
      parentType, scoreMode, parentChildIndexFieldData, similarity);
}
/**
 * Creates a new {@link org.elasticsearch.index.engine.EngineConfig}
 */
public EngineConfig(ShardId shardId, ThreadPool threadPool, ShardIndexingService indexingService,
                    Settings indexSettings, IndicesWarmer warmer, Store store,
                    SnapshotDeletionPolicy deletionPolicy, MergePolicy mergePolicy,
                    MergeSchedulerConfig mergeSchedulerConfig, Analyzer analyzer, Similarity similarity,
                    CodecService codecService, Engine.FailedEngineListener failedEngineListener,
                    TranslogRecoveryPerformer translogRecoveryPerformer, QueryCache queryCache,
                    QueryCachingPolicy queryCachingPolicy, IndexSearcherWrappingService wrappingService,
                    TranslogConfig translogConfig) {
  this.shardId = shardId;
  this.indexSettings = indexSettings;
  this.threadPool = threadPool;
  this.indexingService = indexingService;
  this.warmer = warmer;
  this.store = store;
  this.deletionPolicy = deletionPolicy;
  this.mergePolicy = mergePolicy;
  this.mergeSchedulerConfig = mergeSchedulerConfig;
  this.analyzer = analyzer;
  this.similarity = similarity;
  this.codecService = codecService;
  this.failedEngineListener = failedEngineListener;
  this.wrappingService = wrappingService;
  this.optimizeAutoGenerateId = indexSettings.getAsBoolean(EngineConfig.INDEX_OPTIMIZE_AUTOGENERATED_ID_SETTING, false);
  this.compoundOnFlush = indexSettings.getAsBoolean(EngineConfig.INDEX_COMPOUND_ON_FLUSH, compoundOnFlush);
  codecName = indexSettings.get(EngineConfig.INDEX_CODEC_SETTING, EngineConfig.DEFAULT_CODEC_NAME);
  // We start up inactive and rely on IndexingMemoryController to give us our fair share once we start indexing:
  indexingBufferSize = IndexingMemoryController.INACTIVE_SHARD_INDEXING_BUFFER;
  gcDeletesInMillis = indexSettings.getAsTime(INDEX_GC_DELETES_SETTING, EngineConfig.DEFAULT_GC_DELETES).millis();
  versionMapSizeSetting = indexSettings.get(INDEX_VERSION_MAP_SIZE, DEFAULT_VERSION_MAP_SIZE);
  updateVersionMapSize();
  this.translogRecoveryPerformer = translogRecoveryPerformer;
  this.forceNewTranslog = indexSettings.getAsBoolean(INDEX_FORCE_NEW_TRANSLOG, false);
  this.queryCache = queryCache;
  this.queryCachingPolicy = queryCachingPolicy;
  this.translogConfig = translogConfig;
}
/**
 * Construct an {@link AugmentedTermScorer}.
 *
 * @param weight the weight of the <code>Term</code> in the query
 * @param mainTerm an iterator over the documents matching the main <code>Term</code>
 * @param similarPostings a list of <code>PostingsEnumWeightTuple</code>: term iterator, weight pairs
 * @param docScorer the <code>Similarity.SimScorer</code> implementation to be used for score computations
 */
public AugmentedTermScorer(Weight weight, PostingsEnum mainTerm,
                           List<PostingsEnumWeightTuple> similarPostings,
                           Similarity.SimScorer docScorer) {
  super(weight);
  this.postings = new PostingsEnumWeightTuple[similarPostings.size() + 1];
  this.postings[0] = new PostingsEnumWeightTuple(mainTerm, 1f);
  for (int i = 0; i < similarPostings.size(); i++) {
    this.postings[i + 1] = similarPostings.get(i);
  }
  this.iterator = new MultiDocIdSetIterator(this.postings);
  this.docScorer = docScorer;
}
@Override
public Similarity getInstance(Map<String, String> params) throws IOException {
  String dict = params.get("dict");
  String normLowerBound = params.get("norm");
  Similarity similarity;
  if (Strings.isNullOrEmpty(normLowerBound)) {
    similarity = new LindenSimilarity(IDFManager.createInstance(dict));
  } else {
    similarity = new LindenSimilarity(IDFManager.createInstance(dict), Float.parseFloat(normLowerBound));
  }
  return similarity;
}
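A hedged usage sketch for the factory above. LindenSimilarity and IDFManager are project-specific; the parameter keys are the ones the method reads, but the dictionary path is hypothetical, and 'factory' and 'searcher' are assumed to be in scope.

Map<String, String> params = new HashMap<>();
params.put("dict", "/path/to/idf.dict"); // hypothetical IDF dictionary path
params.put("norm", "0.1");               // optional: norm lower bound
Similarity sim = factory.getInstance(params);
searcher.setSimilarity(sim);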
public TermDocsEnum(FlexibleQuery.FlexibleTerm term, int docFreq, DocsAndPositionsEnum postings,
                    Similarity.SimScorer docScorer, int termPos) throws IOException {
  this.term = term;
  this.postings = postings;
  this.docFreq = docFreq;
  this.docScorer = docScorer;
  this.termPos = termPos;
}
public Explanation explain(Similarity similarity, Query query) {
  if (!isMatched()) return null;
  ComplexExplanation result = new ComplexExplanation();
  result.setDescription("weight(" + query + " in " + doc + ") ["
      + similarity.getClass().getSimpleName() + "], result of:");
  Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "termFreq=" + freq));
  result.addDetail(scoreExplanation);
  result.setValue(scoreExplanation.getValue());
  result.setMatch(true);
  return result;
}
@Before
public void setupIndex() throws IOException {
  dirUnderTest = newDirectory();
  List<Similarity> sims = Arrays.asList(
      new ClassicSimilarity(),
      new SweetSpotSimilarity(), // extends Classic
      new BM25Similarity(),
      new LMDirichletSimilarity(),
      new BooleanSimilarity(),
      new LMJelinekMercerSimilarity(0.2F),
      new AxiomaticF3LOG(0.5F, 10),
      new DFISimilarity(new IndependenceChiSquared()),
      new DFRSimilarity(new BasicModelBE(), new AfterEffectB(), new NormalizationH1()),
      new IBSimilarity(new DistributionLL(), new LambdaDF(), new NormalizationH3())
  );
  similarity = sims.get(random().nextInt(sims.size()));
  indexWriterUnderTest = new RandomIndexWriter(random(), dirUnderTest,
      newIndexWriterConfig().setSimilarity(similarity));
  for (int i = 0; i < docs.length; i++) {
    Document doc = new Document();
    doc.add(newStringField("id", "" + i, Field.Store.YES));
    doc.add(newField("field", docs[i], Store.YES));
    indexWriterUnderTest.addDocument(doc);
  }
  indexWriterUnderTest.commit();
  indexWriterUnderTest.forceMerge(1);
  indexWriterUnderTest.flush();
  indexReaderUnderTest = indexWriterUnderTest.getReader();
  searcherUnderTest = newSearcher(indexReaderUnderTest);
  searcherUnderTest.setSimilarity(similarity);
}
void resolve() {
  if (resolved) {
    return;
  }
  // else: resolved == false
  if (getLastUpdated() == INDEX_INVALIDATED) {
    return;
  }

  // hashAlgorithm
  hashAlgorithm = get("hash.algorithm");
  if (hashAlgorithm == null) {
    return;
  }

  // version
  version = get("lucene.version");
  if (version == null) {
    return;
  }

  // analyzer
  Function<Version, Analyzer> analyzerFactory = get("lucene.analyzer");
  if (analyzerFactory == null) {
    return;
  }
  analyzer = analyzerFactory.apply(version);

  // similarity
  Supplier<Similarity> similarityFactory = get("scoring.model");
  if (similarityFactory == null) {
    return;
  }
  similarity = similarityFactory.get();

  // directory
  Function<File, Directory> directoryFactory = get("directory.type");
  if (directoryFactory == null) {
    return;
  }
  directory = directoryFactory.apply(new File(configDir.getPath() + File.separator + INDEX_DIR));
  if (directory == null) {
    return;
  }

  // we made it: config is properly resolved
  resolved = true;
}
/**
 * Create a new FuzzyQuery that will match terms with an edit distance of at most
 * <code>maxEdits</code> to <code>term</code>. If a <code>prefixLength</code> &gt; 0
 * is specified, a common prefix of that length is also required.
 *
 * @param term the term to search for
 * @param maxEdits must be &gt;= 0 and &lt;= {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}
 * @param prefixLength length of common (non-fuzzy) prefix
 * @param maxExpansions the maximum number of terms to match. If this number is greater than
 *        {@link BooleanQuery#getMaxClauseCount} when the query is rewritten, then the
 *        maxClauseCount will be used instead.
 * @param transpositions true if transpositions should be treated as a primitive edit operation.
 *        If this is false, comparisons will implement the classic Levenshtein algorithm.
 */
public LearnToRankFuzzyQuery(final Term term, final int maxEdits, final int prefixLength,
    final int maxExpansions, final boolean transpositions, final Similarity sim) {
  super(term.field());
  if ((maxEdits < 0) || (maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE)) {
    throw new IllegalArgumentException(
        "maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
  }
  if (prefixLength < 0) {
    throw new IllegalArgumentException("prefixLength cannot be negative.");
  }
  if (maxExpansions < 0) {
    throw new IllegalArgumentException("maxExpansions cannot be negative.");
  }

  this.term = term;
  this.maxEdits = maxEdits;
  this.prefixLength = prefixLength;
  this.transpositions = transpositions;
  this.maxExpansions = maxExpansions;
  setRewriteMethod(new LearnToRankFuzzyQuery.LTRTopTermsScoringBooleanQueryRewrite(maxExpansions, sim));
  // setRewriteMethod(new LearnToRankFuzzyQuery.LTRTopTermsScoringBooleanQueryRewrite(maxExpansions));
}
/**
 * Expert: constructs a TermQuery that will use the provided docFreq instead
 * of looking up the docFreq against the searcher.
 */
public LearnToRankTermQuery(final Term term, final int docFreq, final Similarity sim) {
  this.term = term;
  this.docFreq = docFreq;
  perReaderTermS = null;
  this.sim = sim;
}
/**
 * Expert: constructs a TermQuery that will use the provided {@link TermContext} states
 * (including their docFreq) instead of looking up term statistics against the searcher.
 */
public LearnToRankTermQuery(final Term term, final TermContext states, final Similarity sim) {
  assert states != null;
  this.term = term;
  docFreq = states.docFreq();
  perReaderTermS = states;
  this.sim = sim;
}
public static Query queryStringTerm(String str, String field, Similarity sim, Occur occ, int maxclause) {
  final String[] split = str.split(" ");
  final LTRBooleanQuery bquery = new LTRBooleanQuery();
  for (final String element : split) {
    final LearnToRankTermQuery tquery = new LearnToRankTermQuery(
        new Term(field, element.toLowerCase(Locale.US)), sim);
    bquery.add(tquery, occ);
  }
  return bquery;
}
public static Query queryStringFuzzy(String str, String field, Similarity sim, Occur occ, int maxclause) {
  final String[] split = str.split(" ");
  final LTRBooleanQuery bquery = new LTRBooleanQuery();
  for (final String element : split) {
    final LearnToRankFuzzyQuery tquery = new LearnToRankFuzzyQuery(
        new Term(field, element.toLowerCase(Locale.US)), sim);
    bquery.add(tquery, occ);
  }
  return bquery;
}
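A hedged sketch of how the two helpers above might be combined (LTRBooleanQuery and the LearnToRank* classes are project-specific; the field name, query text, and Similarity choice here are illustrative):

Similarity sim = new DefaultSimilarity();
Query exact = queryStringTerm("quick brown fox", "body", sim, Occur.SHOULD, 1024);
Query fuzzy = queryStringFuzzy("quick brown fox", "body", sim, Occur.SHOULD, 1024);
// e.g. let exact term matches and fuzzy matches both contribute to the score
BooleanQuery combined = new BooleanQuery();
combined.add(exact, Occur.SHOULD);
combined.add(fuzzy, Occur.SHOULD);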
AbstractKnowledgeBase(String uri, boolean dynamic, Similarity sim) {
  super();
  this.indexUri = uri;
  this.dynamic = dynamic;
  // note: the Similarity argument is accepted but not used in this constructor
  File indexDir = new File(indexUri);
  Directory dir;
  try {
    dir = FSDirectory.open(indexDir);
    this.manager = new SearcherManager(dir, new SearcherFactory());
  } catch (IOException e) {
    // on failure, manager stays null; only the error is logged
    logger.error("IOException in " + AbstractKnowledgeBase.class.getName(), e);
  }
}
static TFIDFSimilarity asTFIDF(Similarity sim, String field) {
  // unwrap per-field wrappers until we reach the concrete Similarity for this field
  while (sim instanceof PerFieldSimilarityWrapper) {
    sim = ((PerFieldSimilarityWrapper) sim).get(field);
  }
  if (sim instanceof TFIDFSimilarity) {
    return (TFIDFSimilarity) sim;
  } else {
    return null;
  }
}
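A usage sketch for the helper above: unwrap whatever the searcher is configured with, and fail gracefully when the scoring model is not TF-IDF based ('searcher' and the field name are illustrative):

TFIDFSimilarity tfidf = asTFIDF(searcher.getSimilarity(), "body");
if (tfidf == null) {
  throw new UnsupportedOperationException("requires a TF-IDF Similarity for field 'body'");
}
// tfidf now exposes TF-IDF specifics such as idf(); note the exact idf()
// signature varies across Lucene versions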
public void testIDF() throws Exception {
  Similarity saved = searcher.getSimilarity();
  try {
    searcher.setSimilarity(new DefaultSimilarity());
    assertHits(new FunctionQuery(
        new IDFValueSource("bogus", "bogus", "text", new BytesRef("test"))),
        new float[] { 0.5945349f, 0.5945349f });
  } finally {
    searcher.setSimilarity(saved);
  }
}
public void testNorm() throws Exception {
  Similarity saved = searcher.getSimilarity();
  try {
    // no norm field (so agnostic to indexed similarity)
    searcher.setSimilarity(new DefaultSimilarity());
    assertHits(new FunctionQuery(new NormValueSource("byte")),
        new float[] { 0f, 0f });
  } finally {
    searcher.setSimilarity(saved);
  }
}