/**
 * Runs the configured {@code lookup} for a single token.
 *
 * @param options      spelling options; {@code suggestMode} and {@code count} are read
 * @param currentToken token whose buffer and length are used as the lookup key
 * @return the suggestions returned by the lookup, or {@code null} when the lookup
 *         returned {@code null} or an empty list
 * @throws IOException if the underlying lookup fails
 */
private List<LookupResult> getLookupResults(SpellingOptions options, Token currentToken) throws IOException {
  // Point a CharsRef at the token's own buffer instead of copying the characters.
  CharsRef term = new CharsRef();
  term.chars = currentToken.buffer();
  term.offset = 0;
  term.length = currentToken.length();

  // The "more popular" flag is only passed on when the lookup is neither a
  // WFSTCompletionLookup nor an AnalyzingSuggester.
  boolean morePopularRequested = options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR;
  boolean wfstOrAnalyzing = lookup instanceof WFSTCompletionLookup || lookup instanceof AnalyzingSuggester;
  boolean onlyMorePopular = morePopularRequested && !wfstOrAnalyzing;

  List<LookupResult> found = lookup.lookup(term, onlyMorePopular, options.count);
  return (found != null && found.size() > 0) ? found : null;
}
/** * Test memory required for the storage. */ public void testStorageNeeds() throws Exception { System.err.println("-- RAM consumption"); for (Class<? extends Lookup> cls : benchmarkClasses) { Lookup lookup = buildLookup(cls, dictionaryInput); long sizeInBytes; if (lookup instanceof AnalyzingSuggester) { // Just get size of FST: else we are also measuring // size of MockAnalyzer which is non-trivial and // varies depending on test seed: sizeInBytes = ((AnalyzingSuggester) lookup).sizeInBytes(); } else { sizeInBytes = RamUsageEstimator.sizeOf(lookup); } System.err.println( String.format(Locale.ROOT, "%-15s size[B]:%,13d", lookup.getClass().getSimpleName(), sizeInBytes)); } }
/**
 * Collects lookup suggestions for every token in {@code options.tokens}.
 *
 * @param options spelling options; {@code tokens}, {@code suggestMode} and {@code count} are read
 * @return {@code EMPTY_RESULT} when no lookup has been built, otherwise a result holding
 *         each suggestion's key and its value narrowed to {@code int}
 * @throws IOException if the underlying lookup fails
 */
@Override
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
  LOG.debug("getSuggestions: " + options.tokens);
  if (lookup == null) {
    LOG.info("Lookup is null - invoke spellchecker.build first");
    return EMPTY_RESULT;
  }

  // Neither flag depends on the token, so work them out once.
  final boolean morePopularMode = options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR;
  final boolean wfstOrAnalyzing = lookup instanceof WFSTCompletionLookup || lookup instanceof AnalyzingSuggester;
  final boolean onlyMorePopular = morePopularMode && !wfstOrAnalyzing;

  final SpellingResult result = new SpellingResult();
  final CharsRef term = new CharsRef();
  for (Token token : options.tokens) {
    // Re-point the shared CharsRef at this token's buffer.
    term.chars = token.buffer();
    term.offset = 0;
    term.length = token.length();

    List<LookupResult> hits = lookup.lookup(term, onlyMorePopular, options.count);
    if (hits != null) {
      if (!morePopularMode) {
        // Outside "more popular" mode the hits are put into their natural order.
        Collections.sort(hits);
      }
      for (LookupResult hit : hits) {
        result.add(token, hit.key.toString(), (int) hit.value);
      }
    }
  }
  return result;
}
public MathNamesSuggester() { try { suggester = new AnalyzingSuggester(new StandardAnalyzer(Version.LUCENE_4_10_2)); suggester.build(new PlainTextDictionary(getMathNamesReader())); } catch (IOException ex) { Logger.getLogger(MathNamesSuggester.class.getName()).log(Level.SEVERE, null, ex); } }
/**
 * Builds an {@link AnalyzingSuggester} from the term iterator while a busy
 * indicator is shown on the default display.
 *
 * @return the created suggester, or {@code null} when an exception occurred before
 *         the suggester instance was created (exceptions are passed to
 *         {@code StatusUtil.showStatus} and not rethrown)
 */
private static Lookup setupSuggester_Analyzing() {

   setupIndexReader();

   // Single-slot holder so the anonymous Runnable can hand the result back.
   final Lookup[] holder = new AnalyzingSuggester[1];

   final Runnable buildTask = new Runnable() {
      @Override
      public void run() {
         try {
            final Analyzer queryAnalyzer = new StandardAnalyzer(new CharArraySet(0, true));
            final InputIterator terms = createTermIterator();

            // Publish the instance before building, so it is returned even if build() fails.
            final AnalyzingSuggester created = new AnalyzingSuggester(queryAnalyzer);
            holder[0] = created;
            created.build(terms);

         } catch (final Exception e) {
            StatusUtil.showStatus(e);
         }
      }
   };

   Display.getDefault().syncExec(new Runnable() {
      @Override
      public void run() {
         BusyIndicator.showWhile(Display.getDefault(), buildTask);
      }
   });

   return holder[0];
}
@Override public Lookup create(NamedList params, SolrCore core) { // mandatory parameter Object fieldTypeName = params.get(QUERY_ANALYZER); if (fieldTypeName == null) { throw new IllegalArgumentException("Error in configuration: " + QUERY_ANALYZER + " parameter is mandatory"); } FieldType ft = core.getSchema().getFieldTypeByName(fieldTypeName.toString()); Analyzer indexAnalyzer = ft.getAnalyzer(); Analyzer queryAnalyzer = ft.getQueryAnalyzer(); // optional parameters boolean exactMatchFirst = params.get(EXACT_MATCH_FIRST) != null ? Boolean.valueOf(params.get(EXACT_MATCH_FIRST).toString()) : true; boolean preserveSep = params.get(PRESERVE_SEP) != null ? Boolean.valueOf(params.get(PRESERVE_SEP).toString()) : true; int flags = 0; if (exactMatchFirst) { flags |= AnalyzingSuggester.EXACT_FIRST; } if (preserveSep) { flags |= AnalyzingSuggester.PRESERVE_SEP; } int maxSurfaceFormsPerAnalyzedForm = params.get(MAX_SURFACE_FORMS) != null ? Integer.parseInt(params.get(MAX_SURFACE_FORMS).toString()) : 256; int maxGraphExpansions = params.get(MAX_EXPANSIONS) != null ? Integer.parseInt(params.get(MAX_EXPANSIONS).toString()) : -1; return new AnalyzingSuggester(indexAnalyzer, queryAnalyzer, flags, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions); }
@Override public Lookup create(NamedList params, SolrCore core) { // mandatory parameter Object fieldTypeName = params.get(QUERY_ANALYZER); if (fieldTypeName == null) { throw new IllegalArgumentException("Error in configuration: " + QUERY_ANALYZER + " parameter is mandatory"); } FieldType ft = core.getLatestSchema().getFieldTypeByName(fieldTypeName.toString()); if (ft == null) { throw new IllegalArgumentException("Error in configuration: " + fieldTypeName.toString() + " is not defined in the schema"); } Analyzer indexAnalyzer = ft.getIndexAnalyzer(); Analyzer queryAnalyzer = ft.getQueryAnalyzer(); // optional parameters boolean exactMatchFirst = params.get(EXACT_MATCH_FIRST) != null ? Boolean.valueOf(params.get(EXACT_MATCH_FIRST).toString()) : true; boolean preserveSep = params.get(PRESERVE_SEP) != null ? Boolean.valueOf(params.get(PRESERVE_SEP).toString()) : true; int flags = 0; if (exactMatchFirst) { flags |= AnalyzingSuggester.EXACT_FIRST; } if (preserveSep) { flags |= AnalyzingSuggester.PRESERVE_SEP; } int maxSurfaceFormsPerAnalyzedForm = params.get(MAX_SURFACE_FORMS) != null ? Integer.parseInt(params.get(MAX_SURFACE_FORMS).toString()) : 256; int maxGraphExpansions = params.get(MAX_EXPANSIONS) != null ? Integer.parseInt(params.get(MAX_EXPANSIONS).toString()) : -1; boolean preservePositionIncrements = params.get(PRESERVE_POSITION_INCREMENTS) != null ? Boolean.valueOf(params.get(PRESERVE_POSITION_INCREMENTS).toString()) : false; return new AnalyzingSuggester(indexAnalyzer, queryAnalyzer, flags, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements); }
@Override public SpellingResult getSuggestions(SpellingOptions options) throws IOException { LOG.debug("getSuggestions: " + options.tokens); if (lookup == null) { LOG.info("Lookup is null - invoke spellchecker.build first"); return EMPTY_RESULT; } SpellingResult res = new SpellingResult(); CharsRef scratch = new CharsRef(); for (Token currentToken : options.tokens) { scratch.chars = currentToken.buffer(); scratch.offset = 0; scratch.length = currentToken.length(); boolean onlyMorePopular = (options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR) && !(lookup instanceof WFSTCompletionLookup) && !(lookup instanceof AnalyzingSuggester); // get more than the requested suggestions as a lot get collapsed by the corrections List<LookupResult> suggestions = lookup.lookup(scratch, onlyMorePopular, options.count * 10); if (suggestions == null || suggestions.size() == 0) { continue; } if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) { Collections.sort(suggestions); } final LinkedHashMap<String, Integer> lhm = new LinkedHashMap<String, Integer>(); for (LookupResult lr : suggestions) { String suggestion = lr.key.toString(); if(this.suggestionAnalyzer != null) { String correction = getAnalyzerResult(suggestion); // multiple could map to the same, so don't repeat suggestions if(!isStringNullOrEmpty(correction)){ if(lhm.containsKey(correction)){ lhm.put(correction, lhm.get(correction) + (int) lr.value); } else { lhm.put(correction, (int) lr.value); } } } else { lhm.put(suggestion, (int) lr.value); } if(lhm.size() >= options.count){ break; } } // sort by new doc frequency Map<String, Integer> orderedMap = null; if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR){ // retain the sort order from above orderedMap = lhm; } else { orderedMap = new TreeMap<String, Integer>(new Comparator<String>() { @Override public int compare(String s1, String s2) { return lhm.get(s2).compareTo(lhm.get(s1)); } }); orderedMap.putAll(lhm); } for(Map.Entry<String, Integer> entry: 
orderedMap.entrySet()){ res.add(currentToken, entry.getKey(), entry.getValue()); } } return res; }
private static Lookup setupSuggester_NGramAnalyzing() { setupIndexReader(); final Lookup suggester[] = new AnalyzingSuggester[1]; try { // static { // // analyzer = new Analyzer() { // // @Override // public TokenStream tokenStream(final String fieldName, final Reader reader) { // // TokenStream result = new StandardTokenizer(reader); // // result = new StandardFilter(result); // result = new LowerCaseFilter(result); // result = new ISOLatin1AccentFilter(result); // result = new StopFilter(result, ENGLISH_STOP_WORDS); // result = new EdgeNGramTokenFilter(result, Side.FRONT, 1, 20); // // return result; // } // }; // // autocompletionAnalyzer = new AnalyzerWrapper(Analyzer.PER_FIELD_REUSE_STRATEGY) { // // @Override // protected Analyzer getWrappedAnalyzer(final String fieldName) { // return analyzer; // } // // @Override // protected TokenStreamComponents wrapComponents( final String fieldName, // final TokenStreamComponents components) { // // final NGramTokenFilter filter = new NGramTokenFilter(components.getTokenStream(), 2, 100); // final Tokenizer tokenizer = components.getTokenizer(); // // return new TokenStreamComponents(tokenizer, filter); // } // }; // // newInfixSuggester = new AnalyzingSuggester(autocompletionAnalyzer, analyzer); // } } catch (final Exception e) { StatusUtil.showStatus(e); } return suggester[0]; }
@Override public Lookup create(NamedList params, SolrCore core) { // mandatory parameter Object fieldTypeName = params.get(QUERY_ANALYZER); if (fieldTypeName == null) { throw new IllegalArgumentException("Error in configuration: " + QUERY_ANALYZER + " parameter is mandatory"); } FieldType ft = core.getLatestSchema().getFieldTypeByName(fieldTypeName.toString()); if (ft == null) { throw new IllegalArgumentException("Error in configuration: " + fieldTypeName.toString() + " is not defined in the schema"); } Analyzer indexAnalyzer = ft.getAnalyzer(); Analyzer queryAnalyzer = ft.getQueryAnalyzer(); // optional parameters boolean exactMatchFirst = params.get(EXACT_MATCH_FIRST) != null ? Boolean.valueOf(params.get(EXACT_MATCH_FIRST).toString()) : true; boolean preserveSep = params.get(PRESERVE_SEP) != null ? Boolean.valueOf(params.get(PRESERVE_SEP).toString()) : true; int flags = 0; if (exactMatchFirst) { flags |= AnalyzingSuggester.EXACT_FIRST; } if (preserveSep) { flags |= AnalyzingSuggester.PRESERVE_SEP; } int maxSurfaceFormsPerAnalyzedForm = params.get(MAX_SURFACE_FORMS) != null ? Integer.parseInt(params.get(MAX_SURFACE_FORMS).toString()) : 256; int maxGraphExpansions = params.get(MAX_EXPANSIONS) != null ? Integer.parseInt(params.get(MAX_EXPANSIONS).toString()) : -1; boolean preservePositionIncrements = params.get(PRESERVE_POSITION_INCREMENTS) != null ? Boolean.valueOf(params.get(PRESERVE_POSITION_INCREMENTS).toString()) : false; return new AnalyzingSuggester(indexAnalyzer, queryAnalyzer, flags, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements); }