private void doTestTokenizer(String tokenizer) throws IOException { Class<? extends TokenizerFactory> factoryClazz = TokenizerFactory.lookupClass(tokenizer); TokenizerFactory factory = (TokenizerFactory) initialize(factoryClazz); if (factory != null) { // we managed to fully create an instance. check a few more things: // if it implements MultiTermAware, sanity check its impl if (factory instanceof MultiTermAwareComponent) { AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).getMultiTermComponent(); assertNotNull(mtc); // its not ok to return e.g. a charfilter here: but a tokenizer could wrap a filter around it assertFalse(mtc instanceof CharFilterFactory); } // beast it just a little, it shouldnt throw exceptions: // (it should have thrown them in initialize) checkRandomData(random(), new FactoryAnalyzer(factory, null, null), 100, 20, false, false); } }
private void doTestCharFilter(String charfilter) throws IOException { Class<? extends CharFilterFactory> factoryClazz = CharFilterFactory.lookupClass(charfilter); CharFilterFactory factory = (CharFilterFactory) initialize(factoryClazz); if (factory != null) { // we managed to fully create an instance. check a few more things: // if it implements MultiTermAware, sanity check its impl if (factory instanceof MultiTermAwareComponent) { AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).getMultiTermComponent(); assertNotNull(mtc); // its not ok to return a tokenizer or tokenfilter here, this makes no sense assertTrue(mtc instanceof CharFilterFactory); } // beast it just a little, it shouldnt throw exceptions: // (it should have thrown them in initialize) checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, null, factory), 100, 20, false, false); } }
private void doTestTokenizer(String tokenizer) throws IOException { TokenizerFactory factory = TokenizerFactory.forName(tokenizer); if (initialize(factory)) { // we managed to fully create an instance. check a few more things: // if it implements MultiTermAware, sanity check its impl if (factory instanceof MultiTermAwareComponent) { AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).getMultiTermComponent(); assertNotNull(mtc); // its not ok to return e.g. a charfilter here: but a tokenizer could wrap a filter around it assertFalse(mtc instanceof CharFilterFactory); } // beast it just a little, it shouldnt throw exceptions: // (it should have thrown them in initialize) checkRandomData(random(), new FactoryAnalyzer(factory, null, null), 100, 20, false, false); } }
private void doTestCharFilter(String charfilter) throws IOException { CharFilterFactory factory = CharFilterFactory.forName(charfilter); if (initialize(factory)) { // we managed to fully create an instance. check a few more things: // if it implements MultiTermAware, sanity check its impl if (factory instanceof MultiTermAwareComponent) { AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).getMultiTermComponent(); assertNotNull(mtc); // its not ok to return a tokenizer or tokenfilter here, this makes no sense assertTrue(mtc instanceof CharFilterFactory); } // beast it just a little, it shouldnt throw exceptions: // (it should have thrown them in initialize) checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, null, factory), 100, 20, false, false); } }
/** * Reloads all Lucene SPI implementations using the new classloader. * This method must be called after the new classloader has been created to * register the services for use. */ static void reloadLuceneSPI(ClassLoader loader) { // do NOT change the order of these method calls! // Codecs: PostingsFormat.reloadPostingsFormats(loader); DocValuesFormat.reloadDocValuesFormats(loader); Codec.reloadCodecs(loader); // Analysis: CharFilterFactory.reloadCharFilters(loader); TokenFilterFactory.reloadTokenFilters(loader); TokenizerFactory.reloadTokenizers(loader); }
/** * This method looks up a class with its fully qualified name (FQN), or a short-name * class-simplename, or with a package suffix, assuming "org.apache.lucene.analysis." * as the package prefix (e.g. "standard.ClassicTokenizerFactory" -> * "org.apache.lucene.analysis.standard.ClassicTokenizerFactory"). * * If className contains a period, the class is first looked up as-is, assuming that it * is an FQN. If this fails, lookup is retried after prepending the Lucene analysis * package prefix to the class name. * * If className does not contain a period, the analysis SPI *Factory.lookupClass() * methods are used to find the class. * * @param className The name or the short name of the class. * @param expectedType The superclass className is expected to extend * @return the loaded class. * @throws ClassNotFoundException if lookup fails */ public <T> Class<? extends T> lookupAnalysisClass(String className, Class<T> expectedType) throws ClassNotFoundException { if (className.contains(".")) { try { // First, try className == FQN return Class.forName(className).asSubclass(expectedType); } catch (ClassNotFoundException e) { try { // Second, retry lookup after prepending the Lucene analysis package prefix return Class.forName(LUCENE_ANALYSIS_PACKAGE_PREFIX + className).asSubclass(expectedType); } catch (ClassNotFoundException e1) { throw new ClassNotFoundException("Can't find class '" + className + "' or '" + LUCENE_ANALYSIS_PACKAGE_PREFIX + className + "'"); } } } // No dot - use analysis SPI lookup final String analysisComponentName = ANALYSIS_COMPONENT_SUFFIX_PATTERN.matcher(className).replaceFirst(""); if (CharFilterFactory.class.isAssignableFrom(expectedType)) { return CharFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType); } else if (TokenizerFactory.class.isAssignableFrom(expectedType)) { return TokenizerFactory.lookupClass(analysisComponentName).asSubclass(expectedType); } else if (TokenFilterFactory.class.isAssignableFrom(expectedType)) { 
return TokenFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType); } throw new ClassNotFoundException("Can't find class '" + className + "'"); }
/**
 * Creates a factory from the three kinds of analysis components.
 *
 * @param charFilterFactories  char filter factories, stored as given
 * @param tokenizerFactory     tokenizer factory; asserted to be non-null
 * @param tokenFilterFactories token filter factories, stored as given
 */
public AnalyzerFactory(List<CharFilterFactory> charFilterFactories,
                       TokenizerFactory tokenizerFactory,
                       List<TokenFilterFactory> tokenFilterFactories) {
  // The tokenizer is the one mandatory component (checked only when assertions are enabled).
  assert tokenizerFactory != null;

  this.charFilterFactories = charFilterFactories;
  this.tokenizerFactory = tokenizerFactory;
  this.tokenFilterFactories = tokenFilterFactories;
}
/**
 * Renders this factory as {@code AnalyzerFactory(...)}: the optional name,
 * positionIncrementGap and offsetGap (each only when non-null), followed by
 * the char filter factories, the tokenizer factory and the token filter
 * factories, all separated by ", ".
 */
@Override
public String toString() {
  final StringBuilder out = new StringBuilder("AnalyzerFactory(");

  // Optional settings are printed only when they have been set.
  if (name != null) {
    out.append("name:").append(name).append(", ");
  }
  if (positionIncrementGap != null) {
    out.append("positionIncrementGap:").append(positionIncrementGap).append(", ");
  }
  if (offsetGap != null) {
    out.append("offsetGap:").append(offsetGap).append(", ");
  }

  // Components are listed in chain order: char filters, tokenizer, token filters.
  for (CharFilterFactory charFilter : charFilterFactories) {
    out.append(charFilter).append(", ");
  }
  out.append(tokenizerFactory);
  for (TokenFilterFactory tokenFilter : tokenFilterFactories) {
    out.append(", ").append(tokenFilter);
  }

  return out.append(')').toString();
}
/**
 * Runs the per-factory checks for every available tokenizer, then every
 * available token filter, then every available char filter.
 *
 * @throws IOException if one of the per-factory checks throws it
 */
public void test() throws IOException {
  // Tokenizers
  for (String name : TokenizerFactory.availableTokenizers()) {
    doTestTokenizer(name);
  }

  // Token filters
  for (String name : TokenFilterFactory.availableTokenFilters()) {
    doTestTokenFilter(name);
  }

  // Char filters
  for (String name : CharFilterFactory.availableCharFilters()) {
    doTestCharFilter(name);
  }
}
/**
 * Wraps the given reader with each configured char filter, in array order.
 *
 * @param fieldName not used by this implementation
 * @param reader    the reader to wrap
 * @return the reader wrapped by every char filter, or {@code reader} itself
 *         when no char filters are configured
 */
@Override
public Reader initReader(String fieldName, Reader reader) {
  if (charFilters == null || charFilters.length == 0) {
    return reader;
  }

  Reader wrapped = reader;
  for (CharFilterFactory factory : charFilters) {
    // Each filter wraps the output of the previous one.
    wrapped = factory.create(wrapped);
  }
  return wrapped;
}
/** * Reloads all Lucene SPI implementations using the new classloader. * This method must be called after {@link #addToClassLoader(String, FileFilter, boolean)} * and {@link #addToClassLoader(String,FileFilter,boolean)} before using * this ResourceLoader. */ void reloadLuceneSPI() { // Codecs: PostingsFormat.reloadPostingsFormats(this.classLoader); DocValuesFormat.reloadDocValuesFormats(this.classLoader); Codec.reloadCodecs(this.classLoader); // Analysis: CharFilterFactory.reloadCharFilters(this.classLoader); TokenFilterFactory.reloadTokenFilters(this.classLoader); TokenizerFactory.reloadTokenizers(this.classLoader); }
/**
 * Records the multi-term component of {@code current}, if it has one.
 *
 * Objects that are not {@code MultiTermAwareComponent}s are ignored. Otherwise
 * the component returned by {@code getMultiTermComponent()} is appended to the
 * token filter list, stored as the tokenizer (replacing any earlier one), or
 * appended to the char filter list, depending on its type.
 *
 * @param current the candidate analysis component
 * @throws SolrException if the multi-term component is none of the three
 *         known factory kinds
 */
public void add(Object current) {
  if (!(current instanceof MultiTermAwareComponent)) {
    return;
  }
  final AbstractAnalysisFactory component = ((MultiTermAwareComponent) current).getMultiTermComponent();

  if (component instanceof TokenFilterFactory) {
    // The list is created lazily on the first token filter.
    if (filters == null) {
      filters = new ArrayList<TokenFilterFactory>(2);
    }
    filters.add((TokenFilterFactory) component);
    return;
  }

  if (component instanceof TokenizerFactory) {
    tokenizer = (TokenizerFactory) component;
    return;
  }

  if (component instanceof CharFilterFactory) {
    // The list is created lazily on the first char filter.
    if (charFilters == null) {
      charFilters = new ArrayList<CharFilterFactory>(1);
    }
    charFilters.add((CharFilterFactory) component);
    return;
  }

  throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
      "Unknown analysis component from MultiTermAwareComponent: " + component);
}
@Override protected void init(IndexSchema schema, Map<String, String> args) { super.init(schema, args); String p = args.remove("precisionStep"); if (p != null) { precisionStepArg = Integer.parseInt(p); } // normalize the precisionStep precisionStep = precisionStepArg; if (precisionStep<=0 || precisionStep>=64) precisionStep=Integer.MAX_VALUE; String t = args.remove("type"); if (t != null) { try { type = TrieTypes.valueOf(t.toUpperCase(Locale.ROOT)); } catch (IllegalArgumentException e) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Invalid type specified in schema.xml for field: " + args.get("name"), e); } } CharFilterFactory[] filterFactories = new CharFilterFactory[0]; TokenFilterFactory[] tokenFilterFactories = new TokenFilterFactory[0]; analyzer = new TokenizerChain(filterFactories, new TrieTokenizerFactory(type, precisionStep), tokenFilterFactories); // for query time we only need one token, so we use the biggest possible precisionStep: queryAnalyzer = new TokenizerChain(filterFactories, new TrieTokenizerFactory(type, Integer.MAX_VALUE), tokenFilterFactories); }
/** * Reloads all Lucene SPI implementations using the new classloader. * This method must be called after {@link #addToClassLoader(String)} * and {@link #addToClassLoader(String,FileFilter)} before using * this ResourceLoader. */ void reloadLuceneSPI() { // Codecs: PostingsFormat.reloadPostingsFormats(this.classLoader); DocValuesFormat.reloadDocValuesFormats(this.classLoader); Codec.reloadCodecs(this.classLoader); // Analysis: CharFilterFactory.reloadCharFilters(this.classLoader); TokenFilterFactory.reloadTokenFilters(this.classLoader); TokenizerFactory.reloadTokenizers(this.classLoader); }
/**
 * Verifies that every char filter reported by
 * {@code CharFilterFactory.availableCharFilters()} is present among the keys of
 * {@code getCharFilters()}; fails listing the ones that are not.
 */
public void testCharFilters() {
  final Set<String> unlisted =
      new TreeSet<String>(org.apache.lucene.analysis.util.CharFilterFactory.availableCharFilters());

  // Whatever remains after removing the known names has not been registered yet.
  final Set<String> known = getCharFilters().keySet();
  unlisted.removeAll(known);

  final String failureMessage =
      "new charfilters found, please update KNOWN_CHARFILTERS: " + unlisted.toString();
  assertTrue(failureMessage, unlisted.isEmpty());
}