Java 类org.apache.lucene.analysis.util.TokenizerFactory 实例源码

项目:search    文件:TestSynonymMap.java   
public void testBigramTokenizer() throws Exception {
  // Build an NGramTokenizerFactory fixed to bigrams (min == max == 2).
  Map<String, String> tokenizerArgs = new HashMap<>();
  tokenizerArgs.put(AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM, "4.4");
  tokenizerArgs.put("minGramSize", "2");
  tokenizerArgs.put("maxGramSize", "2");
  TokenizerFactory bigramFactory = new NGramTokenizerFactory(tokenizerArgs);

  // The rule "abcd=>efgh" is tokenized into bigrams on both sides:
  // (ab)->(bc)->(cd) maps to the replacement tokens [ef][fg][gh].
  List<String> rules = new ArrayList<>();
  rules.add("abcd=>efgh");
  SlowSynonymMap synMap = new SlowSynonymMap(true);
  SlowSynonymFilterFactory.parseRules(rules, synMap, "=>", ",", true, bigramFactory);

  // Walk the nested submaps: ab -> bc -> cd, then check the mapped tokens.
  assertEquals(1, synMap.submap.size());
  SlowSynonymMap abMap = getSubSynonymMap(synMap, "ab");
  assertEquals(1, abMap.submap.size());
  SlowSynonymMap bcMap = getSubSynonymMap(abMap, "bc");
  assertEquals(1, bcMap.submap.size());
  assertTokIncludes(bcMap, "cd", "ef");
  assertTokIncludes(bcMap, "cd", "fg");
  assertTokIncludes(bcMap, "cd", "gh");
}
项目:search    文件:TestFactories.java   
private void doTestTokenizer(String tokenizer) throws IOException {
  Class<? extends TokenizerFactory> factoryClazz = TokenizerFactory.lookupClass(tokenizer);
  TokenizerFactory factory = (TokenizerFactory) initialize(factoryClazz);
  if (factory == null) {
    // could not fully create an instance; nothing more to check
    return;
  }

  // if it implements MultiTermAware, sanity check its impl
  if (factory instanceof MultiTermAwareComponent) {
    AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).getMultiTermComponent();
    assertNotNull(mtc);
    // a char filter is never a valid multi-term component for a tokenizer,
    // though a tokenizer may wrap a filter around itself
    assertFalse(mtc instanceof CharFilterFactory);
  }

  // beast it just a little; any construction exception should already have
  // surfaced in initialize, so this should not throw
  checkRandomData(random(), new FactoryAnalyzer(factory, null, null), 100, 20, false, false);
}
项目:NYBC    文件:TestSynonymMap.java   
public void testBigramTokenizer() throws Exception {
  // Build an NGramTokenizerFactory fixed to bigrams (min == max == 2).
  TokenizerFactory bigramFactory = new NGramTokenizerFactory();
  Map<String, String> tokenizerArgs = new HashMap<String, String>();
  tokenizerArgs.put("minGramSize", "2");
  tokenizerArgs.put("maxGramSize", "2");
  bigramFactory.init(tokenizerArgs);

  // The rule "abcd=>efgh" is tokenized into bigrams on both sides:
  // (ab)->(bc)->(cd) maps to the replacement tokens [ef][fg][gh].
  List<String> rules = new ArrayList<String>();
  rules.add("abcd=>efgh");
  SlowSynonymMap synMap = new SlowSynonymMap(true);
  SlowSynonymFilterFactory.parseRules(rules, synMap, "=>", ",", true, bigramFactory);

  // Walk the nested submaps: ab -> bc -> cd, then check the mapped tokens.
  assertEquals(1, synMap.submap.size());
  SlowSynonymMap abMap = getSubSynonymMap(synMap, "ab");
  assertEquals(1, abMap.submap.size());
  SlowSynonymMap bcMap = getSubSynonymMap(abMap, "bc");
  assertEquals(1, bcMap.submap.size());
  assertTokIncludes(bcMap, "cd", "ef");
  assertTokIncludes(bcMap, "cd", "fg");
  assertTokIncludes(bcMap, "cd", "gh");
}
项目:NYBC    文件:TestFactories.java   
private void doTestTokenizer(String tokenizer) throws IOException {
  TokenizerFactory factory = TokenizerFactory.forName(tokenizer);
  if (!initialize(factory)) {
    // could not fully create an instance; nothing more to check
    return;
  }

  // if it implements MultiTermAware, sanity check its impl
  if (factory instanceof MultiTermAwareComponent) {
    AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).getMultiTermComponent();
    assertNotNull(mtc);
    // a char filter is never a valid multi-term component for a tokenizer,
    // though a tokenizer may wrap a filter around itself
    assertFalse(mtc instanceof CharFilterFactory);
  }

  // beast it just a little; any construction exception should already have
  // surfaced in initialize, so this should not throw
  checkRandomData(random(), new FactoryAnalyzer(factory, null, null), 100, 20, false, false);
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestSynonymMap.java   
public void testBigramTokenizer() throws Exception {
  // Build an NGramTokenizerFactory fixed to bigrams (min == max == 2).
  Map<String, String> tokenizerArgs = new HashMap<String, String>();
  tokenizerArgs.put(AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM, "4.4");
  tokenizerArgs.put("minGramSize", "2");
  tokenizerArgs.put("maxGramSize", "2");
  TokenizerFactory bigramFactory = new NGramTokenizerFactory(tokenizerArgs);

  // The rule "abcd=>efgh" is tokenized into bigrams on both sides:
  // (ab)->(bc)->(cd) maps to the replacement tokens [ef][fg][gh].
  List<String> rules = new ArrayList<String>();
  rules.add("abcd=>efgh");
  SlowSynonymMap synMap = new SlowSynonymMap(true);
  SlowSynonymFilterFactory.parseRules(rules, synMap, "=>", ",", true, bigramFactory);

  // Walk the nested submaps: ab -> bc -> cd, then check the mapped tokens.
  assertEquals(1, synMap.submap.size());
  SlowSynonymMap abMap = getSubSynonymMap(synMap, "ab");
  assertEquals(1, abMap.submap.size());
  SlowSynonymMap bcMap = getSubSynonymMap(abMap, "bc");
  assertEquals(1, bcMap.submap.size());
  assertTokIncludes(bcMap, "cd", "ef");
  assertTokIncludes(bcMap, "cd", "fg");
  assertTokIncludes(bcMap, "cd", "gh");
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestFactories.java   
private void doTestTokenizer(String tokenizer) throws IOException {
  Class<? extends TokenizerFactory> factoryClazz = TokenizerFactory.lookupClass(tokenizer);
  TokenizerFactory factory = (TokenizerFactory) initialize(factoryClazz);
  if (factory == null) {
    // could not fully create an instance; nothing more to check
    return;
  }

  // if it implements MultiTermAware, sanity check its impl
  if (factory instanceof MultiTermAwareComponent) {
    AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).getMultiTermComponent();
    assertNotNull(mtc);
    // a char filter is never a valid multi-term component for a tokenizer,
    // though a tokenizer may wrap a filter around itself
    assertFalse(mtc instanceof CharFilterFactory);
  }

  // beast it just a little; any construction exception should already have
  // surfaced in initialize, so this should not throw
  checkRandomData(random(), new FactoryAnalyzer(factory, null, null), 100, 20, false, false);
}
项目:elasticsearch_my    文件:PluginsService.java   
/**
 * Reloads all Lucene SPI implementations using the new classloader.
 * This method must be called after the new classloader has been created to
 * register the services for use.
 */
static void reloadLuceneSPI(ClassLoader loader) {
    // do NOT change the order of these method calls!
    // NOTE(review): the formats appear to be reloaded before Codec because
    // codec implementations resolve them via SPI — confirm against the
    // Lucene reload javadocs before reordering.

    // Codecs:
    PostingsFormat.reloadPostingsFormats(loader);
    DocValuesFormat.reloadDocValuesFormats(loader);
    Codec.reloadCodecs(loader);
    // Analysis:
    CharFilterFactory.reloadCharFilters(loader);
    TokenFilterFactory.reloadTokenFilters(loader);
    TokenizerFactory.reloadTokenizers(loader);
}
项目:Elasticsearch    文件:PluginsService.java   
/**
 * Reloads all Lucene SPI implementations using the new classloader.
 * This method must be called after the new classloader has been created to
 * register the services for use.
 */
static void reloadLuceneSPI(ClassLoader loader) {
    // do NOT change the order of these method calls!
    // NOTE(review): the formats appear to be reloaded before Codec because
    // codec implementations resolve them via SPI — confirm against the
    // Lucene reload javadocs before reordering.

    // Codecs:
    PostingsFormat.reloadPostingsFormats(loader);
    DocValuesFormat.reloadDocValuesFormats(loader);
    Codec.reloadCodecs(loader);
    // Analysis:
    CharFilterFactory.reloadCharFilters(loader);
    TokenFilterFactory.reloadTokenFilters(loader);
    TokenizerFactory.reloadTokenizers(loader);
}
项目:hanlp-lucene-plugin    文件:HanLPTokenizerFactoryTest.java   
/**
 * Smoke test: tokenizes a Traditional-Chinese passage with
 * enableTraditionalChineseMode=true and prints each token together with its
 * offsets, position increment, and type. No assertions — output is inspected
 * manually.
 */
public void testCreate() throws Exception
{
    Map<String, String> args = new TreeMap<>();
    args.put("enableTraditionalChineseMode", "true");
    TokenizerFactory factory = new HanLPTokenizerFactory(args);
    Tokenizer tokenizer = factory.create(null);

    tokenizer.setReader(new StringReader("大衛貝克漢不僅僅是名著名球員,球場以外,其妻為前" +
                                                 "辣妹合唱團成員維多利亞·碧咸,亦由於他擁有" +
                                                 "突出外表、百變髮型及正面的形象,以至自己" +
                                                 "品牌的男士香水等商品,及長期擔任運動品牌" +
                                                 "Adidas的代言人,因此對大眾傳播媒介和時尚界" +
                                                 "等方面都具很大的影響力,在足球圈外所獲得的" +
                                                 "認受程度可謂前所未見。"));
    tokenizer.reset();
    while (tokenizer.incrementToken())
    {
        CharTermAttribute attribute = tokenizer.getAttribute(CharTermAttribute.class);
        // offset of the token within the input text
        OffsetAttribute offsetAtt = tokenizer.getAttribute(OffsetAttribute.class);
        // position increment (distance from the previous token)
        PositionIncrementAttribute positionAttr = tokenizer.getAttribute(PositionIncrementAttribute.class);
        // part of speech (exposed as the token type)
        TypeAttribute typeAttr = tokenizer.getAttribute(TypeAttribute.class);
        System.out.printf("[%d:%d %d] %s/%s\n", offsetAtt.startOffset(), offsetAtt.endOffset(), positionAttr.getPositionIncrement(), attribute, typeAttr.type());
    }
}
项目:search    文件:AnalyzerFactoryTask.java   
/**
 * Resolves an analysis class from a fully qualified name (FQN), an FQN
 * relative to the "org.apache.lucene.analysis." package, or an SPI short
 * name (e.g. "standard.ClassicTokenizerFactory" ->
 * "org.apache.lucene.analysis.standard.ClassicTokenizerFactory").
 *
 * If className contains a period it is loaded as-is first; on failure the
 * Lucene analysis package prefix is prepended and the lookup retried.
 *
 * If className contains no period, the analysis SPI *Factory.lookupClass()
 * method matching expectedType is used instead.
 *
 * @param className The name or the short name of the class.
 * @param expectedType The superclass className is expected to extend
 * @return the loaded class.
 * @throws ClassNotFoundException if lookup fails
 */
public <T> Class<? extends T> lookupAnalysisClass(String className, Class<T> expectedType)
    throws ClassNotFoundException {
  if (className.contains(".")) {
    try {
      // First, try className == FQN
      return Class.forName(className).asSubclass(expectedType);
    } catch (ClassNotFoundException ignoredFqnMiss) {
      // fall through to the prefixed lookup below
    }
    try {
      // Second, retry lookup after prepending the Lucene analysis package prefix
      return Class.forName(LUCENE_ANALYSIS_PACKAGE_PREFIX + className).asSubclass(expectedType);
    } catch (ClassNotFoundException e) {
      throw new ClassNotFoundException("Can't find class '" + className
                                       + "' or '" + LUCENE_ANALYSIS_PACKAGE_PREFIX + className + "'");
    }
  }
  // No dot - strip any *Factory-style suffix and use the analysis SPI lookup
  final String analysisComponentName = ANALYSIS_COMPONENT_SUFFIX_PATTERN.matcher(className).replaceFirst("");
  if (CharFilterFactory.class.isAssignableFrom(expectedType)) {
    return CharFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  }
  if (TokenizerFactory.class.isAssignableFrom(expectedType)) {
    return TokenizerFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  }
  if (TokenFilterFactory.class.isAssignableFrom(expectedType)) {
    return TokenFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  }
  throw new ClassNotFoundException("Can't find class '" + className + "'");
}
项目:search    文件:AnalyzerFactory.java   
public AnalyzerFactory(List<CharFilterFactory> charFilterFactories,
                       TokenizerFactory tokenizerFactory,
                       List<TokenFilterFactory> tokenFilterFactories) {
  // The tokenizer is the one mandatory piece of the analysis chain.
  assert null != tokenizerFactory;
  this.tokenizerFactory = tokenizerFactory;
  this.charFilterFactories = charFilterFactories;
  this.tokenFilterFactories = tokenFilterFactories;
}
项目:search    文件:TestHMMChineseTokenizerFactory.java   
/** Shows the tokenizer's current behavior on a short Chinese sentence. */
public void testSimple() throws Exception {
  TokenizerFactory factory = new HMMChineseTokenizerFactory(new HashMap<String,String>());
  Reader reader = new StringReader("我购买了道具和服装。");
  Tokenizer tokenizer = factory.create(newAttributeFactory(), reader);
  // TODO: fix smart chinese to not emit punctuation tokens
  // at the moment: you have to clean up with WDF, or use the stoplist, etc
  assertTokenStreamContents(tokenizer,
     new String[] { "我", "购买", "了", "道具", "和", "服装", "," });
}
项目:search    文件:TestFactories.java   
public void test() throws IOException {
  // Exercise every SPI-registered tokenizer, token filter, and char filter.
  for (String tokenizerName : TokenizerFactory.availableTokenizers()) {
    doTestTokenizer(tokenizerName);
  }
  for (String tokenFilterName : TokenFilterFactory.availableTokenFilters()) {
    doTestTokenFilter(tokenFilterName);
  }
  for (String charFilterName : CharFilterFactory.availableCharFilters()) {
    doTestCharFilter(charFilterName);
  }
}
项目:search    文件:SolrResourceLoader.java   
/**
 * Reloads all Lucene SPI implementations using the new classloader.
 * This method must be called after {@link #addToClassLoader(String, FileFilter, boolean)}
 * and {@link #addToClassLoader(String,FileFilter,boolean)} before using
 * this ResourceLoader.
 */
void reloadLuceneSPI() {
  // NOTE(review): keep this call order — formats are reloaded before Codec,
  // presumably because codec implementations resolve them via SPI; confirm
  // against the Lucene reload javadocs before reordering.
  // Codecs:
  PostingsFormat.reloadPostingsFormats(this.classLoader);
  DocValuesFormat.reloadDocValuesFormats(this.classLoader);
  Codec.reloadCodecs(this.classLoader);
  // Analysis:
  CharFilterFactory.reloadCharFilters(this.classLoader);
  TokenFilterFactory.reloadTokenFilters(this.classLoader);
  TokenizerFactory.reloadTokenizers(this.classLoader);
}
项目:sdb2    文件:IndexerServiceImpl.java   
@Override
public void index(final IndexType indexType, final Collection<Song> songs) {
    // Index in the background so the caller is not blocked.
    executor.execute(new Runnable() {
        @Override
        public void run() {
            Stopwatch stopwatch = Stopwatch.createStarted();

            // In-memory index, published via putIndex() in the finally block.
            Directory directory = new RAMDirectory();
            try {
                LOG.debug("available tokenizers: {}", TokenizerFactory.availableTokenizers());
                LOG.debug("available token filters: {}", TokenFilterFactory.availableTokenFilters());
                // standard tokenizer -> lowercase -> 1..25-char ngrams
                // (ngrams enable substring matching at query time)
                Analyzer analyzer = CustomAnalyzer.builder()
                    .withTokenizer("standard")
                    .addTokenFilter("lowercase")
                    .addTokenFilter("ngram", "minGramSize", "1", "maxGramSize", "25")
                    .build();
                IndexWriterConfig config = new IndexWriterConfig(analyzer);
                try (IndexWriter writer = new IndexWriter(directory, config)) {
                    for (Song song : songs) {
                        Document document = createDocument(song);
                        writer.addDocument(document);
                        // remember each song so search hits can be mapped back
                        songByUuid.put(song.getUUID(), song);
                    }
                } catch (IOException e) {
                    LOG.warn("couldn't index songs", e);
                }
            } catch (IOException e1) {
                LOG.warn("couldn't create analyzer", e1);
            } finally {
                // NOTE(review): the (possibly empty) index is published even if
                // analyzer creation or indexing failed above — confirm intended.
                putIndex(indexType, directory);
                stopwatch.stop();
                LOG.info("indexing songs in background thread took {}", stopwatch.toString());
            }
        }
    });
}
项目:community-edition-old    文件:AlfrescoFieldType.java   
public void add(Object current)
{
    // Only components exposing a multi-term variant are relevant here.
    if (!(current instanceof MultiTermAwareComponent))
    {
        return;
    }
    AbstractAnalysisFactory component = ((MultiTermAwareComponent) current).getMultiTermComponent();
    if (component instanceof TokenFilterFactory)
    {
        // lazily create the filter list on first use
        if (filters == null)
        {
            filters = new ArrayList<TokenFilterFactory>(2);
        }
        filters.add((TokenFilterFactory) component);
    }
    else if (component instanceof TokenizerFactory)
    {
        tokenizer = (TokenizerFactory) component;
    }
    else if (component instanceof CharFilterFactory)
    {
        // lazily create the char-filter list on first use
        if (charFilters == null)
        {
            charFilters = new ArrayList<CharFilterFactory>(1);
        }
        charFilters.add((CharFilterFactory) component);
    }
    else
    {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown analysis component from MultiTermAwareComponent: " + component);
    }
}
项目:mmseg4j-solr    文件:MMSegTokenizerFactoryTest.java   
private Dictionary getDictionaryByFieldType(String fieldTypeName) {
    // Resolve the index-time analyzer configured for the given field type.
    FieldType fieldType = h.getCore().getLatestSchema().getFieldTypeByName(fieldTypeName);
    Analyzer analyzer = fieldType.getIndexAnalyzer();
    Assert.assertEquals(analyzer.getClass(), TokenizerChain.class);

    // The chain must be driven by an MMSeg tokenizer factory.
    TokenizerFactory tokenizerFactory = ((TokenizerChain) analyzer).getTokenizerFactory();
    Assert.assertEquals(tokenizerFactory.getClass(), MMSegTokenizerFactory.class);

    // ... and that factory must have loaded its dictionary.
    MMSegTokenizerFactory mmsegFactory = (MMSegTokenizerFactory) tokenizerFactory;
    Assert.assertNotNull(mmsegFactory.dic);
    return mmsegFactory.dic;
}
项目:NYBC    文件:AnalyzerFactoryTask.java   
/**
 * Resolves an analysis class from a fully qualified name (FQN), an FQN
 * relative to the "org.apache.lucene.analysis." package, or an SPI short
 * name (e.g. "standard.ClassicTokenizerFactory" ->
 * "org.apache.lucene.analysis.standard.ClassicTokenizerFactory").
 *
 * If className contains a period it is loaded as-is first; on failure the
 * Lucene analysis package prefix is prepended and the lookup retried.
 *
 * If className contains no period, the analysis SPI *Factory.lookupClass()
 * method matching expectedType is used instead.
 *
 * @param className The name or the short name of the class.
 * @param expectedType The superclass className is expected to extend
 * @return the loaded class.
 * @throws ClassNotFoundException if lookup fails
 */
public <T> Class<? extends T> lookupAnalysisClass(String className, Class<T> expectedType)
    throws ClassNotFoundException {
  if (className.contains(".")) {
    try {
      // First, try className == FQN
      return Class.forName(className).asSubclass(expectedType);
    } catch (ClassNotFoundException ignoredFqnMiss) {
      // fall through to the prefixed lookup below
    }
    try {
      // Second, retry lookup after prepending the Lucene analysis package prefix
      return Class.forName(LUCENE_ANALYSIS_PACKAGE_PREFIX + className).asSubclass(expectedType);
    } catch (ClassNotFoundException e) {
      throw new ClassNotFoundException("Can't find class '" + className
                                       + "' or '" + LUCENE_ANALYSIS_PACKAGE_PREFIX + className + "'");
    }
  }
  // No dot - strip any *Factory-style suffix and use the analysis SPI lookup
  final String analysisComponentName = ANALYSIS_COMPONENT_SUFFIX_PATTERN.matcher(className).replaceFirst("");
  if (CharFilterFactory.class.isAssignableFrom(expectedType)) {
    return CharFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  }
  if (TokenizerFactory.class.isAssignableFrom(expectedType)) {
    return TokenizerFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  }
  if (TokenFilterFactory.class.isAssignableFrom(expectedType)) {
    return TokenFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  }
  throw new ClassNotFoundException("Can't find class '" + className + "'");
}
项目:NYBC    文件:AnalyzerFactory.java   
public AnalyzerFactory(List<CharFilterFactory> charFilterFactories,
                       TokenizerFactory tokenizerFactory,
                       List<TokenFilterFactory> tokenFilterFactories) {
  // The tokenizer is the one mandatory piece of the analysis chain.
  assert null != tokenizerFactory;
  this.tokenizerFactory = tokenizerFactory;
  this.charFilterFactories = charFilterFactories;
  this.tokenFilterFactories = tokenFilterFactories;
}
项目:NYBC    文件:TestFactories.java   
public void test() throws IOException {
  // Exercise every SPI-registered tokenizer, token filter, and char filter.
  for (String tokenizerName : TokenizerFactory.availableTokenizers()) {
    doTestTokenizer(tokenizerName);
  }
  for (String tokenFilterName : TokenFilterFactory.availableTokenFilters()) {
    doTestTokenFilter(tokenFilterName);
  }
  for (String charFilterName : CharFilterFactory.availableCharFilters()) {
    doTestCharFilter(charFilterName);
  }
}
项目:NYBC    文件:SolrResourceLoader.java   
/**
 * Reloads all Lucene SPI implementations using the new classloader.
 * This method must be called after {@link #addToClassLoader(String)}
 * and {@link #addToClassLoader(String,FileFilter)} before using
 * this ResourceLoader.
 */
void reloadLuceneSPI() {
  // NOTE(review): keep this call order — formats are reloaded before Codec,
  // presumably because codec implementations resolve them via SPI; confirm
  // against the Lucene reload javadocs before reordering.
  // Codecs:
  PostingsFormat.reloadPostingsFormats(this.classLoader);
  DocValuesFormat.reloadDocValuesFormats(this.classLoader);
  Codec.reloadCodecs(this.classLoader);
  // Analysis:
  CharFilterFactory.reloadCharFilters(this.classLoader);
  TokenFilterFactory.reloadTokenFilters(this.classLoader);
  TokenizerFactory.reloadTokenizers(this.classLoader);
}
项目:search-core    文件:SolrResourceLoader.java   
/**
 * Reloads all Lucene SPI implementations using the new classloader.
 * This method must be called after {@link #addToClassLoader(String)}
 * and {@link #addToClassLoader(String,FileFilter)} before using
 * this ResourceLoader.
 */
void reloadLuceneSPI() {
  // NOTE(review): keep this call order — formats are reloaded before Codec,
  // presumably because codec implementations resolve them via SPI; confirm
  // against the Lucene reload javadocs before reordering.
  // Codecs:
  PostingsFormat.reloadPostingsFormats(this.classLoader);
  DocValuesFormat.reloadDocValuesFormats(this.classLoader);
  Codec.reloadCodecs(this.classLoader);
  // Analysis:
  CharFilterFactory.reloadCharFilters(this.classLoader);
  TokenFilterFactory.reloadTokenFilters(this.classLoader);
  TokenizerFactory.reloadTokenizers(this.classLoader);
}
项目:read-open-source-code    文件:AnalyzerFactoryTask.java   
/**
 * Resolves an analysis class from a fully qualified name (FQN), an FQN
 * relative to the "org.apache.lucene.analysis." package, or an SPI short
 * name (e.g. "standard.ClassicTokenizerFactory" ->
 * "org.apache.lucene.analysis.standard.ClassicTokenizerFactory").
 *
 * If className contains a period it is loaded as-is first; on failure the
 * Lucene analysis package prefix is prepended and the lookup retried.
 *
 * If className contains no period, the analysis SPI *Factory.lookupClass()
 * method matching expectedType is used instead.
 *
 * @param className The name or the short name of the class.
 * @param expectedType The superclass className is expected to extend
 * @return the loaded class.
 * @throws ClassNotFoundException if lookup fails
 */
public <T> Class<? extends T> lookupAnalysisClass(String className, Class<T> expectedType)
    throws ClassNotFoundException {
  if (className.contains(".")) {
    try {
      // First, try className == FQN
      return Class.forName(className).asSubclass(expectedType);
    } catch (ClassNotFoundException ignoredFqnMiss) {
      // fall through to the prefixed lookup below
    }
    try {
      // Second, retry lookup after prepending the Lucene analysis package prefix
      return Class.forName(LUCENE_ANALYSIS_PACKAGE_PREFIX + className).asSubclass(expectedType);
    } catch (ClassNotFoundException e) {
      throw new ClassNotFoundException("Can't find class '" + className
                                       + "' or '" + LUCENE_ANALYSIS_PACKAGE_PREFIX + className + "'");
    }
  }
  // No dot - strip any *Factory-style suffix and use the analysis SPI lookup
  final String analysisComponentName = ANALYSIS_COMPONENT_SUFFIX_PATTERN.matcher(className).replaceFirst("");
  if (CharFilterFactory.class.isAssignableFrom(expectedType)) {
    return CharFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  }
  if (TokenizerFactory.class.isAssignableFrom(expectedType)) {
    return TokenizerFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  }
  if (TokenFilterFactory.class.isAssignableFrom(expectedType)) {
    return TokenFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  }
  throw new ClassNotFoundException("Can't find class '" + className + "'");
}
项目:read-open-source-code    文件:AnalyzerFactory.java   
public AnalyzerFactory(List<CharFilterFactory> charFilterFactories,
                       TokenizerFactory tokenizerFactory,
                       List<TokenFilterFactory> tokenFilterFactories) {
  // The tokenizer is the one mandatory piece of the analysis chain.
  assert null != tokenizerFactory;
  this.tokenizerFactory = tokenizerFactory;
  this.charFilterFactories = charFilterFactories;
  this.tokenFilterFactories = tokenFilterFactories;
}
项目:read-open-source-code    文件:SolrResourceLoader.java   
/**
 * Reloads all Lucene SPI implementations using the new classloader.
 * This method must be called after {@link #addToClassLoader(String, FileFilter, boolean)}
 * and {@link #addToClassLoader(String,FileFilter,boolean)} before using
 * this ResourceLoader.
 */
void reloadLuceneSPI() {
  // NOTE(review): keep this call order — formats are reloaded before Codec,
  // presumably because codec implementations resolve them via SPI; confirm
  // against the Lucene reload javadocs before reordering.
  // Codecs:
  PostingsFormat.reloadPostingsFormats(this.classLoader);
  DocValuesFormat.reloadDocValuesFormats(this.classLoader);
  Codec.reloadCodecs(this.classLoader);
  // Analysis:
  CharFilterFactory.reloadCharFilters(this.classLoader);
  TokenFilterFactory.reloadTokenFilters(this.classLoader);
  TokenizerFactory.reloadTokenizers(this.classLoader);
}
项目:read-open-source-code    文件:AnalyzerFactoryTask.java   
/**
 * Resolves an analysis class from a fully qualified name (FQN), an FQN
 * relative to the "org.apache.lucene.analysis." package, or an SPI short
 * name (e.g. "standard.ClassicTokenizerFactory" ->
 * "org.apache.lucene.analysis.standard.ClassicTokenizerFactory").
 *
 * If className contains a period it is loaded as-is first; on failure the
 * Lucene analysis package prefix is prepended and the lookup retried.
 *
 * If className contains no period, the analysis SPI *Factory.lookupClass()
 * method matching expectedType is used instead.
 *
 * @param className The name or the short name of the class.
 * @param expectedType The superclass className is expected to extend
 * @return the loaded class.
 * @throws ClassNotFoundException if lookup fails
 */
public <T> Class<? extends T> lookupAnalysisClass(String className, Class<T> expectedType)
    throws ClassNotFoundException {
  if (className.contains(".")) {
    try {
      // First, try className == FQN
      return Class.forName(className).asSubclass(expectedType);
    } catch (ClassNotFoundException ignoredFqnMiss) {
      // fall through to the prefixed lookup below
    }
    try {
      // Second, retry lookup after prepending the Lucene analysis package prefix
      return Class.forName(LUCENE_ANALYSIS_PACKAGE_PREFIX + className).asSubclass(expectedType);
    } catch (ClassNotFoundException e) {
      throw new ClassNotFoundException("Can't find class '" + className
                                       + "' or '" + LUCENE_ANALYSIS_PACKAGE_PREFIX + className + "'");
    }
  }
  // No dot - strip any *Factory-style suffix and use the analysis SPI lookup
  final String analysisComponentName = ANALYSIS_COMPONENT_SUFFIX_PATTERN.matcher(className).replaceFirst("");
  if (CharFilterFactory.class.isAssignableFrom(expectedType)) {
    return CharFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  }
  if (TokenizerFactory.class.isAssignableFrom(expectedType)) {
    return TokenizerFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  }
  if (TokenFilterFactory.class.isAssignableFrom(expectedType)) {
    return TokenFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  }
  throw new ClassNotFoundException("Can't find class '" + className + "'");
}
项目:read-open-source-code    文件:AnalyzerFactory.java   
public AnalyzerFactory(List<CharFilterFactory> charFilterFactories,
                       TokenizerFactory tokenizerFactory,
                       List<TokenFilterFactory> tokenFilterFactories) {
  // The tokenizer is the one mandatory piece of the analysis chain.
  assert null != tokenizerFactory;
  this.tokenizerFactory = tokenizerFactory;
  this.charFilterFactories = charFilterFactories;
  this.tokenFilterFactories = tokenFilterFactories;
}
项目:read-open-source-code    文件:SolrResourceLoader.java   
/**
 * Reloads all Lucene SPI implementations using the new classloader.
 * This method must be called after {@link #addToClassLoader(String, FileFilter, boolean)}
 * and {@link #addToClassLoader(String,FileFilter,boolean)} before using
 * this ResourceLoader.
 */
void reloadLuceneSPI() {
  // NOTE(review): keep this call order — formats are reloaded before Codec,
  // presumably because codec implementations resolve them via SPI; confirm
  // against the Lucene reload javadocs before reordering.
  // Codecs:
  PostingsFormat.reloadPostingsFormats(this.classLoader);
  DocValuesFormat.reloadDocValuesFormats(this.classLoader);
  Codec.reloadCodecs(this.classLoader);
  // Analysis:
  CharFilterFactory.reloadCharFilters(this.classLoader);
  TokenFilterFactory.reloadTokenFilters(this.classLoader);
  TokenizerFactory.reloadTokenizers(this.classLoader);
}
项目:fess-solr-plugin    文件:SuggestFieldInfo.java   
public SuggestFieldInfo(final List<String> fieldNameList,
        final TokenizerFactory tokenizerFactory,
        final SuggestReadingConverter suggestReadingConverter,
        final SuggestNormalizer suggestNormalizer) {
    // Plain value holder: keep every collaborator handed in by the caller.
    this.fieldNameList = fieldNameList;
    this.suggestNormalizer = suggestNormalizer;
    this.suggestReadingConverter = suggestReadingConverter;
    this.tokenizerFactory = tokenizerFactory;
}
项目:fess-solr-plugin    文件:DocumentReader.java   
/**
 * Wraps a Solr input document for suggestion extraction. Stores the supplied
 * analysis/conversion collaborators and field lists, then snapshots the
 * document's expiration and segment values with defaults when absent.
 */
public DocumentReader(final TokenizerFactory tokenizerFactory,
        final SuggestReadingConverter suggestReadingConverter,
        final SuggestNormalizer suggestNormalizer,
        final SolrInputDocument solrInputDocument,
        final List<String> targetFields,
        final List<String> targetLabelFields,
        final List<String> targetRoleFields, final String expiresField,
        final String segmentField, final Set<String> badWordSet) {
    this.solrInputDocument = solrInputDocument;
    this.targetFields = targetFields;
    this.targetLabelFields = targetLabelFields;
    this.targetRoleFields = targetRoleFields;
    this.tokenizerFactory = tokenizerFactory;
    this.expiresField = expiresField;
    this.segmentField = segmentField;
    this.suggestReadingConverter = suggestReadingConverter;
    this.suggestNormalizer = suggestNormalizer;
    this.badWordSet = badWordSet;

    // Expiration: take the document's value when present, otherwise "now".
    final Object expireObj = solrInputDocument.getFieldValue(expiresField);
    if (expireObj != null) {
        expire = expireObj.toString();
    } else {
        expire = DateUtil.getThreadLocalDateFormat().format(new Date());
    }

    // Segment: take the document's value when present, otherwise empty.
    final Object segmentObj = solrInputDocument.getFieldValue(segmentField);
    if (segmentObj != null) {
        segment = segmentObj.toString();
    } else {
        segment = StringUtil.EMPTY;
    }
}
项目:fess-solr-plugin    文件:TestUtils.java   
/**
 * Instantiates the tokenizer factory named by the first field config's
 * tokenizer class, via its (Map) constructor with empty arguments.
 *
 * @param config the suggest update configuration to read the class name from
 * @return the factory instance, or {@code null} if reflection fails
 */
public static TokenizerFactory getTokenizerFactory(
        final SuggestUpdateConfig config) {
    try {
        // Empty factory arguments: the class name alone drives the behavior here.
        final Map<String, String> args = new HashMap<String, String>();
        // Parameterized Class/Constructor instead of raw types (was: raw Class/Constructor).
        final Class<?> cls = Class.forName(config.getFieldConfigList().get(0)
                .getTokenizerConfig().getClassName());
        final Constructor<?> constructor = cls.getConstructor(Map.class);
        return (TokenizerFactory) constructor.newInstance(args);
    } catch (final Exception e) {
        // Best-effort test helper: surface the failure on stderr and signal
        // "unavailable" with null instead of aborting test setup.
        e.printStackTrace();
        return null;
    }
}
项目:fess-solr-plugin    文件:SuggestUpdateControllerTest.java   
/**
 * Assembles the suggest field configuration used by the controller tests:
 * always one entry for the "content" field with a full pipeline (tokenizer,
 * reading converter, normalizer), plus — when {@code multi} is set — a second
 * tokenizer-less entry for the "title" field.
 */
private List<SuggestFieldInfo> getSuggestFieldInfoList(
        final SuggestUpdateConfig config, final boolean multi) {
    final List<SuggestFieldInfo> fieldInfoList = new ArrayList<SuggestFieldInfo>();

    // Primary entry: the "content" field with a fully configured pipeline.
    final List<String> contentFields = new ArrayList<String>();
    contentFields.add("content");
    fieldInfoList.add(new SuggestFieldInfo(contentFields,
            TestUtils.getTokenizerFactory(config),
            TestUtils.createConverter(),
            TestUtils.createNormalizer()));

    if (multi) {
        // Secondary entry: the "title" field, deliberately without a tokenizer.
        final List<String> titleFields = new ArrayList<String>();
        titleFields.add("title");
        fieldInfoList.add(new SuggestFieldInfo(titleFields, null,
                TestUtils.createConverter(),
                TestUtils.createNormalizer()));
    }

    return fieldInfoList;
}
项目:Maskana-Gestor-de-Conocimiento    文件:AnalyzerFactoryTask.java   
/**
 * Resolves an analysis class from a fully qualified name, from a name under
 * the {@code org.apache.lucene.analysis.} package, or from an SPI short name
 * (e.g. "standard.ClassicTokenizerFactory" -&gt;
 * "org.apache.lucene.analysis.standard.ClassicTokenizerFactory").
 *
 * A dotted name is first tried verbatim as an FQN; on failure the lookup is
 * retried with the Lucene analysis package prefix prepended. A dot-free name
 * is resolved through the matching factory's SPI {@code lookupClass()}.
 *
 * @param className the fully qualified or short class name to resolve
 * @param expectedType the supertype the resolved class must extend
 * @return the loaded class, narrowed to {@code expectedType}
 * @throws ClassNotFoundException if no lookup strategy finds the class
 */
public <T> Class<? extends T> lookupAnalysisClass(String className, Class<T> expectedType)
    throws ClassNotFoundException {
  if (className.contains(".")) {
    // Dotted name: verbatim FQN first, then with the analysis prefix.
    try {
      return Class.forName(className).asSubclass(expectedType);
    } catch (ClassNotFoundException outer) {
      try {
        return Class.forName(LUCENE_ANALYSIS_PACKAGE_PREFIX + className).asSubclass(expectedType);
      } catch (ClassNotFoundException inner) {
        throw new ClassNotFoundException("Can't find class '" + className
                                         + "' or '" + LUCENE_ANALYSIS_PACKAGE_PREFIX + className + "'");
      }
    }
  }
  // Dot-free name: strip any trailing analysis-component suffix and route
  // the lookup to the SPI registry matching the expected factory type.
  final String analysisComponentName = ANALYSIS_COMPONENT_SUFFIX_PATTERN.matcher(className).replaceFirst("");
  if (CharFilterFactory.class.isAssignableFrom(expectedType)) {
    return CharFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  }
  if (TokenizerFactory.class.isAssignableFrom(expectedType)) {
    return TokenizerFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  }
  if (TokenFilterFactory.class.isAssignableFrom(expectedType)) {
    return TokenFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  }
  throw new ClassNotFoundException("Can't find class '" + className + "'");
}
项目:Maskana-Gestor-de-Conocimiento    文件:AnalyzerFactory.java   
/**
 * Creates an analyzer factory from its component factories.
 *
 * @param charFilterFactories char filter factories applied before tokenization
 * @param tokenizerFactory the tokenizer factory; must not be {@code null}
 * @param tokenFilterFactories token filter factories applied after tokenization
 * @throws NullPointerException if {@code tokenizerFactory} is {@code null}
 */
public AnalyzerFactory(List<CharFilterFactory> charFilterFactories,
                       TokenizerFactory tokenizerFactory,
                       List<TokenFilterFactory> tokenFilterFactories) {
  this.charFilterFactories = charFilterFactories;
  // Fail fast even without -ea: the original assert only fired with
  // assertions enabled and otherwise silently admitted a null tokenizer.
  this.tokenizerFactory =
      java.util.Objects.requireNonNull(tokenizerFactory, "tokenizerFactory");
  this.tokenFilterFactories = tokenFilterFactories;
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestFactories.java   
/**
 * Smoke-tests every analysis factory registered via SPI: all available
 * tokenizers, then all token filters, then all char filters.
 */
public void test() throws IOException {
  for (final String tokenizerName : TokenizerFactory.availableTokenizers()) {
    doTestTokenizer(tokenizerName);
  }
  for (final String tokenFilterName : TokenFilterFactory.availableTokenFilters()) {
    doTestTokenFilter(tokenFilterName);
  }
  for (final String charFilterName : CharFilterFactory.availableCharFilters()) {
    doTestCharFilter(charFilterName);
  }
}
项目:elasticsearch_my    文件:AnalysisFactoryTestCase.java   
/**
 * Fails when Lucene exposes tokenizers that the known-tokenizers map does
 * not cover, forcing the map to be kept in sync with the Lucene version.
 */
public void testTokenizers() {
    // Start from everything Lucene registers, then subtract what we know about;
    // whatever remains is newly added upstream.
    final Set<String> newTokenizers =
            new TreeSet<String>(org.apache.lucene.analysis.util.TokenizerFactory.availableTokenizers());
    newTokenizers.removeAll(getTokenizers().keySet());
    assertTrue("new tokenizers found, please update KNOWN_TOKENIZERS: " + newTokenizers.toString(),
            newTokenizers.isEmpty());
}
项目:search    文件:TestFactories.java   
/**
 * Bundles one factory of each analysis-component kind for testing; the
 * tokenizer is required (checked by assert), the others are stored as given.
 */
FactoryAnalyzer(TokenizerFactory tokenizer, TokenFilterFactory tokenfilter, CharFilterFactory charFilter) {
  assert tokenizer != null;
  // Plain field captures; assignment order is immaterial.
  this.charFilter = charFilter;
  this.tokenfilter = tokenfilter;
  this.tokenizer = tokenizer;
}
项目:search    文件:TokenizerChain.java   
/**
 * Convenience constructor for a chain with no char filters; delegates to the
 * full constructor passing {@code null} for the char-filter array.
 *
 * @param tokenizer the tokenizer factory producing the token stream
 * @param filters token filter factories applied to the tokenizer output
 */
public TokenizerChain(TokenizerFactory tokenizer, TokenFilterFactory[] filters) {
  this(null,tokenizer,filters);
}
项目:search    文件:TokenizerChain.java   
/**
 * Builds the full analysis chain: char filters, then the tokenizer, then the
 * token filters. The arrays are stored as-is, without defensive copying.
 *
 * @param charFilters char filter factories applied to the raw input; may be null
 * @param tokenizer the tokenizer factory producing the token stream
 * @param filters token filter factories applied to the tokenizer output
 */
public TokenizerChain(CharFilterFactory[] charFilters, TokenizerFactory tokenizer, TokenFilterFactory[] filters) {
  this.charFilters = charFilters;
  this.tokenizer = tokenizer;
  this.filters = filters;
}
项目:NYBC    文件:TestFactories.java   
/**
 * Bundles one factory of each analysis-component kind for testing. The
 * tokenizer is required (checked by assert, so only enforced with -ea);
 * the token filter and char filter factories are stored as given.
 */
FactoryAnalyzer(TokenizerFactory tokenizer, TokenFilterFactory tokenfilter, CharFilterFactory charFilter) {
  assert tokenizer != null;
  this.tokenizer = tokenizer;
  this.charFilter = charFilter;
  this.tokenfilter = tokenfilter;
}