Java 类org.apache.lucene.analysis.util.AbstractAnalysisFactory 实例源码

项目:search    文件:TestSynonymMap.java   
/**
 * Parses a single rule through a bi-gram tokenizer factory and checks the
 * resulting nested synonym map: ab -> bc -> cd, whose tokens include ef, fg and gh.
 */
public void testBigramTokenizer() throws Exception {
  // Configure an n-gram tokenizer factory with min and max gram size both set to 2.
  Map<String, String> factoryArgs = new HashMap<>();
  factoryArgs.put(AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM, "4.4");
  factoryArgs.put("minGramSize","2");
  factoryArgs.put("maxGramSize","2");
  TokenizerFactory bigramFactory = new NGramTokenizerFactory(factoryArgs);

  // (ab)->(bc)->(cd)->[ef][fg][gh]
  List<String> ruleList = new ArrayList<>();
  ruleList.add( "abcd=>efgh" );
  SlowSynonymMap synMap = new SlowSynonymMap( true );
  SlowSynonymFilterFactory.parseRules( ruleList, synMap, "=>", ",", true, bigramFactory);

  // Walk down the map one bi-gram at a time; each level holds exactly one entry.
  assertEquals( 1, synMap.submap.size() );
  SlowSynonymMap abLevel = getSubSynonymMap( synMap, "ab" );
  assertEquals( 1, abLevel.submap.size() );
  SlowSynonymMap bcLevel = getSubSynonymMap( abLevel, "bc" );
  assertEquals( 1, bcLevel.submap.size() );

  // The "cd" entry must carry every bi-gram of the replacement text.
  for (String expectedToken : new String[] { "ef", "fg", "gh" }) {
    assertTokIncludes( bcLevel, "cd", expectedToken );
  }
}
项目:search    文件:TestFactories.java   
/**
 * Looks up the tokenizer factory class for the given name, tries to initialize it,
 * and when an instance is obtained sanity-checks it.
 *
 * @param tokenizer name passed to {@code TokenizerFactory.lookupClass}
 * @throws IOException declared for the random-data check
 */
private void doTestTokenizer(String tokenizer) throws IOException {
  Class<? extends TokenizerFactory> clazz = TokenizerFactory.lookupClass(tokenizer);
  TokenizerFactory factory = (TokenizerFactory) initialize(clazz);
  if (factory == null) {
    // no instance could be created: nothing further to check
    return;
  }

  // a MultiTermAware factory must hand back a usable multi-term component
  if (factory instanceof MultiTermAwareComponent) {
    AbstractAnalysisFactory multiTerm = ((MultiTermAwareComponent) factory).getMultiTermComponent();
    assertNotNull(multiTerm);
    // a charfilter is not acceptable here, although a tokenizer may wrap a filter around itself
    assertFalse(multiTerm instanceof CharFilterFactory);
  }

  // push some random data through; exceptions were expected in initialize, not here
  checkRandomData(random(), new FactoryAnalyzer(factory, null, null), 100, 20, false, false);
}
项目:search    文件:TestFactories.java   
/**
 * Looks up the token filter factory class for the given name, tries to initialize it,
 * and when an instance is obtained sanity-checks it.
 *
 * @param tokenfilter name passed to {@code TokenFilterFactory.lookupClass}
 * @throws IOException declared for the random-data check
 */
private void doTestTokenFilter(String tokenfilter) throws IOException {
  Class<? extends TokenFilterFactory> clazz = TokenFilterFactory.lookupClass(tokenfilter);
  TokenFilterFactory factory = (TokenFilterFactory) initialize(clazz);
  if (factory == null) {
    // no instance could be created: nothing further to check
    return;
  }

  // a MultiTermAware factory must hand back a usable multi-term component
  if (factory instanceof MultiTermAwareComponent) {
    AbstractAnalysisFactory multiTerm = ((MultiTermAwareComponent) factory).getMultiTermComponent();
    assertNotNull(multiTerm);
    // only another token filter factory makes sense here, not a charfilter or tokenizer
    assertTrue(multiTerm instanceof TokenFilterFactory);
  }

  // push some random data through; exceptions were expected in initialize, not here
  checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, factory, null), 100, 20, false, false);
}
项目:search    文件:TestFactories.java   
/**
 * Looks up the char filter factory class for the given name, tries to initialize it,
 * and when an instance is obtained sanity-checks it.
 *
 * @param charfilter name passed to {@code CharFilterFactory.lookupClass}
 * @throws IOException declared for the random-data check
 */
private void doTestCharFilter(String charfilter) throws IOException {
  Class<? extends CharFilterFactory> clazz = CharFilterFactory.lookupClass(charfilter);
  CharFilterFactory factory = (CharFilterFactory) initialize(clazz);
  if (factory == null) {
    // no instance could be created: nothing further to check
    return;
  }

  // a MultiTermAware factory must hand back a usable multi-term component
  if (factory instanceof MultiTermAwareComponent) {
    AbstractAnalysisFactory multiTerm = ((MultiTermAwareComponent) factory).getMultiTermComponent();
    assertNotNull(multiTerm);
    // only another char filter factory makes sense here, not a tokenizer or tokenfilter
    assertTrue(multiTerm instanceof CharFilterFactory);
  }

  // push some random data through; exceptions were expected in initialize, not here
  checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, null, factory), 100, 20, false, false);
}
项目:solr-jdbc    文件:JdbcStopFilterFactoryTest.java   
/**
 * Test for {@link JdbcStopFilterFactory#create(TokenStream)}.
 *
 * Feeds four tokens through the filter and expects only "test1" and "test2"
 * to come out the other side.
 */
@Test
public void create() throws Exception {
   Map<String, String> args = new HashMap<>();
   args.put(AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM, Version.LATEST.toString());
   args.put(JdbcReaderFactoryParams.DATASOURCE, "java:comp/env/dataSource");
   args.put(JdbcReaderFactoryParams.SQL, "select stopword from stopwords");

   // Default MockTokenizer; per the original note it splits on whitespace and lower-cases.
   MockTokenizer tokenizer = new MockTokenizer();
   tokenizer.setReader(new StringReader("test1 somestring test2 anotherstring"));

   JdbcStopFilterFactory factory = new JdbcStopFilterFactory(args);
   factory.inform(new ClasspathResourceLoader(getClass().getClassLoader()));

   try (TokenStream stream = factory.create(tokenizer)) {
      CharTermAttribute attribute = stream.addAttribute(CharTermAttribute.class);
      stream.reset();
      // Tokens expected to pass the filter, in order.
      for (String expected : new String[] { "test1", "test2" }) {
         assertTrue(stream.incrementToken());
         assertEquals(expected, attribute.toString());
      }
      assertFalse(stream.incrementToken());
      stream.end();
   }
}
项目:Palmetto    文件:SimpleAnalyzer.java   
/**
 * Builds the analyzer's pattern tokenizer factory and, on request, a lower-case
 * filter factory.
 *
 * @param lowerCase when {@code true} a {@code LowerCaseFilterFactory} is created;
 *                  otherwise the filter factory field is set to {@code null}
 */
public SimpleAnalyzer(boolean lowerCase) {
    // Arguments for the tokenizer: pattern, capture group and Lucene match version.
    Map<String, String> tokenizerArgs = new HashMap<String, String>();
    tokenizerArgs.put(PatternTokenizerFactory.PATTERN, PATTERN);
    tokenizerArgs.put(PatternTokenizerFactory.GROUP, "0");
    tokenizerArgs.put(AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM, version.name());
    tokenizerFactory = new PatternTokenizerFactory(tokenizerArgs);

    if (!lowerCase) {
        lowerCaseFilterFactory = null;
        return;
    }

    // The filter factory gets its own fresh argument map holding only the match version.
    Map<String, String> filterArgs = new HashMap<String, String>();
    filterArgs.put(AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM, version.name());
    lowerCaseFilterFactory = new LowerCaseFilterFactory(filterArgs);
}
项目:NYBC    文件:TestFactories.java   
/**
 * Obtains the tokenizer factory registered under the given name and, when
 * {@code initialize} reports success, sanity-checks it.
 *
 * @param tokenizer name passed to {@code TokenizerFactory.forName}
 * @throws IOException declared for the random-data check
 */
private void doTestTokenizer(String tokenizer) throws IOException {
  TokenizerFactory factory = TokenizerFactory.forName(tokenizer);
  if (!initialize(factory)) {
    // initialization did not succeed: nothing further to check
    return;
  }

  // a MultiTermAware factory must hand back a usable multi-term component
  if (factory instanceof MultiTermAwareComponent) {
    AbstractAnalysisFactory multiTerm = ((MultiTermAwareComponent) factory).getMultiTermComponent();
    assertNotNull(multiTerm);
    // a charfilter is not acceptable here, although a tokenizer may wrap a filter around itself
    assertFalse(multiTerm instanceof CharFilterFactory);
  }

  // push some random data through; exceptions were expected in initialize, not here
  checkRandomData(random(), new FactoryAnalyzer(factory, null, null), 100, 20, false, false);
}
项目:NYBC    文件:TestFactories.java   
/**
 * Obtains the token filter factory registered under the given name and, when
 * {@code initialize} reports success, sanity-checks it.
 *
 * @param tokenfilter name passed to {@code TokenFilterFactory.forName}
 * @throws IOException declared for the random-data check
 */
private void doTestTokenFilter(String tokenfilter) throws IOException {
  TokenFilterFactory factory = TokenFilterFactory.forName(tokenfilter);
  if (!initialize(factory)) {
    // initialization did not succeed: nothing further to check
    return;
  }

  // a MultiTermAware factory must hand back a usable multi-term component
  if (factory instanceof MultiTermAwareComponent) {
    AbstractAnalysisFactory multiTerm = ((MultiTermAwareComponent) factory).getMultiTermComponent();
    assertNotNull(multiTerm);
    // only another token filter factory makes sense here, not a charfilter or tokenizer
    assertTrue(multiTerm instanceof TokenFilterFactory);
  }

  // push some random data through; exceptions were expected in initialize, not here
  checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, factory, null), 100, 20, false, false);
}
项目:NYBC    文件:TestFactories.java   
/**
 * Obtains the char filter factory registered under the given name and, when
 * {@code initialize} reports success, sanity-checks it.
 *
 * @param charfilter name passed to {@code CharFilterFactory.forName}
 * @throws IOException declared for the random-data check
 */
private void doTestCharFilter(String charfilter) throws IOException {
  CharFilterFactory factory = CharFilterFactory.forName(charfilter);
  if (!initialize(factory)) {
    // initialization did not succeed: nothing further to check
    return;
  }

  // a MultiTermAware factory must hand back a usable multi-term component
  if (factory instanceof MultiTermAwareComponent) {
    AbstractAnalysisFactory multiTerm = ((MultiTermAwareComponent) factory).getMultiTermComponent();
    assertNotNull(multiTerm);
    // only another char filter factory makes sense here, not a tokenizer or tokenfilter
    assertTrue(multiTerm instanceof CharFilterFactory);
  }

  // push some random data through; exceptions were expected in initialize, not here
  checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, null, factory), 100, 20, false, false);
}
项目:solr-jdbc-synonyms    文件:JdbcStopFilterFactoryTest.java   
/**
 * Test for {@link JdbcStopFilterFactory#create(TokenStream)}.
 *
 * Feeds four tokens through the filter and expects only "test1" and "test2"
 * to come out the other side.
 */
@Test
public void create() throws Exception {
   Map<String, String> args = new HashMap<>();
   args.put(AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM, Version.LUCENE_5_0_0.toString());
   args.put(JdbcReaderFactoryParams.DATASOURCE, "java:comp/env/dataSource");
   args.put(JdbcReaderFactoryParams.SQL, "select stopword from stopwords");

   // Default MockTokenizer; per the original note it splits on whitespace and lower-cases.
   MockTokenizer tokenizer = new MockTokenizer();
   tokenizer.setReader(new StringReader("test1 somestring test2 anotherstring"));

   JdbcStopFilterFactory factory = new JdbcStopFilterFactory(args);
   factory.inform(new ClasspathResourceLoader());

   try (TokenStream stream = factory.create(tokenizer)) {
      CharTermAttribute attribute = stream.addAttribute(CharTermAttribute.class);
      stream.reset();
      // Tokens expected to pass the filter, in order.
      for (String expected : new String[] { "test1", "test2" }) {
         assertTrue(stream.incrementToken());
         assertEquals(expected, attribute.toString());
      }
      assertFalse(stream.incrementToken());
      stream.end();
   }
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestSynonymMap.java   
/**
 * Parses a single rule through a bi-gram tokenizer factory and checks the
 * resulting nested synonym map: ab -> bc -> cd, whose tokens include ef, fg and gh.
 */
public void testBigramTokenizer() throws Exception {
  // Configure an n-gram tokenizer factory with min and max gram size both set to 2.
  Map<String, String> factoryArgs = new HashMap<String, String>();
  factoryArgs.put(AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM, "4.4");
  factoryArgs.put("minGramSize","2");
  factoryArgs.put("maxGramSize","2");
  TokenizerFactory bigramFactory = new NGramTokenizerFactory(factoryArgs);

  // (ab)->(bc)->(cd)->[ef][fg][gh]
  List<String> ruleList = new ArrayList<String>();
  ruleList.add( "abcd=>efgh" );
  SlowSynonymMap synMap = new SlowSynonymMap( true );
  SlowSynonymFilterFactory.parseRules( ruleList, synMap, "=>", ",", true, bigramFactory);

  // Walk down the map one bi-gram at a time; each level holds exactly one entry.
  assertEquals( 1, synMap.submap.size() );
  SlowSynonymMap abLevel = getSubSynonymMap( synMap, "ab" );
  assertEquals( 1, abLevel.submap.size() );
  SlowSynonymMap bcLevel = getSubSynonymMap( abLevel, "bc" );
  assertEquals( 1, bcLevel.submap.size() );

  // The "cd" entry must carry every bi-gram of the replacement text.
  for (String expectedToken : new String[] { "ef", "fg", "gh" }) {
    assertTokIncludes( bcLevel, "cd", expectedToken );
  }
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestFactories.java   
/**
 * Looks up the tokenizer factory class for the given name, tries to initialize it,
 * and when an instance is obtained sanity-checks it.
 *
 * @param tokenizer name passed to {@code TokenizerFactory.lookupClass}
 * @throws IOException declared for the random-data check
 */
private void doTestTokenizer(String tokenizer) throws IOException {
  Class<? extends TokenizerFactory> clazz = TokenizerFactory.lookupClass(tokenizer);
  TokenizerFactory factory = (TokenizerFactory) initialize(clazz);
  if (factory == null) {
    // no instance could be created: nothing further to check
    return;
  }

  // a MultiTermAware factory must hand back a usable multi-term component
  if (factory instanceof MultiTermAwareComponent) {
    AbstractAnalysisFactory multiTerm = ((MultiTermAwareComponent) factory).getMultiTermComponent();
    assertNotNull(multiTerm);
    // a charfilter is not acceptable here, although a tokenizer may wrap a filter around itself
    assertFalse(multiTerm instanceof CharFilterFactory);
  }

  // push some random data through; exceptions were expected in initialize, not here
  checkRandomData(random(), new FactoryAnalyzer(factory, null, null), 100, 20, false, false);
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestFactories.java   
/**
 * Looks up the token filter factory class for the given name, tries to initialize it,
 * and when an instance is obtained sanity-checks it.
 *
 * @param tokenfilter name passed to {@code TokenFilterFactory.lookupClass}
 * @throws IOException declared for the random-data check
 */
private void doTestTokenFilter(String tokenfilter) throws IOException {
  Class<? extends TokenFilterFactory> clazz = TokenFilterFactory.lookupClass(tokenfilter);
  TokenFilterFactory factory = (TokenFilterFactory) initialize(clazz);
  if (factory == null) {
    // no instance could be created: nothing further to check
    return;
  }

  // a MultiTermAware factory must hand back a usable multi-term component
  if (factory instanceof MultiTermAwareComponent) {
    AbstractAnalysisFactory multiTerm = ((MultiTermAwareComponent) factory).getMultiTermComponent();
    assertNotNull(multiTerm);
    // only another token filter factory makes sense here, not a charfilter or tokenizer
    assertTrue(multiTerm instanceof TokenFilterFactory);
  }

  // push some random data through; exceptions were expected in initialize, not here
  checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, factory, null), 100, 20, false, false);
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestFactories.java   
/**
 * Looks up the char filter factory class for the given name, tries to initialize it,
 * and when an instance is obtained sanity-checks it.
 *
 * @param charfilter name passed to {@code CharFilterFactory.lookupClass}
 * @throws IOException declared for the random-data check
 */
private void doTestCharFilter(String charfilter) throws IOException {
  Class<? extends CharFilterFactory> clazz = CharFilterFactory.lookupClass(charfilter);
  CharFilterFactory factory = (CharFilterFactory) initialize(clazz);
  if (factory == null) {
    // no instance could be created: nothing further to check
    return;
  }

  // a MultiTermAware factory must hand back a usable multi-term component
  if (factory instanceof MultiTermAwareComponent) {
    AbstractAnalysisFactory multiTerm = ((MultiTermAwareComponent) factory).getMultiTermComponent();
    assertNotNull(multiTerm);
    // only another char filter factory makes sense here, not a tokenizer or tokenfilter
    assertTrue(multiTerm instanceof CharFilterFactory);
  }

  // push some random data through; exceptions were expected in initialize, not here
  checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, null, factory), 100, 20, false, false);
}
项目:solr-jdbc    文件:JdbcSynonymFilterFactoryTest.java   
/**
 * Test for {@link JdbcSynonymFilterFactory#create(TokenStream)}.
 *
 * Feeds "test1 test2" through the filter and expects the tokens
 * testA, testB, testC and testD, in that order.
 */
@Test
public void create() throws Exception {
   Map<String, String> factoryArgs = new HashMap<>();
   factoryArgs.put(AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM, Version.LATEST.toString());
   factoryArgs.put(JdbcReaderFactoryParams.DATASOURCE, "java:comp/env/dataSource");
   factoryArgs.put(JdbcReaderFactoryParams.SQL, "select synonyms from synonyms");

   // Default MockTokenizer; per the original note it splits on whitespace and lower-cases.
   MockTokenizer source = new MockTokenizer();
   source.setReader(new StringReader("test1 test2"));

   JdbcSynonymFilterFactory synonymFactory = new JdbcSynonymFilterFactory(factoryArgs);
   synonymFactory.inform(new ClasspathResourceLoader(getClass().getClassLoader()));

   try (TokenStream filtered = synonymFactory.create(source)) {
      CharTermAttribute term = filtered.addAttribute(CharTermAttribute.class);
      filtered.reset();
      // Tokens expected from the filter, in order.
      for (String expected : new String[] { "testA", "testB", "testC", "testD" }) {
         assertTrue(filtered.incrementToken());
         assertEquals(expected, term.toString());
      }
      assertFalse(filtered.incrementToken());
      filtered.end();
   }
}
项目:community-edition-old    文件:AlfrescoFieldType.java   
/**
 * Registers the multi-term component of {@code current}, if it has one.
 *
 * Objects that are not {@code MultiTermAwareComponent}s are ignored. Otherwise the
 * component is appended to the token filters, set as the tokenizer, or appended
 * to the char filters, depending on its type.
 *
 * @param current candidate analysis component
 * @throws SolrException if the multi-term component is none of the three known factory kinds
 */
public void add(Object current)
{
    if (!(current instanceof MultiTermAwareComponent))
    {
        return;
    }

    AbstractAnalysisFactory component = ((MultiTermAwareComponent) current).getMultiTermComponent();

    if (component instanceof TokenFilterFactory)
    {
        // the filter list is created on first use
        if (filters == null)
        {
            filters = new ArrayList<TokenFilterFactory>(2);
        }
        filters.add((TokenFilterFactory) component);
        return;
    }

    if (component instanceof TokenizerFactory)
    {
        tokenizer = (TokenizerFactory) component;
        return;
    }

    if (component instanceof CharFilterFactory)
    {
        // the char filter list is created on first use
        if (charFilters == null)
        {
            charFilters = new ArrayList<CharFilterFactory>(1);
        }
        charFilters.add((CharFilterFactory) component);
        return;
    }

    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown analysis component from MultiTermAwareComponent: " + component);
}
项目:NYBC    文件:LowerCaseTokenizerFactory.java   
/**
 * Builds a new {@code LowerCaseFilterFactory} configured with this factory's
 * Lucene match version and arguments, and returns it as the multi-term component.
 */
@Override
public AbstractAnalysisFactory getMultiTermComponent() {
  LowerCaseFilterFactory lowerCaseFilter = new LowerCaseFilterFactory();
  // carry over this factory's match version before initializing with its args
  lowerCaseFilter.setLuceneMatchVersion(luceneMatchVersion);
  lowerCaseFilter.init(args);
  return lowerCaseFilter;
}
项目:solr-jdbc-synonyms    文件:JdbcSynonymFilterFactoryTest.java   
/**
 * Test for {@link JdbcSynonymFilterFactory#create(TokenStream)}.
 *
 * Feeds "test1 test2" through the filter and expects the tokens
 * testA, testB, testC and testD, in that order.
 */
@Test
public void create() throws Exception {
   Map<String, String> factoryArgs = new HashMap<>();
   factoryArgs.put(AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM, Version.LUCENE_5_0_0.toString());
   factoryArgs.put(JdbcReaderFactoryParams.DATASOURCE, "java:comp/env/dataSource");
   factoryArgs.put(JdbcReaderFactoryParams.SQL, "select synonyms from synonyms");

   // Default MockTokenizer; per the original note it splits on whitespace and lower-cases.
   MockTokenizer source = new MockTokenizer();
   source.setReader(new StringReader("test1 test2"));

   JdbcSynonymFilterFactory synonymFactory = new JdbcSynonymFilterFactory(factoryArgs);
   synonymFactory.inform(new ClasspathResourceLoader());

   try (TokenStream filtered = synonymFactory.create(source)) {
      CharTermAttribute term = filtered.addAttribute(CharTermAttribute.class);
      filtered.reset();
      // Tokens expected from the filter, in order.
      for (String expected : new String[] { "testA", "testB", "testC", "testD" }) {
         assertTrue(filtered.incrementToken());
         assertEquals(expected, term.toString());
      }
      assertFalse(filtered.incrementToken());
      filtered.end();
   }
}
项目:fess-solr-plugin    文件:MonitoringUtil.java   
/**
 * Rewrites the given argument map in place: the value stored under
 * {@code BASE_CLASS} is removed and re-stored under {@code CLASS}, and the
 * Lucene match version parameter is set.
 *
 * @param baseArgs      argument map to modify
 * @param luceneVersion value stored under the Lucene match version parameter
 * @return the value that was stored under {@code BASE_CLASS}
 */
public static String initBaseArgs(final Map<String, String> baseArgs,
        final String luceneVersion) {
    // take the entry out of its old key ...
    final String className = baseArgs.remove(BASE_CLASS);
    // ... and store it again under the new one
    baseArgs.put(CLASS, className);

    final String versionKey = AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM;
    baseArgs.put(versionKey, luceneVersion);
    return className;
}
项目:lams    文件:IndicNormalizationFilterFactory.java   
/**
 * Returns this factory instance as its own multi-term component.
 */
@Override
public AbstractAnalysisFactory getMultiTermComponent() {
  return this;
}
项目:lams    文件:CJKWidthFilterFactory.java   
/**
 * Returns this factory instance as its own multi-term component.
 */
@Override
public AbstractAnalysisFactory getMultiTermComponent() {
  return this;
}
项目:lams    文件:GermanNormalizationFilterFactory.java   
/**
 * Returns this factory instance as its own multi-term component.
 */
@Override
public AbstractAnalysisFactory getMultiTermComponent() {
  return this;
}
项目:lams    文件:IrishLowerCaseFilterFactory.java   
/**
 * Returns this factory instance as its own multi-term component.
 */
@Override
public AbstractAnalysisFactory getMultiTermComponent() {
  return this;
}
项目:lams    文件:ArabicNormalizationFilterFactory.java   
/**
 * Returns this factory instance as its own multi-term component.
 */
@Override
public AbstractAnalysisFactory getMultiTermComponent() {
  return this;
}
项目:lams    文件:PersianCharFilterFactory.java   
/**
 * Returns this factory instance as its own multi-term component.
 */
@Override
public AbstractAnalysisFactory getMultiTermComponent() {
  return this;
}
项目:lams    文件:PersianNormalizationFilterFactory.java   
/**
 * Returns this factory instance as its own multi-term component.
 */
@Override
public AbstractAnalysisFactory getMultiTermComponent() {
  return this;
}
项目:lams    文件:MappingCharFilterFactory.java   
/**
 * Returns this factory instance as its own multi-term component.
 */
@Override
public AbstractAnalysisFactory getMultiTermComponent() {
  return this;
}
项目:lams    文件:HindiNormalizationFilterFactory.java   
/**
 * Returns this factory instance as its own multi-term component.
 */
@Override
public AbstractAnalysisFactory getMultiTermComponent() {
  return this;
}
项目:lams    文件:SoraniNormalizationFilterFactory.java   
/**
 * Returns this factory instance as its own multi-term component.
 */
@Override
public AbstractAnalysisFactory getMultiTermComponent() {
  return this;
}
项目:lams    文件:TurkishLowerCaseFilterFactory.java   
/**
 * Returns this factory instance as its own multi-term component.
 */
@Override
public AbstractAnalysisFactory getMultiTermComponent() {
  return this;
}
项目:lams    文件:GreekLowerCaseFilterFactory.java   
/**
 * Returns this factory instance as its own multi-term component.
 */
@Override
public AbstractAnalysisFactory getMultiTermComponent() {
  return this;
}
项目:lams    文件:ASCIIFoldingFilterFactory.java   
/**
 * Returns this factory instance as its own multi-term component.
 */
@Override
public AbstractAnalysisFactory getMultiTermComponent() {
  return this;
}
项目:lams    文件:UpperCaseFilterFactory.java   
/**
 * Returns this factory instance as its own multi-term component.
 */
@Override
public AbstractAnalysisFactory getMultiTermComponent() {
  return this;
}
项目:lams    文件:LowerCaseFilterFactory.java   
/**
 * Returns this factory instance as its own multi-term component.
 */
@Override
public AbstractAnalysisFactory getMultiTermComponent() {
  return this;
}
项目:lams    文件:LowerCaseTokenizerFactory.java   
/**
 * Returns a new {@code LowerCaseFilterFactory} built from a copy of this
 * factory's original arguments.
 */
@Override
public AbstractAnalysisFactory getMultiTermComponent() {
  // hand the filter factory its own copy rather than the map returned by getOriginalArgs()
  HashMap<String, String> argsCopy = new HashMap<>(getOriginalArgs());
  return new LowerCaseFilterFactory(argsCopy);
}
项目:knn4qa    文件:TokenizerParams.java   
/**
 * Stores the Lucene match version ({@code UtilConst.LUCENE_VERSION}) in the
 * tokenizer class arguments under the Lucene match version parameter key.
 */
private void addLuceneVersionParam() {
  mTokClassArgs.put(AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM, UtilConst.LUCENE_VERSION);  
}
项目:search    文件:JapaneseIterationMarkCharFilterFactory.java   
/**
 * Returns this factory instance as its own multi-term component.
 */
@Override
public AbstractAnalysisFactory getMultiTermComponent() {
  return this;
}
项目:search    文件:ICUFoldingFilterFactory.java   
/**
 * Returns this factory instance as its own multi-term component.
 */
@Override
public AbstractAnalysisFactory getMultiTermComponent() {
  return this;
}
项目:search    文件:ICUTransformFilterFactory.java   
/**
 * Returns this factory instance as its own multi-term component.
 */
@Override
public AbstractAnalysisFactory getMultiTermComponent() {
  return this;
}
项目:search    文件:ICUNormalizer2CharFilterFactory.java   
/**
 * Returns this factory instance as its own multi-term component.
 */
@Override
public AbstractAnalysisFactory getMultiTermComponent() {
  return this;
}