Example source code for the Java class org.apache.lucene.analysis.util.ResourceLoader
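
org.apache.lucene.analysis.util.ResourceLoader is the abstraction that Lucene and Solr hand to ResourceLoaderAware components through inform(ResourceLoader), so that analysis factories can open dictionaries, stopword lists and rule files by name, as every snippet below does. As a reading aid, here is a minimal sketch of a custom loader that resolves resource names against a base directory; DirectoryResourceLoader is a hypothetical name, not a class shipped with Lucene, and the method signatures follow the Lucene 4.x analysis-common API used in these examples.

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Path;

import org.apache.lucene.analysis.util.ResourceLoader;

// Hypothetical ResourceLoader that resolves resource names against a base directory.
public class DirectoryResourceLoader implements ResourceLoader {

  private final Path baseDir;

  public DirectoryResourceLoader(Path baseDir) {
    this.baseDir = baseDir;
  }

  @Override
  public InputStream openResource(String resource) throws IOException {
    // Relative names are resolved against the base directory.
    return new FileInputStream(baseDir.resolve(resource).toFile());
  }

  @Override
  public <T> Class<? extends T> findClass(String cname, Class<T> expectedType) {
    try {
      return Class.forName(cname).asSubclass(expectedType);
    } catch (ClassNotFoundException e) {
      throw new RuntimeException("Cannot load class: " + cname, e);
    }
  }

  @Override
  public <T> T newInstance(String cname, Class<T> expectedType) {
    try {
      return findClass(cname, expectedType).newInstance();
    } catch (InstantiationException | IllegalAccessException e) {
      throw new RuntimeException("Cannot instantiate class: " + cname, e);
    }
  }
}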

Project: lams    File: HyphenationCompoundWordTokenFilterFactory.java
@Override
public void inform(ResourceLoader loader) throws IOException {
  InputStream stream = null;
  try {
    if (dictFile != null) // the dictionary can be empty.
      dictionary = getWordSet(loader, dictFile, false);
    // TODO: Broken, because we cannot resolve real system id
    // ResourceLoader should also supply method like ClassLoader to get resource URL
    stream = loader.openResource(hypFile);
    final InputSource is = new InputSource(stream);
    is.setEncoding(encoding); // if it's null let xml parser decide
    is.setSystemId(hypFile);
    if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4_0)) {
      hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is);
    } else {
      hyphenator = Lucene43HyphenationCompoundWordTokenFilter.getHyphenationTree(is);
    }
  } finally {
    IOUtils.closeWhileHandlingException(stream);
  }
}
Project: lams    File: MappingCharFilterFactory.java
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (mapping != null) {
    List<String> wlist = null;
    File mappingFile = new File(mapping);
    if (mappingFile.exists()) {
      wlist = getLines(loader, mapping);
    } else {
      List<String> files = splitFileNames(mapping);
      wlist = new ArrayList<>();
      for (String file : files) {
        List<String> lines = getLines(loader, file.trim());
        wlist.addAll(lines);
      }
    }
    final NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
    parseRules(wlist, builder);
    normMap = builder.build();
    if (normMap.map == null) {
      // if the inner FST is null, it means it accepts nothing (e.g. the file is empty)
      // so just set the whole map to null
      normMap = null;
    }
  }
}
Project: lams    File: HunspellStemFilterFactory.java
@Override
public void inform(ResourceLoader loader) throws IOException {
  String dicts[] = dictionaryFiles.split(",");

  InputStream affix = null;
  List<InputStream> dictionaries = new ArrayList<>();

  try {
    dictionaries = new ArrayList<>();
    for (String file : dicts) {
      dictionaries.add(loader.openResource(file));
    }
    affix = loader.openResource(affixFile);

    this.dictionary = new Dictionary(affix, dictionaries, ignoreCase);
  } catch (ParseException e) {
    throw new IOException("Unable to load hunspell data! [dictionary=" + dictionaries + ",affix=" + affixFile + "]", e);
  } finally {
    IOUtils.closeWhileHandlingException(affix);
    IOUtils.closeWhileHandlingException(dictionaries);
  }
}
Project: lams    File: StemmerOverrideFilterFactory.java
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (dictionaryFiles != null) {
    assureMatchVersion();
    List<String> files = splitFileNames(dictionaryFiles);
    if (files.size() > 0) {
      StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(ignoreCase);
      for (String file : files) {
        List<String> list = getLines(loader, file.trim());
        for (String line : list) {
          String[] mapping = line.split("\t", 2);
          builder.add(mapping[0], mapping[1]);
        }
      }
      dictionary = builder.build();
    }
  }
}
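
The split("\t", 2) above implies a two-column, tab-separated override file: the surface form first, then the stem to force. A minimal, self-contained illustration of that per-line parsing; the sample line is hypothetical and not taken from any shipped dictionary.

public class StemmerOverrideLineDemo {
  public static void main(String[] args) {
    // Hypothetical override line: surface form, a tab, then the forced stem.
    String line = "monkeys\tmonkey";
    String[] mapping = line.split("\t", 2);
    // Prints: monkeys -> monkey
    // builder.add(mapping[0], mapping[1]) in the factory above then forces this stem at analysis time.
    System.out.println(mapping[0] + " -> " + mapping[1]);
  }
}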
Project: mmseg4j    File: Utils.java
public static Dictionary getDict(String dicPath, ResourceLoader loader) {
    Dictionary dic = null;
    if(dicPath != null) {
        File f = new File(dicPath);
        if(!f.isAbsolute() && loader instanceof SolrResourceLoader) {   // relative directory
            SolrResourceLoader srl = (SolrResourceLoader) loader;
            dicPath = srl.getInstanceDir()+dicPath;
            f = new File(dicPath);
        }

        dic = Dictionary.getInstance(f);
    } else {
        dic = Dictionary.getInstance();
    }
    return dic;
}
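
getDict resolves a relative dicPath against the Solr core's instance directory when the loader is a SolrResourceLoader, and falls back to the default mmseg4j dictionary when no path is configured. Below is a sketch of a hypothetical call site from a ResourceLoaderAware component; the class name and the dicPath field are illustrative, and the import for the Utils helper above is omitted because its package is not shown in this listing.

import java.io.IOException;

import com.chenlb.mmseg4j.Dictionary;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;

// Hypothetical holder that resolves the mmseg4j dictionary once Solr supplies its loader.
public class MMSegDictHolder implements ResourceLoaderAware {

  private String dicPath;   // would normally come from the factory's init args
  private Dictionary dic;

  @Override
  public void inform(ResourceLoader loader) throws IOException {
    // Delegate path resolution (absolute vs. core-relative) to the Utils helper above.
    dic = Utils.getDict(dicPath, loader);
  }
}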
Project: IK-Analyzer-2012FF    File: IKSynonymAnalyzer.java
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    Tokenizer token = new IKTokenizer(reader, useSmart);
    Map<String, String> paramsMap = new HashMap<String, String>();
    Configuration cfg = DefaultConfig.getInstance();
    paramsMap.put("luceneMatchVersion", luceneMatchVersion.toString());
    paramsMap.put("synonyms", cfg.getExtSynonymDictionarys().get(0));
    paramsMap.put("ignoreCase", "true");
    SynonymFilterFactory factory = new SynonymFilterFactory(paramsMap);
    ResourceLoader loader = new ClasspathResourceLoader();
    try {
        factory.inform(loader);
    } catch (IOException e) {
        e.printStackTrace();
    }
    return new TokenStreamComponents(token, factory.create(token));
}
Project: search    File: JapaneseTokenizerFactory.java
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (userDictionaryPath != null) {
    InputStream stream = loader.openResource(userDictionaryPath);
    String encoding = userDictionaryEncoding;
    if (encoding == null) {
      encoding = IOUtils.UTF_8;
    }
    CharsetDecoder decoder = Charset.forName(encoding).newDecoder()
        .onMalformedInput(CodingErrorAction.REPORT)
        .onUnmappableCharacter(CodingErrorAction.REPORT);
    Reader reader = new InputStreamReader(stream, decoder);
    userDictionary = new UserDictionary(reader);
  } else {
    userDictionary = null;
  }
}
Project: search    File: ICUTokenizerFactory.java
@Override
public void inform(ResourceLoader loader) throws IOException {
  assert tailored != null : "init must be called first!";
  if (tailored.isEmpty()) {
    config = new DefaultICUTokenizerConfig(cjkAsWords);
  } else {
    final BreakIterator breakers[] = new BreakIterator[UScript.CODE_LIMIT];
    for (Map.Entry<Integer,String> entry : tailored.entrySet()) {
      int code = entry.getKey();
      String resourcePath = entry.getValue();
      breakers[code] = parseRules(resourcePath, loader);
    }
    config = new DefaultICUTokenizerConfig(cjkAsWords) {

      @Override
      public BreakIterator getBreakIterator(int script) {
        if (breakers[script] != null) {
          return (BreakIterator) breakers[script].clone();
        } else {
          return super.getBreakIterator(script);
        }
      }
      // TODO: we could also allow codes->types mapping
    };
  }
}
Project: search    File: PhoneticFilterFactory.java
@Override
public void inform(ResourceLoader loader) throws IOException {
  clazz = registry.get(name.toUpperCase(Locale.ROOT));
  if( clazz == null ) {
    clazz = resolveEncoder(name, loader);
  }

  if (maxCodeLength != null) {
    try {
      setMaxCodeLenMethod = clazz.getMethod("setMaxCodeLen", int.class);
    } catch (Exception e) {
      throw new IllegalArgumentException("Encoder " + name + " / " + clazz + " does not support " + MAX_CODE_LENGTH, e);
    }
  }

  getEncoder();//trigger initialization for potential problems to be thrown now
}
Project: search    File: HyphenationCompoundWordTokenFilterFactory.java
@Override
public void inform(ResourceLoader loader) throws IOException {
  InputStream stream = null;
  try {
    if (dictFile != null) // the dictionary can be empty.
      dictionary = getWordSet(loader, dictFile, false);
    // TODO: Broken, because we cannot resolve real system id
    // ResourceLoader should also supply method like ClassLoader to get resource URL
    stream = loader.openResource(hypFile);
    final InputSource is = new InputSource(stream);
    is.setEncoding(encoding); // if it's null let xml parser decide
    is.setSystemId(hypFile);
    if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4_0)) {
      hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is);
    } else {
      hyphenator = Lucene43HyphenationCompoundWordTokenFilter.getHyphenationTree(is);
    }
  } finally {
    IOUtils.closeWhileHandlingException(stream);
  }
}
Project: search    File: MappingCharFilterFactory.java
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (mapping != null) {
    List<String> wlist = null;
    File mappingFile = new File(mapping);
    if (mappingFile.exists()) {
      wlist = getLines(loader, mapping);
    } else {
      List<String> files = splitFileNames(mapping);
      wlist = new ArrayList<>();
      for (String file : files) {
        List<String> lines = getLines(loader, file.trim());
        wlist.addAll(lines);
      }
    }
    final NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
    parseRules(wlist, builder);
    normMap = builder.build();
    if (normMap.map == null) {
      // if the inner FST is null, it means it accepts nothing (e.g. the file is empty)
      // so just set the whole map to null
      normMap = null;
    }
  }
}
Project: search    File: HunspellStemFilterFactory.java
@Override
public void inform(ResourceLoader loader) throws IOException {
  String dicts[] = dictionaryFiles.split(",");

  InputStream affix = null;
  List<InputStream> dictionaries = new ArrayList<>();

  try {
    dictionaries = new ArrayList<>();
    for (String file : dicts) {
      dictionaries.add(loader.openResource(file));
    }
    affix = loader.openResource(affixFile);

    this.dictionary = new Dictionary(affix, dictionaries, ignoreCase);
  } catch (ParseException e) {
    throw new IOException("Unable to load hunspell data! [dictionary=" + dictionaries + ",affix=" + affixFile + "]", e);
  } finally {
    IOUtils.closeWhileHandlingException(affix);
    IOUtils.closeWhileHandlingException(dictionaries);
  }
}
Project: search    File: StemmerOverrideFilterFactory.java
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (dictionaryFiles != null) {
    assureMatchVersion();
    List<String> files = splitFileNames(dictionaryFiles);
    if (files.size() > 0) {
      StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(ignoreCase);
      for (String file : files) {
        List<String> list = getLines(loader, file.trim());
        for (String line : list) {
          String[] mapping = line.split("\t", 2);
          builder.add(mapping[0], mapping[1]);
        }
      }
      dictionary = builder.build();
    }
  }
}
Project: search    File: StopFilterFactory.java
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (stopWordFiles != null) {
    if (FORMAT_WORDSET.equalsIgnoreCase(format)) {
      stopWords = getWordSet(loader, stopWordFiles, ignoreCase);
    } else if (FORMAT_SNOWBALL.equalsIgnoreCase(format)) {
      stopWords = getSnowballWordSet(loader, stopWordFiles, ignoreCase);
    } else {
      throw new IllegalArgumentException("Unknown 'format' specified for 'words' file: " + format);
    }
  } else {
    if (null != format) {
      throw new IllegalArgumentException("'format' can not be specified w/o an explicit 'words' file: " + format);
    }
    if (luceneMatchVersion == null) {
      stopWords = new CharArraySet(StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
    } else {
      stopWords = new CharArraySet(luceneMatchVersion, StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
    }
  }
}
Project: search    File: TestKeepFilterFactory.java
public void testInform() throws Exception {
  ResourceLoader loader = new ClasspathResourceLoader(getClass());
  assertTrue("loader is null and it shouldn't be", loader != null);
  KeepWordFilterFactory factory = (KeepWordFilterFactory) tokenFilterFactory("KeepWord",
      "words", "keep-1.txt",
      "ignoreCase", "true");
  CharArraySet words = factory.getWords();
  assertTrue("words is null and it shouldn't be", words != null);
  assertTrue("words Size: " + words.size() + " is not: " + 2, words.size() == 2);

  factory = (KeepWordFilterFactory) tokenFilterFactory("KeepWord",
      "words", "keep-1.txt, keep-2.txt",
      "ignoreCase", "true");
  words = factory.getWords();
  assertTrue("words is null and it shouldn't be", words != null);
  assertTrue("words Size: " + words.size() + " is not: " + 4, words.size() == 4);
}
Project: querqy    File: SimpleCommonRulesRewriterFactory.java
@Override
public RewriterFactory createRewriterFactory(NamedList<?> args,
      ResourceLoader resourceLoader) throws IOException {
   String rulesResourceName = (String) args.get("rules");
   if (rulesResourceName == null) {
      throw new IllegalArgumentException("Property 'rules' not configured");
   }

   Boolean ignoreCase = args.getBooleanArg("ignoreCase");

   // querqy parser for queries that are part of the instructions in the
   // rules
   String rulesQuerqyParser = (String) args.get("querqyParser");
   QuerqyParserFactory querqyParser = null;
   if (rulesQuerqyParser != null) {
      rulesQuerqyParser = rulesQuerqyParser.trim();
      if (rulesQuerqyParser.length() > 0) {
         querqyParser = resourceLoader.newInstance(rulesQuerqyParser, QuerqyParserFactory.class);
      }
   }

   return new querqy.rewrite.commonrules.SimpleCommonRulesRewriterFactory(
         new InputStreamReader(resourceLoader.openResource(rulesResourceName), "UTF-8"), querqyParser, ignoreCase != null && ignoreCase);
}
Project: read-open-source-code    File: JapaneseTokenizerFactory.java
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (userDictionaryPath != null) {
    InputStream stream = loader.openResource(userDictionaryPath);
    String encoding = userDictionaryEncoding;
    if (encoding == null) {
      encoding = IOUtils.UTF_8;
    }
    CharsetDecoder decoder = Charset.forName(encoding).newDecoder()
        .onMalformedInput(CodingErrorAction.REPORT)
        .onUnmappableCharacter(CodingErrorAction.REPORT);
    Reader reader = new InputStreamReader(stream, decoder);
    userDictionary = new UserDictionary(reader);
  } else {
    userDictionary = null;
  }
}
Project: auto-phrase-tokenfilter    File: AutoPhrasingQParserPlugin.java
private CharArraySet getWordSet(ResourceLoader loader,
                                String wordFiles, boolean ignoreCase)
                                throws IOException {
  List<String> files = splitFileNames(wordFiles);
  CharArraySet words = null;
  if (files.size() > 0) {
    // default stopwords list has 35 or so words, but maybe don't make it that
    // big to start
    words = new CharArraySet(files.size() * 10, ignoreCase);
    for (String file : files) {
      List<String> wlist = getLines(loader, file.trim());
      words.addAll(StopFilter.makeStopSet(wlist, ignoreCase));
    }
  }
  return words;
}
Project: read-open-source-code    File: HyphenationCompoundWordTokenFilterFactory.java
@Override
public void inform(ResourceLoader loader) throws IOException {
  InputStream stream = null;
  try {
    if (dictFile != null) // the dictionary can be empty.
      dictionary = getWordSet(loader, dictFile, false);
    // TODO: Broken, because we cannot resolve real system id
    // ResourceLoader should also supply method like ClassLoader to get resource URL
    stream = loader.openResource(hypFile);
    final InputSource is = new InputSource(stream);
    is.setEncoding(encoding); // if it's null let xml parser decide
    is.setSystemId(hypFile);
    if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4_0)) {
      hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is);
    } else {
      hyphenator = Lucene43HyphenationCompoundWordTokenFilter.getHyphenationTree(is);
    }
  } finally {
    IOUtils.closeWhileHandlingException(stream);
  }
}
Project: mmseg4j-solr    File: Utils.java
public static Dictionary getDict(String dicPath, ResourceLoader loader) {
    Dictionary dic = null;
    if(dicPath != null) {
        File f = new File(dicPath);
        if(!f.isAbsolute() && loader instanceof SolrResourceLoader) {   // relative directory
            SolrResourceLoader srl = (SolrResourceLoader) loader;
            dicPath = srl.getInstancePath().resolve(dicPath).toString();
            f = new File(dicPath);
        }

        dic = Dictionary.getInstance(f);
    } else {
        dic = Dictionary.getInstance();
    }
    return dic;
}
Project: read-open-source-code    File: MappingCharFilterFactory.java
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (mapping != null) {
    List<String> wlist = null;
    File mappingFile = new File(mapping);
    if (mappingFile.exists()) {
      wlist = getLines(loader, mapping);
    } else {
      List<String> files = splitFileNames(mapping);
      wlist = new ArrayList<>();
      for (String file : files) {
        List<String> lines = getLines(loader, file.trim());
        wlist.addAll(lines);
      }
    }
    final NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
    parseRules(wlist, builder);
    normMap = builder.build();
    if (normMap.map == null) {
      // if the inner FST is null, it means it accepts nothing (e.g. the file is empty)
      // so just set the whole map to null
      normMap = null;
    }
  }
}
Project: NYBC    File: TypeTokenFilterFactory.java
@Override
public void inform(ResourceLoader loader) throws IOException {
  String stopTypesFiles = args.get("types");
  enablePositionIncrements = getBoolean("enablePositionIncrements", false);
  useWhitelist = getBoolean("useWhitelist", false);
  if (stopTypesFiles != null) {
    List<String> files = splitFileNames(stopTypesFiles);
    if (files.size() > 0) {
      stopTypes = new HashSet<String>();
      for (String file : files) {
        List<String> typesLines = getLines(loader, file.trim());
        stopTypes.addAll(typesLines);
      }
    }
  } else {
    throw new IllegalArgumentException("Missing required parameter: types.");
  }
}
Project: NYBC    File: TestHyphenationCompoundWordTokenFilterFactory.java
/**
 * Ensure the factory works with hyphenation grammar+dictionary: using default options.
 */
public void testHyphenationWithDictionary() throws Exception {
  Reader reader = new StringReader("min veninde som er lidt af en læsehest");
  Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
  HyphenationCompoundWordTokenFilterFactory factory = new HyphenationCompoundWordTokenFilterFactory();
  ResourceLoader loader = new ClasspathResourceLoader(getClass());
  Map<String,String> args = new HashMap<String,String>();
  args.put("hyphenator", "da_UTF8.xml");
  args.put("dictionary", "da_compoundDictionary.txt");
  factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  factory.init(args);
  factory.inform(loader);
  TokenStream stream = factory.create(tokenizer);

  assertTokenStreamContents(stream, 
      new String[] { "min", "veninde", "som", "er", "lidt", "af", "en", "læsehest", "læse", "hest" },
      new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0 }
  );
}
Project: NYBC    File: TestHyphenationCompoundWordTokenFilterFactory.java
/**
 * Ensure the factory works with no dictionary: using hyphenation grammar only.
 * Also change the min/max subword sizes from the default. When using no dictionary,
 * it's generally necessary to tweak these, or you get lots of expansions.
 */
public void testHyphenationOnly() throws Exception {
  Reader reader = new StringReader("basketballkurv");
  Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
  HyphenationCompoundWordTokenFilterFactory factory = new HyphenationCompoundWordTokenFilterFactory();
  ResourceLoader loader = new ClasspathResourceLoader(getClass());
  Map<String,String> args = new HashMap<String,String>();
  args.put("hyphenator", "da_UTF8.xml");
  args.put("minSubwordSize", "2");
  args.put("maxSubwordSize", "4");
  factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  factory.init(args);
  factory.inform(loader);
  TokenStream stream = factory.create(tokenizer);

  assertTokenStreamContents(stream,
      new String[] { "basketballkurv", "ba", "sket", "bal", "ball", "kurv" }
  );
}
Project: read-open-source-code    File: HunspellStemFilterFactory.java
@Override
public void inform(ResourceLoader loader) throws IOException {
  String dicts[] = dictionaryFiles.split(",");

  InputStream affix = null;
  List<InputStream> dictionaries = new ArrayList<>();

  try {
    dictionaries = new ArrayList<>();
    for (String file : dicts) {
      dictionaries.add(loader.openResource(file));
    }
    affix = loader.openResource(affixFile);

    this.dictionary = new Dictionary(affix, dictionaries, ignoreCase);
  } catch (ParseException e) {
    throw new IOException("Unable to load hunspell data! [dictionary=" + dictionaries + ",affix=" + affixFile + "]", e);
  } finally {
    IOUtils.closeWhileHandlingException(affix);
    IOUtils.closeWhileHandlingException(dictionaries);
  }
}
Project: NYBC    File: TestDelimitedPayloadTokenFilterFactory.java
public void testDelim() throws Exception {
  Map<String,String> args = new HashMap<String, String>();
  args.put(DelimitedPayloadTokenFilterFactory.ENCODER_ATTR, FloatEncoder.class.getName());
  args.put(DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, "*");
  DelimitedPayloadTokenFilterFactory factory = new DelimitedPayloadTokenFilterFactory();
  factory.init(args);
  ResourceLoader loader = new StringMockResourceLoader("solr/collection1");
  factory.inform(loader);

  TokenStream input = new MockTokenizer(new StringReader("the*0.1 quick*0.1 red*0.1"), MockTokenizer.WHITESPACE, false);
  DelimitedPayloadTokenFilter tf = factory.create(input);
  tf.reset();
  while (tf.incrementToken()){
    PayloadAttribute payAttr = tf.getAttribute(PayloadAttribute.class);
    assertTrue("payAttr is null and it shouldn't be", payAttr != null);
    byte[] payData = payAttr.getPayload().bytes;
    assertTrue("payData is null and it shouldn't be", payData != null);
    float payFloat = PayloadHelper.decodeFloat(payData);
    assertTrue(payFloat + " does not equal: " + 0.1f, payFloat == 0.1f);
  }
}
Project: read-open-source-code    File: StopFilterFactory.java
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (stopWordFiles != null) {
    if (FORMAT_WORDSET.equalsIgnoreCase(format)) {
      stopWords = getWordSet(loader, stopWordFiles, ignoreCase);
    } else if (FORMAT_SNOWBALL.equalsIgnoreCase(format)) {
      stopWords = getSnowballWordSet(loader, stopWordFiles, ignoreCase);
    } else {
      throw new IllegalArgumentException("Unknown 'format' specified for 'words' file: " + format);
    }
  } else {
    if (null != format) {
      throw new IllegalArgumentException("'format' can not be specified w/o an explicit 'words' file: " + format);
    }
    stopWords = new CharArraySet(luceneMatchVersion, StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
  }
}
Project: NYBC    File: TestCommonGramsQueryFilterFactory.java
/**
 * If no words are provided, then a set of english default stopwords is used.
 */
public void testDefaults() throws Exception {
  ResourceLoader loader = new ClasspathResourceLoader(TestStopFilter.class);
  assertTrue("loader is null and it shouldn't be", loader != null);
  CommonGramsQueryFilterFactory factory = new CommonGramsQueryFilterFactory();
  factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  Map<String, String> args = Collections.emptyMap();
  factory.init(args);
  factory.inform(loader);
  CharArraySet words = factory.getCommonWords();
  assertTrue("words is null and it shouldn't be", words != null);
  assertTrue(words.contains("the"));
  Tokenizer tokenizer = new MockTokenizer(new StringReader("testing the factory"), MockTokenizer.WHITESPACE, false);
  TokenStream stream = factory.create(tokenizer);
  assertTokenStreamContents(stream, 
      new String[] { "testing_the", "the_factory" });
}
Project: read-open-source-code    File: ICUTokenizerFactory.java
@Override
public void inform(ResourceLoader loader) throws IOException {
  assert tailored != null : "init must be called first!";
  if (tailored.isEmpty()) {
    config = new DefaultICUTokenizerConfig(cjkAsWords);
  } else {
    final BreakIterator breakers[] = new BreakIterator[UScript.CODE_LIMIT];
    for (Map.Entry<Integer,String> entry : tailored.entrySet()) {
      int code = entry.getKey();
      String resourcePath = entry.getValue();
      breakers[code] = parseRules(resourcePath, loader);
    }
    config = new DefaultICUTokenizerConfig(cjkAsWords) {

      @Override
      public BreakIterator getBreakIterator(int script) {
        if (breakers[script] != null) {
          return (BreakIterator) breakers[script].clone();
        } else {
          return super.getBreakIterator(script);
        }
      }
      // TODO: we could also allow codes->types mapping
    };
  }
}
Project: read-open-source-code    File: StemmerOverrideFilterFactory.java
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (dictionaryFiles != null) {
    assureMatchVersion();
    List<String> files = splitFileNames(dictionaryFiles);
    if (files.size() > 0) {
      StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(ignoreCase);
      for (String file : files) {
        List<String> list = getLines(loader, file.trim());
        for (String line : list) {
          String[] mapping = line.split("\t", 2);
          builder.add(mapping[0], mapping[1]);
        }
      }
      dictionary = builder.build();
    }
  }
}
Project: NYBC    File: TestTypeTokenFilterFactory.java
@Test
public void testInform() throws Exception {
  ResourceLoader loader = new ClasspathResourceLoader(getClass());
  TypeTokenFilterFactory factory = new TypeTokenFilterFactory();
  Map<String, String> args = new HashMap<String, String>();
  args.put("types", "stoptypes-1.txt");
  args.put("enablePositionIncrements", "true");
  factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  factory.init(args);
  factory.inform(loader);
  Set<String> types = factory.getStopTypes();
  assertTrue("types is null and it shouldn't be", types != null);
  assertTrue("types Size: " + types.size() + " is not: " + 2, types.size() == 2);
  assertTrue("enablePositionIncrements was set to true but not correctly parsed", factory.isEnablePositionIncrements());

  factory = new TypeTokenFilterFactory();
  args.put("types", "stoptypes-1.txt, stoptypes-2.txt");
  args.put("enablePositionIncrements", "false");
  args.put("useWhitelist","true");
  factory.init(args);
  factory.inform(loader);
  types = factory.getStopTypes();
  assertTrue("types is null and it shouldn't be", types != null);
  assertTrue("types Size: " + types.size() + " is not: " + 4, types.size() == 4);
  assertTrue("enablePositionIncrements was set to false but not correctly parsed", !factory.isEnablePositionIncrements());
}
Project: read-open-source-code    File: HunspellStemFilterFactory.java
/**
 * Loads the hunspell dictionary and affix files defined in the configuration
 *  
 * @param loader ResourceLoader used to load the files
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  String dictionaryFiles[] = dictionaryArg.split(",");

  InputStream affix = null;
  List<InputStream> dictionaries = new ArrayList<InputStream>();

  try {
    dictionaries = new ArrayList<InputStream>();
    for (String file : dictionaryFiles) {
      dictionaries.add(loader.openResource(file));
    }
    affix = loader.openResource(affixFile);

    this.dictionary = new HunspellDictionary(affix, dictionaries, luceneMatchVersion, ignoreCase, strictAffixParsing);
  } catch (ParseException e) {
    throw new IOException("Unable to load hunspell data! [dictionary=" + dictionaryArg + ",affix=" + affixFile + "]", e);
  } finally {
    IOUtils.closeWhileHandlingException(affix);
    IOUtils.closeWhileHandlingException(dictionaries);
  }
}
Project: read-open-source-code    File: PhoneticFilterFactory.java
@Override
public void inform(ResourceLoader loader) throws IOException {
  clazz = registry.get(name.toUpperCase(Locale.ROOT));
  if( clazz == null ) {
    clazz = resolveEncoder(name, loader);
  }

  if (maxCodeLength != null) {
    try {
      setMaxCodeLenMethod = clazz.getMethod("setMaxCodeLen", int.class);
    } catch (Exception e) {
      throw new IllegalArgumentException("Encoder " + name + " / " + clazz + " does not support " + MAX_CODE_LENGTH, e);
    }
  }

  getEncoder();//trigger initialization for potential problems to be thrown now
}
Project: read-open-source-code    File: StopFilterFactory.java
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (stopWordFiles != null) {
    if (FORMAT_WORDSET.equalsIgnoreCase(format)) {
      stopWords = getWordSet(loader, stopWordFiles, ignoreCase);
    } else if (FORMAT_SNOWBALL.equalsIgnoreCase(format)) {
      stopWords = getSnowballWordSet(loader, stopWordFiles, ignoreCase);
    } else {
      throw new IllegalArgumentException("Unknown 'format' specified for 'words' file: " + format);
    }
  } else {
    if (null != format) {
      throw new IllegalArgumentException("'format' can not be specified w/o an explicit 'words' file: " + format);
    }
    if (luceneMatchVersion == null) {
      stopWords = new CharArraySet(StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
    } else {
      stopWords = new CharArraySet(luceneMatchVersion, StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
    }
  }
}
Project: read-open-source-code    File: HyphenationCompoundWordTokenFilterFactory.java
@Override
public void inform(ResourceLoader loader) throws IOException {
  InputStream stream = null;
  try {
    if (dictFile != null) // the dictionary can be empty.
      dictionary = getWordSet(loader, dictFile, false);
    // TODO: Broken, because we cannot resolve real system id
    // ResourceLoader should also supply method like ClassLoader to get resource URL
    stream = loader.openResource(hypFile);
    final InputSource is = new InputSource(stream);
    is.setEncoding(encoding); // if it's null let xml parser decide
    is.setSystemId(hypFile);
    hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is);
  } finally {
    IOUtils.closeWhileHandlingException(stream);
  }
}
Project: read-open-source-code    File: MappingCharFilterFactory.java
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (mapping != null) {
    List<String> wlist = null;
    File mappingFile = new File(mapping);
    if (mappingFile.exists()) {
      wlist = getLines(loader, mapping);
    } else {
      List<String> files = splitFileNames(mapping);
      wlist = new ArrayList<String>();
      for (String file : files) {
        List<String> lines = getLines(loader, file.trim());
        wlist.addAll(lines);
      }
    }
    final NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
    parseRules(wlist, builder);
    normMap = builder.build();
    if (normMap.map == null) {
      // if the inner FST is null, it means it accepts nothing (e.g. the file is empty)
      // so just set the whole map to null
      normMap = null;
    }
  }
}