Java 类org.apache.lucene.analysis.charfilter.NormalizeCharMap 实例源码

项目:search    文件:TestRandomChains.java   
/**
 * Produces a {@link NormalizeCharMap} holding up to four random mappings.
 *
 * <p>Keys are drawn from {@code TestUtil.randomSimpleString}; empty keys and keys
 * that were already registered are skipped, so fewer mappings than attempts may
 * end up in the result.
 *
 * @param random source of randomness for the attempt count, keys and values
 * @return the built map, returned as {@code Object}
 */
@Override public Object create(Random random) {
  final NormalizeCharMap.Builder mapBuilder = new NormalizeCharMap.Builder();
  // The builder must never be handed the same key twice, so track what was registered.
  final Set<String> usedKeys = new HashSet<>();
  final int attempts = random.nextInt(5);
  for (int attempt = 0; attempt < attempts; attempt++) {
    final String candidate = TestUtil.randomSimpleString(random);
    if (candidate.length() == 0 || usedKeys.contains(candidate)) {
      // Skip without drawing a value, so the random sequence is consumed only for usable keys.
      continue;
    }
    final String replacement = TestUtil.randomSimpleString(random);
    mapBuilder.add(candidate, replacement);
    usedKeys.add(candidate);
  }
  return mapBuilder.build();
}
项目:NYBC    文件:TestRandomChains.java   
/**
 * Creates a randomly populated {@link NormalizeCharMap}.
 *
 * <p>At most four key/value pairs are attempted. A pair is only added when its
 * key is non-empty and has not been added before.
 *
 * @param random generator used for the number of attempts and for the strings
 * @return the map produced by the builder
 */
@Override public Object create(Random random) {
  final NormalizeCharMap.Builder charMapBuilder = new NormalizeCharMap.Builder();
  // Duplicate keys are not allowed by the builder; remember every key we add.
  final Set<String> addedKeys = new HashSet<String>();
  final int rounds = random.nextInt(5);
  for (int round = 0; round < rounds; round++) {
    final String from = _TestUtil.randomSimpleString(random);
    final boolean unusable = from.length() <= 0 || addedKeys.contains(from);
    if (!unusable) {
      // The value is drawn only for keys that will actually be registered.
      final String to = _TestUtil.randomSimpleString(random);
      charMapBuilder.add(from, to);
      addedKeys.add(from);
    }
  }
  return charMapBuilder.build();
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestRandomChains.java   
/**
 * Returns a {@link NormalizeCharMap} filled with a handful of random mappings.
 *
 * <p>The number of attempts is {@code random.nextInt(5)}. Attempts whose key is
 * empty or already present are dropped.
 *
 * @param random randomness source for the attempt count and the generated strings
 * @return the built character map
 */
@Override public Object create(Random random) {
  final NormalizeCharMap.Builder result = new NormalizeCharMap.Builder();
  // Registering a key twice is rejected by the builder, hence this bookkeeping set.
  final Set<String> registered = new HashSet<String>();
  int remaining = random.nextInt(5);
  while (remaining > 0) {
    remaining--;
    final String source = _TestUtil.randomSimpleString(random);
    if (registered.contains(source)) {
      continue;
    }
    if (source.length() == 0) {
      continue;
    }
    final String target = _TestUtil.randomSimpleString(random);
    result.add(source, target);
    registered.add(source);
  }
  return result.build();
}
项目:lucene-tokenizers-for-multilang    文件:MultistageMappingCharFilterFactory.java   
/**
 * Loads the mapping rules named by {@code mapping} and compiles them into a
 * {@link NormalizeCharMap}.
 *
 * <p>If {@code mapping} names an existing file it is read as a single resource;
 * otherwise it is split with {@code splitFileNames} and every resulting name is
 * trimmed and read in turn.
 *
 * @param loader  resource loader passed through to {@code getLines}
 * @param mapping a file name or a list of file names; may be {@code null}
 * @return the compiled map, or {@code null} when {@code mapping} is {@code null}
 *         or no rule lines were read
 * @throws IOException declared for the resource-reading helpers
 */
public NormalizeCharMap inform(ResourceLoader loader, String mapping) throws IOException {
  if (mapping == null) {
    return null;
  }

  final List<String> ruleLines;
  if (new File(mapping).exists()) {
    ruleLines = getLines(loader, mapping);
  } else {
    final List<String> fileNames = splitFileNames(mapping);
    ruleLines = new ArrayList<String>();
    for (String fileName : fileNames) {
      ruleLines.addAll(getLines(loader, fileName.trim()));
    }
  }

  // Nothing to map: signal that to the caller with null instead of an empty map.
  if (ruleLines.isEmpty()) {
    return null;
  }

  final NormalizeCharMap.Builder charMapBuilder = new NormalizeCharMap.Builder();
  parseRules(ruleLines, charMapBuilder);
  return charMapBuilder.build();
}
项目:elasticsearch_my    文件:MappingCharFilterFactory.java   
/**
 * Creates the factory and compiles its configured mapping rules into {@code normMap}.
 *
 * @param indexSettings index settings forwarded to the superclass
 * @param env           environment passed to {@code Analysis.getWordList}
 * @param name          name forwarded to the superclass
 * @param settings      settings from which the {@code "mappings"} word list is read
 * @throws IllegalArgumentException if no rule list could be obtained from the settings
 */
public MappingCharFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name);

    final List<String> mappingRules = Analysis.getWordList(env, settings, "mappings");
    if (null == mappingRules) {
        throw new IllegalArgumentException("mapping requires either `mappings` or `mappings_path` to be configured");
    }

    // Compile the textual rules once, at construction time.
    final NormalizeCharMap.Builder charMapBuilder = new NormalizeCharMap.Builder();
    parseRules(mappingRules, charMapBuilder);
    normMap = charMapBuilder.build();
}
项目:elasticsearch_my    文件:MappingCharFilterFactory.java   
/**
 * Converts MappingCharFilter-style rule strings into entries of a normalize char map.
 *
 * <p>Each rule is matched against {@code rulePattern}; capture groups 1 and 2 are
 * trimmed, passed through {@code parseString} and added to {@code map} as a
 * source/replacement pair.
 *
 * @param rules the rule lines to parse
 * @param map   the builder that receives one entry per rule
 * @throws RuntimeException if a rule does not match the pattern, or if either
 *                          side parses to {@code null}
 */
private void parseRules(List<String> rules, NormalizeCharMap.Builder map) {
    for (final String rule : rules) {
        final Matcher ruleMatcher = rulePattern.matcher(rule);
        if (ruleMatcher.find() == false) {
            throw new RuntimeException("Invalid Mapping Rule : [" + rule + "]");
        }

        final String source = parseString(ruleMatcher.group(1).trim());
        final String replacement = parseString(ruleMatcher.group(2).trim());
        if (source == null || replacement == null) {
            throw new RuntimeException("Invalid Mapping Rule : [" + rule + "]. Illegal mapping.");
        }

        map.add(source, replacement);
    }
}
项目:lucene-bo    文件:TibCharFilter.java   
/**
 * Builds the character normalization map for Tibetan text.
 *
 * <p>The map replaces the non-breaking tsheg with the normal one, deletes two
 * marker characters, and decomposes a set of precomposed characters into
 * sequences of their parts.
 *
 * @return a freshly built {@link NormalizeCharMap} containing all mappings below
 */
public final static NormalizeCharMap getTibNormalizeCharMap() {
    // Each row is {source, replacement}; rows are registered in the order listed.
    final String[][] mappings = {
        // The non-breaking tsheg is replaced by the normal one
        {"\u0f0C", "\u0F0B"},
        // Characters to delete: the markers found under selected syllables
        {"\u0F35", ""}, //  ༵
        {"\u0F37", ""}, //  ༷
        // Characters to decompose
        {"\u0F00", "\u0F68\u0F7C\u0F7E"}, //  ༀ
        {"\u0F02", "\u0F60\u0F70\u0F82"}, // ༂
        {"\u0F03", "\u0F60\u0F70\u0F14"}, //  ༃
        {"\u0F43", "\u0F42\u0FB7"}, //  གྷ
        {"\u0F48", "\u0F47\u0FB7"}, //  ཈
        {"\u0F4D", "\u0F4C\u0FB7"}, //  ཌྷ
        {"\u0F52", "\u0F51\u0FB7"}, //  དྷ
        {"\u0F57", "\u0F56\u0FB7"}, //  བྷ
        {"\u0F5C", "\u0F5B\u0FB7"}, //  ཛྷ
        {"\u0F69", "\u0F40\u0FB5"}, //  ཀྵ
        {"\u0F73", "\u0F71\u0F72"}, //    ཱི
        {"\u0F75", "\u0F71\u0F74"}, //   ཱུ
        {"\u0F76", "\u0FB2\u0F80"}, //   ྲྀ
        {"\u0F77", "\u0FB2\u0F71\u0F80"}, //   ཷ
        {"\u0F78", "\u0FB3\u0F80"}, //   ླྀ
        {"\u0F79", "\u0FB3\u0F71\u0F80"}, //   ཹ
        {"\u0F81", "\u0F71\u0F80"}, //     ཱྀ
        {"\u0F93", "\u0F92\u0FB7"}, //  ྒྷ
        {"\u0F9D", "\u0F9C\u0FB7"}, //  ྜྷ
        {"\u0FA2", "\u0FA1\u0FB7"}, //  ྡྷ
        {"\u0FA7", "\u0FA6\u0FB7"}, //  ྦྷ
        {"\u0FAC", "\u0FAB\u0FB7"}, //  ྫྷ
        {"\u0FB9", "\u0F90\u0FB5"}, //  ྐྵ
    };

    final NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
    for (final String[] mapping : mappings) {
        builder.add(mapping[0], mapping[1]);
    }
    return builder.build();
}
项目:Elasticsearch    文件:MappingCharFilterFactory.java   
/**
 * Injected constructor: reads the configured mapping rules and compiles them
 * into {@code normMap}.
 *
 * @param index                index forwarded to the superclass
 * @param indexSettingsService supplies the settings forwarded to the superclass
 * @param env                  environment passed to {@code Analysis.getWordList}
 * @param name                 name forwarded to the superclass
 * @param settings             settings from which the {@code "mappings"} word list is read
 * @throws IllegalArgumentException if no rule list could be obtained from the settings
 */
@Inject
public MappingCharFilterFactory(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name);

    final List<String> configuredRules = Analysis.getWordList(env, settings, "mappings");
    if (configuredRules == null) {
        throw new IllegalArgumentException("mapping requires either `mappings` or `mappings_path` to be configured");
    }

    // Parse every rule into the builder, then freeze it into the map used by this factory.
    final NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
    parseRules(configuredRules, builder);
    normMap = builder.build();
}
项目:Elasticsearch    文件:MappingCharFilterFactory.java   
/**
 * Parses a list of MappingCharFilter style rules into a normalize char map.
 *
 * <p>For every rule, groups 1 and 2 of the {@code rulePattern} match are trimmed
 * and decoded with {@code parseString}, then registered on {@code map}.
 *
 * @param rules rule lines to parse
 * @param map   builder that collects the parsed mappings
 * @throws RuntimeException when a rule does not match the expected pattern or
 *                          one of its sides decodes to {@code null}
 */
private void parseRules(List<String> rules, NormalizeCharMap.Builder map) {
    for (String ruleText : rules) {
        Matcher match = rulePattern.matcher(ruleText);
        boolean matched = match.find();
        if (!matched) {
            throw new RuntimeException("Invalid Mapping Rule : [" + ruleText + "]");
        }

        String left = parseString(match.group(1).trim());
        String right = parseString(match.group(2).trim());
        boolean illegal = (left == null) || (right == null);
        if (illegal) {
            throw new RuntimeException("Invalid Mapping Rule : [" + ruleText + "]. Illegal mapping.");
        }

        map.add(left, right);
    }
}
项目:search    文件:TestPathHierarchyTokenizer.java   
/**
 * Checks that a Windows-style path whose backslashes are rewritten to forward
 * slashes by a {@link MappingCharFilter} is tokenized into its path prefixes.
 */
public void testNormalizeWinDelimToLinuxDelim() throws Exception {
  final String windowsPath = "c:\\a\\b\\c";

  // Char filter that turns every backslash into a forward slash.
  final NormalizeCharMap.Builder mapBuilder = new NormalizeCharMap.Builder();
  mapBuilder.add("\\", "/");
  final NormalizeCharMap backslashToSlash = mapBuilder.build();
  final Reader filtered = new MappingCharFilter(backslashToSlash, new StringReader(windowsPath));

  final PathHierarchyTokenizer tokenizer =
      new PathHierarchyTokenizer(newAttributeFactory(), filtered, DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP);

  // Expected values, in the argument order assertTokenStreamContents takes them.
  final String[] expectedTerms = new String[]{"c:", "c:/a", "c:/a/b", "c:/a/b/c"};
  final int[] expectedStarts = new int[]{0, 0, 0, 0};
  final int[] expectedEnds = new int[]{2, 4, 6, 8};
  final int[] expectedIncrements = new int[]{1, 0, 0, 0};

  assertTokenStreamContents(tokenizer,
      expectedTerms,
      expectedStarts,
      expectedEnds,
      expectedIncrements,
      windowsPath.length());
}
项目:gerrit    文件:CustomMappingAnalyzer.java   
/**
 * Wraps {@code reader} in a {@link MappingCharFilter} that applies every entry
 * of {@code customMappings}.
 *
 * <p>A new {@link NormalizeCharMap} is built from {@code customMappings} on each call.
 *
 * @param fieldName not used by this implementation
 * @param reader    the reader to wrap
 * @return a char filter over {@code reader}
 */
@Override
protected Reader wrapReader(String fieldName, Reader reader) {
  final NormalizeCharMap.Builder mappings = new NormalizeCharMap.Builder();
  for (Map.Entry<String, String> mapping : customMappings.entrySet()) {
    final String from = mapping.getKey();
    final String to = mapping.getValue();
    mappings.add(from, to);
  }
  final NormalizeCharMap charMap = mappings.build();
  return new MappingCharFilter(charMap, reader);
}
项目:NYBC    文件:MappingCharFilterFactory.java   
/**
 * Reads the {@code "mapping"} argument, loads the rule lines it refers to and
 * compiles them into {@code normMap}.
 *
 * <p>When the argument is absent nothing else happens. When the compiled map has
 * no inner FST, which per the original author means it accepts nothing (e.g.
 * the file is empty), {@code normMap} is set to {@code null}.
 *
 * @param loader resource loader passed through to {@code getLines}
 * @throws IOException declared for the resource-reading helpers
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  mapping = args.get("mapping");
  if (mapping == null) {
    return;
  }

  final List<String> ruleLines;
  if (new File(mapping).exists()) {
    // The argument names one existing file: read it directly.
    ruleLines = getLines(loader, mapping);
  } else {
    // Otherwise treat it as a list of names and concatenate their lines.
    final List<String> fileNames = splitFileNames(mapping);
    ruleLines = new ArrayList<String>();
    for (String fileName : fileNames) {
      ruleLines.addAll(getLines(loader, fileName.trim()));
    }
  }

  final NormalizeCharMap.Builder charMapBuilder = new NormalizeCharMap.Builder();
  parseRules(ruleLines, charMapBuilder);
  final NormalizeCharMap compiled = charMapBuilder.build();
  // A null inner FST means the map accepts nothing, so drop the whole map.
  normMap = (compiled.map == null) ? null : compiled;
}
项目:NYBC    文件:MappingCharFilterFactory.java   
/**
 * Parses mapping rule lines and registers each one on {@code builder}.
 *
 * <p>Every rule is matched against the pattern {@code p}; groups 1 and 2 are
 * decoded with {@code parseString} and added as a source/replacement pair.
 *
 * @param rules   the rule lines to parse
 * @param builder receives one mapping per rule
 * @throws IllegalArgumentException if a rule does not match the pattern
 */
protected void parseRules( List<String> rules, NormalizeCharMap.Builder builder ){
  for (final String ruleLine : rules) {
    final Matcher ruleMatcher = p.matcher(ruleLine);
    if (!ruleMatcher.find()) {
      throw new IllegalArgumentException("Invalid Mapping Rule : [" + ruleLine + "], file = " + mapping);
    }
    final String source = parseString(ruleMatcher.group(1));
    final String replacement = parseString(ruleMatcher.group(2));
    builder.add(source, replacement);
  }
}
项目:NYBC    文件:TestPathHierarchyTokenizer.java   
/**
 * Verifies that backslash-delimited input, normalized to forward slashes by a
 * {@link MappingCharFilter}, yields one token per path prefix.
 */
public void testNormalizeWinDelimToLinuxDelim() throws Exception {
  final String input = "c:\\a\\b\\c";

  // Map each backslash to a forward slash before tokenizing.
  final NormalizeCharMap.Builder charMapBuilder = new NormalizeCharMap.Builder();
  charMapBuilder.add("\\", "/");
  final Reader normalizedInput =
      new MappingCharFilter(charMapBuilder.build(), new StringReader(input));

  final PathHierarchyTokenizer tokenizer = new PathHierarchyTokenizer(normalizedInput);

  final String[] terms = {"c:", "c:/a", "c:/a/b", "c:/a/b/c"};
  final int[] starts = {0, 0, 0, 0};
  final int[] ends = {2, 4, 6, 8};
  final int[] increments = {1, 0, 0, 0};
  assertTokenStreamContents(tokenizer, terms, starts, ends, increments, input.length());
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestPathHierarchyTokenizer.java   
/**
 * Tokenizes a Windows path after its backslashes have been mapped to forward
 * slashes, and asserts the resulting path-prefix tokens.
 */
public void testNormalizeWinDelimToLinuxDelim() throws Exception {
  // Build the backslash-to-slash mapping first.
  final NormalizeCharMap.Builder mappingBuilder = new NormalizeCharMap.Builder();
  mappingBuilder.add("\\", "/");
  final NormalizeCharMap slashMap = mappingBuilder.build();

  final String rawPath = "c:\\a\\b\\c";
  final StringReader rawReader = new StringReader(rawPath);
  final Reader slashNormalized = new MappingCharFilter(slashMap, rawReader);
  final PathHierarchyTokenizer pathTokenizer = new PathHierarchyTokenizer(slashNormalized);

  final int expectedFinalOffset = rawPath.length();
  assertTokenStreamContents(
      pathTokenizer,
      new String[]{"c:", "c:/a", "c:/a/b", "c:/a/b/c"},
      new int[]{0, 0, 0, 0},
      new int[]{2, 4, 6, 8},
      new int[]{1, 0, 0, 0},
      expectedFinalOffset);
}
项目:lucene-tokenizers-for-multilang    文件:MultistageMappingCharFilterFactory.java   
/**
 * Reads the original {@code "mapping"} argument, splits it into stages and
 * compiles one {@link NormalizeCharMap} per stage.
 *
 * <p>Each stage has the {@code PTN_REMOVE_ESCAPE_CHAR} pattern stripped from it
 * before it is handed to {@code inform(loader, fileNames)}. Stages that compile
 * to {@code null} are not added to {@code normMapList}.
 *
 * @param loader resource loader forwarded to the per-stage {@code inform}
 * @throws IOException declared for the per-stage loading
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
    final String mapping = getOriginalArgs().get("mapping");
    if (mapping != null) {
        final String[] stages = mapping.split(PTN_STAGE_DELIMITER);
        for (final String stage : stages) {
            final String stageFileNames = stage.replaceAll(PTN_REMOVE_ESCAPE_CHAR, "");
            final NormalizeCharMap stageMap = inform(loader, stageFileNames);
            if (stageMap == null) {
                continue;
            }
            normMapList.add(stageMap);
        }
    }
}
项目:lucene-tokenizers-for-multilang    文件:MultistageMappingCharFilterFactory.java   
/**
 * Chains one {@link MappingCharFilter} per entry of {@code normMapList} around
 * {@code input}, in list order.
 *
 * @param input the reader to wrap
 * @return the outermost filter, or {@code input} itself when no map applies
 */
@Override
public Reader create(Reader input) {
    Reader chained = input;
    for (final NormalizeCharMap stageMap : normMapList) {
        // Null entries are passed over and leave the chain unchanged.
        if (stageMap != null) {
            chained = new MappingCharFilter(stageMap, chained);
        }
    }
    return chained;
}