Java 类org.apache.lucene.analysis.ja.dict.CharacterDefinition 实例源码

项目:search    文件:CharacterDefinitionWriter.java   
/**
 * Writes the character definition data to a file below {@code baseDir}.
 * <p>
 * The target path is {@code baseDir}, followed by the fully qualified name of
 * {@link CharacterDefinition} with dots replaced by the platform file separator,
 * followed by {@code CharacterDefinition.FILENAME_SUFFIX}. Missing parent
 * directories are created.
 * <p>
 * Output layout: a codec header, the whole {@code characterCategoryMap}, then one
 * flag byte per character class (bit 0 = {@code invokeMap} entry, bit 1 =
 * {@code groupMap} entry).
 *
 * @param baseDir directory under which the file is created
 * @throws IOException if the file cannot be opened or written
 */
public void write(String baseDir) throws IOException {
  String filename = baseDir + File.separator +
    CharacterDefinition.class.getName().replace('.', File.separatorChar) + CharacterDefinition.FILENAME_SUFFIX;
  // The result of mkdirs() is intentionally not checked: if the directory is
  // missing afterwards, opening the FileOutputStream below fails with an IOException.
  new File(filename).getParentFile().mkdirs();
  // try-with-resources flushes and closes the stream on every path, and unlike a
  // close() inside finally it does not replace an exception thrown while writing
  // with one thrown while closing (the latter is recorded as suppressed).
  try (OutputStream os = new BufferedOutputStream(new FileOutputStream(filename))) {
    final DataOutput out = new OutputStreamDataOutput(os);
    CodecUtil.writeHeader(out, CharacterDefinition.HEADER, CharacterDefinition.VERSION);
    out.writeBytes(characterCategoryMap, 0, characterCategoryMap.length);
    for (int i = 0; i < CharacterDefinition.CLASS_COUNT; i++) {
      final byte b = (byte) (
        (invokeMap[i] ? 0x01 : 0x00) |
        (groupMap[i] ? 0x02 : 0x00)
      );
      out.writeByte(b);
    }
  }
}
项目:NYBC    文件:CharacterDefinitionWriter.java   
/**
 * Serializes the character definition tables into a file located under
 * {@code baseDir}.
 * <p>
 * The file path mirrors the fully qualified name of {@link CharacterDefinition}
 * (dots become file separators) and ends with
 * {@code CharacterDefinition.FILENAME_SUFFIX}. The file holds a codec header, the
 * complete {@code characterCategoryMap}, and one flag byte per character class.
 *
 * @param baseDir directory under which the file is created
 * @throws IOException if the file cannot be opened or written
 */
public void write(String baseDir) throws IOException {
  final String relativePath =
    CharacterDefinition.class.getName().replace('.', File.separatorChar) + CharacterDefinition.FILENAME_SUFFIX;
  final String filename = baseDir + File.separator + relativePath;

  // Make sure the package directory hierarchy exists before opening the file.
  final File parentDir = new File(filename).getParentFile();
  parentDir.mkdirs();

  final OutputStream os = new BufferedOutputStream(new FileOutputStream(filename));
  try {
    final DataOutput out = new OutputStreamDataOutput(os);
    CodecUtil.writeHeader(out, CharacterDefinition.HEADER, CharacterDefinition.VERSION);
    out.writeBytes(characterCategoryMap, 0, characterCategoryMap.length);

    // One byte per class: bit 0 mirrors invokeMap, bit 1 mirrors groupMap.
    for (int classId = 0; classId < CharacterDefinition.CLASS_COUNT; classId++) {
      int flags = 0x00;
      if (invokeMap[classId]) {
        flags |= 0x01;
      }
      if (groupMap[classId]) {
        flags |= 0x02;
      }
      out.writeByte((byte) flags);
    }
  } finally {
    // Closing the buffered stream also flushes pending bytes to the file.
    os.close();
  }
}
项目:Maskana-Gestor-de-Conocimiento    文件:CharacterDefinitionWriter.java   
/**
 * Stores the character category map and the per-class invoke/group flags in a
 * file beneath {@code baseDir}.
 * <p>
 * The file name is derived from the fully qualified class name of
 * {@link CharacterDefinition}, using the platform separator instead of dots, plus
 * {@code CharacterDefinition.FILENAME_SUFFIX}.
 *
 * @param baseDir directory under which the file is created
 * @throws IOException if the file cannot be opened or written
 */
public void write(String baseDir) throws IOException {
  final String classPath = CharacterDefinition.class.getName().replace('.', File.separatorChar);
  final String filename = baseDir + File.separator + classPath + CharacterDefinition.FILENAME_SUFFIX;

  // Create any missing directories on the way to the target file.
  new File(filename).getParentFile().mkdirs();

  final OutputStream os = new BufferedOutputStream(new FileOutputStream(filename));
  try {
    final DataOutput out = new OutputStreamDataOutput(os);
    CodecUtil.writeHeader(out, CharacterDefinition.HEADER, CharacterDefinition.VERSION);
    out.writeBytes(characterCategoryMap, 0, characterCategoryMap.length);

    int index = 0;
    while (index < CharacterDefinition.CLASS_COUNT) {
      // Pack both booleans of this class into a single byte (invoke = 0x01, group = 0x02).
      final int invokeBit = invokeMap[index] ? 0x01 : 0x00;
      final int groupBit = groupMap[index] ? 0x02 : 0x00;
      out.writeByte((byte) (invokeBit | groupBit));
      index++;
    }
  } finally {
    // close() flushes the buffer before releasing the file.
    os.close();
  }
}
项目:search    文件:CharacterDefinitionWriter.java   
/**
 * Records which character class a code point belongs to.
 * <p>
 * When {@code characterClassName} contains several space-separated names, only
 * the first one is used. Code point {@code 0x30FB} (Nakaguro) is always stored
 * as {@code "SYMBOL"}, whatever name is passed in.
 *
 * @param codePoint code point used as index into the category map
 * @param characterClassName character class name (first space-separated token is used)
 */
public void putCharacterCategory(int codePoint, String characterClassName) {
  // Keep only the first category class listed.
  final String firstCategory = characterClassName.split(" ")[0];

  // Nakaguro is overridden to SYMBOL.
  final String effectiveCategory = (codePoint == 0x30FB) ? "SYMBOL" : firstCategory;

  final byte classId = CharacterDefinition.lookupCharacterClass(effectiveCategory);
  characterCategoryMap[codePoint] = classId;
}
项目:search    文件:UnknownDictionaryBuilder.java   
/**
 * Reads an unknown-word definition file and loads its entries into a new
 * {@link UnknownDictionaryWriter}.
 * <p>
 * The dictionary is first seeded with {@code NGRAM_DICTIONARY_ENTRY}. Every line
 * of the file is then parsed as CSV (with two extra {@code *} fields appended),
 * the parsed entries are sorted by the character class id of their first field,
 * and they are added to the dictionary in that order.
 *
 * @param filename path of the definition file to read
 * @param encoding name of the charset the file is encoded in
 * @return the populated dictionary writer
 * @throws IOException if the file cannot be opened or read, or if it contains
 *         input that is malformed or unmappable in the given charset
 */
public UnknownDictionaryWriter readDictionaryFile(String filename, String encoding)
    throws IOException {
  UnknownDictionaryWriter dictionary = new UnknownDictionaryWriter(5 * 1024 * 1024);

  List<String[]> lines = new ArrayList<>();

  // try-with-resources releases the file handle on every path, including
  // decoding and parsing failures; previously the stream was never closed.
  try (FileInputStream inputStream = new FileInputStream(filename)) {
    Charset cs = Charset.forName(encoding);
    // REPORT makes bad input fail loudly instead of being silently replaced.
    CharsetDecoder decoder = cs.newDecoder()
        .onMalformedInput(CodingErrorAction.REPORT)
        .onUnmappableCharacter(CodingErrorAction.REPORT);
    InputStreamReader streamReader = new InputStreamReader(inputStream, decoder);
    LineNumberReader lineReader = new LineNumberReader(streamReader);

    dictionary.put(CSVUtil.parse(NGRAM_DICTIONARY_ENTRY));

    String line = null;
    while ((line = lineReader.readLine()) != null) {
      // note: unk.def only has 10 fields, it simplifies the writer to just append empty reading and pronunciation,
      // even though the unknown dictionary returns hardcoded null here.
      final String[] parsed = CSVUtil.parse(line + ",*,*"); // Probably we don't need to validate entry
      lines.add(parsed);
    }
  }

  // Stable sort: entries of the same character class keep their file order.
  Collections.sort(lines, new Comparator<String[]>() {
    @Override
    public int compare(String[] left, String[] right) {
      int leftId = CharacterDefinition.lookupCharacterClass(left[0]);
      int rightId = CharacterDefinition.lookupCharacterClass(right[0]);
      return Integer.compare(leftId, rightId);
    }
  });

  for (String[] entry : lines) {
    dictionary.put(entry);
  }

  return dictionary;
}
项目:search    文件:UnknownDictionaryWriter.java   
/**
 * Adds an entry through the superclass and registers it under the character
 * class named by its first field.
 * <p>
 * The word id mapped to the character class is the buffer position observed
 * before the superclass writes the entry.
 *
 * @param entry parsed dictionary entry; {@code entry[0]} is the character class name
 * @return the value returned by the superclass {@code put}
 */
@Override
public int put(String[] entry) {
  // Capture the position first: it identifies the entry about to be written.
  final int entryWordId = buffer.position();
  final int written = super.put(entry);

  // Associate the character class with the word id in the target map.
  final int classId = CharacterDefinition.lookupCharacterClass(entry[0]);
  addMapping(classId, entryWordId);

  return written;
}
项目:NYBC    文件:CharacterDefinitionWriter.java   
/**
 * Maps a code point to a character class in the category map.
 * <p>
 * Only the first space-separated token of {@code characterClassName} is
 * considered. The Nakaguro code point {@code 0x30FB} is forced to
 * {@code "SYMBOL"}.
 *
 * @param codePoint code point used as index into the category map
 * @param characterClassName character class name (first space-separated token is used)
 */
public void putCharacterCategory(int codePoint, String characterClassName) {
  // Use the first category class only.
  final String[] categories = characterClassName.split(" ");
  String selected = categories[0];

  // Override Nakaguro.
  if (codePoint == 0x30FB) {
    selected = "SYMBOL";
  }

  characterCategoryMap[codePoint] = CharacterDefinition.lookupCharacterClass(selected);
}
项目:NYBC    文件:UnknownDictionaryBuilder.java   
/**
 * Reads an unknown-word definition file and loads its entries into a new
 * {@link UnknownDictionaryWriter}.
 * <p>
 * The dictionary is first seeded with {@code NGRAM_DICTIONARY_ENTRY}. Every line
 * of the file is then parsed as CSV (with two extra {@code *} fields appended),
 * the parsed entries are sorted by the character class id of their first field,
 * and they are added to the dictionary in that order.
 *
 * @param filename path of the definition file to read
 * @param encoding name of the charset the file is encoded in
 * @return the populated dictionary writer
 * @throws IOException if the file cannot be opened or read, or if it contains
 *         input that is malformed or unmappable in the given charset
 */
public UnknownDictionaryWriter readDictionaryFile(String filename, String encoding)
    throws IOException {
  UnknownDictionaryWriter dictionary = new UnknownDictionaryWriter(5 * 1024 * 1024);

  List<String[]> lines = new ArrayList<String[]>();

  FileInputStream inputStream = new FileInputStream(filename);
  try {
    Charset cs = Charset.forName(encoding);
    // REPORT makes bad input fail loudly instead of being silently replaced.
    CharsetDecoder decoder = cs.newDecoder()
        .onMalformedInput(CodingErrorAction.REPORT)
        .onUnmappableCharacter(CodingErrorAction.REPORT);
    InputStreamReader streamReader = new InputStreamReader(inputStream, decoder);
    LineNumberReader lineReader = new LineNumberReader(streamReader);

    dictionary.put(CSVUtil.parse(NGRAM_DICTIONARY_ENTRY));

    String line = null;
    while ((line = lineReader.readLine()) != null) {
      // note: unk.def only has 10 fields, it simplifies the writer to just append empty reading and pronunciation,
      // even though the unknown dictionary returns hardcoded null here.
      final String[] parsed = CSVUtil.parse(line + ",*,*"); // Probably we don't need to validate entry
      lines.add(parsed);
    }
  } finally {
    // Release the file handle on every path; previously the stream was never closed.
    inputStream.close();
  }

  // Stable sort: entries of the same character class keep their file order.
  Collections.sort(lines, new Comparator<String[]>() {
    public int compare(String[] left, String[] right) {
      int leftId = CharacterDefinition.lookupCharacterClass(left[0]);
      int rightId = CharacterDefinition.lookupCharacterClass(right[0]);
      return leftId - rightId;
    }
  });

  for (String[] entry : lines) {
    dictionary.put(entry);
  }

  return dictionary;
}
项目:NYBC    文件:UnknownDictionaryWriter.java   
/**
 * Stores an entry via the superclass and maps its character class to the
 * entry's word id.
 * <p>
 * The word id is the buffer position taken before the entry is written; the
 * character class is looked up from {@code entry[0]}.
 *
 * @param entry parsed dictionary entry; {@code entry[0]} is the character class name
 * @return the value returned by the superclass {@code put}
 */
@Override
public int put(String[] entry) {
  // Position before writing serves as the word id of this entry.
  final int idBeforeWrite = buffer.position();
  final int superResult = super.put(entry);

  // Register the entry in the target map under its character class.
  final int charClass = CharacterDefinition.lookupCharacterClass(entry[0]);
  addMapping(charClass, idBeforeWrite);

  return superResult;
}
项目:Maskana-Gestor-de-Conocimiento    文件:CharacterDefinitionWriter.java   
/**
 * Assigns a character class to a code point in the category map.
 * <p>
 * If {@code characterClassName} lists several space-separated classes, the
 * first one wins. Code point {@code 0x30FB} (Nakaguro) is always assigned
 * {@code "SYMBOL"}.
 *
 * @param codePoint code point used as index into the category map
 * @param characterClassName character class name (first space-separated token is used)
 */
public void putCharacterCategory(int codePoint, String characterClassName) {
  // First listed category class.
  final String primaryClass = characterClassName.split(" ")[0];

  final String resolvedClass;
  if (codePoint == 0x30FB) {
    // Nakaguro override.
    resolvedClass = "SYMBOL";
  } else {
    resolvedClass = primaryClass;
  }

  characterCategoryMap[codePoint] = CharacterDefinition.lookupCharacterClass(resolvedClass);
}
项目:Maskana-Gestor-de-Conocimiento    文件:UnknownDictionaryBuilder.java   
/**
 * Reads an unknown-word definition file and loads its entries into a new
 * {@link UnknownDictionaryWriter}.
 * <p>
 * The dictionary is first seeded with {@code NGRAM_DICTIONARY_ENTRY}. Every line
 * of the file is then parsed as CSV (with two extra {@code *} fields appended),
 * the parsed entries are sorted by the character class id of their first field,
 * and they are added to the dictionary in that order.
 *
 * @param filename path of the definition file to read
 * @param encoding name of the charset the file is encoded in
 * @return the populated dictionary writer
 * @throws IOException if the file cannot be opened or read, or if it contains
 *         input that is malformed or unmappable in the given charset
 */
public UnknownDictionaryWriter readDictionaryFile(String filename, String encoding)
    throws IOException {
  UnknownDictionaryWriter dictionary = new UnknownDictionaryWriter(5 * 1024 * 1024);

  List<String[]> lines = new ArrayList<String[]>();

  FileInputStream inputStream = new FileInputStream(filename);
  try {
    Charset cs = Charset.forName(encoding);
    // REPORT makes bad input fail loudly instead of being silently replaced.
    CharsetDecoder decoder = cs.newDecoder()
        .onMalformedInput(CodingErrorAction.REPORT)
        .onUnmappableCharacter(CodingErrorAction.REPORT);
    InputStreamReader streamReader = new InputStreamReader(inputStream, decoder);
    LineNumberReader lineReader = new LineNumberReader(streamReader);

    dictionary.put(CSVUtil.parse(NGRAM_DICTIONARY_ENTRY));

    String line = null;
    while ((line = lineReader.readLine()) != null) {
      // note: unk.def only has 10 fields, it simplifies the writer to just append empty reading and pronunciation,
      // even though the unknown dictionary returns hardcoded null here.
      final String[] parsed = CSVUtil.parse(line + ",*,*"); // Probably we don't need to validate entry
      lines.add(parsed);
    }
  } finally {
    // Release the file handle on every path; previously the stream was never closed.
    inputStream.close();
  }

  // Stable sort: entries of the same character class keep their file order.
  Collections.sort(lines, new Comparator<String[]>() {
    public int compare(String[] left, String[] right) {
      int leftId = CharacterDefinition.lookupCharacterClass(left[0]);
      int rightId = CharacterDefinition.lookupCharacterClass(right[0]);
      return leftId - rightId;
    }
  });

  for (String[] entry : lines) {
    dictionary.put(entry);
  }

  return dictionary;
}
项目:Maskana-Gestor-de-Conocimiento    文件:UnknownDictionaryWriter.java   
/**
 * Writes an entry using the superclass implementation and links the entry's
 * word id to its character class.
 * <p>
 * The word id is read from the buffer position before the write happens.
 *
 * @param entry parsed dictionary entry; {@code entry[0]} is the character class name
 * @return the value returned by the superclass {@code put}
 */
@Override
public int put(String[] entry) {
  // Word id of the entry = where the buffer stands before it is appended.
  final int currentWordId = buffer.position();

  final int outcome = super.put(entry);

  // Add the (character class, word id) pair to the target map.
  final int classOfEntry = CharacterDefinition.lookupCharacterClass(entry[0]);
  addMapping(classOfEntry, currentWordId);
  return outcome;
}
项目:search    文件:CharacterDefinitionWriter.java   
/**
 * Constructor for building. TODO: remove write access
 */
public CharacterDefinitionWriter() {
  Arrays.fill(characterCategoryMap, CharacterDefinition.DEFAULT);
}
项目:search    文件:CharacterDefinitionWriter.java   
/**
 * Stores the invoke and group flags for a character class.
 * <p>
 * Each flag is recorded as {@code true} only when the corresponding argument
 * equals {@code 1}; any other value is recorded as {@code false}.
 *
 * @param characterClassName name of the character class to configure
 * @param invoke invoke setting; {@code 1} enables the flag
 * @param group group setting; {@code 1} enables the flag
 * @param length length setting; currently not used
 */
public void putInvokeDefinition(String characterClassName, int invoke, int group, int length) {
  final byte classId = CharacterDefinition.lookupCharacterClass(characterClassName);
  final boolean invokeEnabled = (invoke == 1);
  final boolean groupEnabled = (group == 1);
  invokeMap[classId] = invokeEnabled;
  groupMap[classId] = groupEnabled;
  // TODO: length def ignored
}
项目:NYBC    文件:CharacterDefinitionWriter.java   
/**
 * Constructor for building. TODO: remove write access
 */
public CharacterDefinitionWriter() {
  Arrays.fill(characterCategoryMap, CharacterDefinition.DEFAULT);
}
项目:NYBC    文件:CharacterDefinitionWriter.java   
/**
 * Sets the invoke and group flags of the named character class.
 * <p>
 * A flag becomes {@code true} exactly when its argument is {@code 1}.
 *
 * @param characterClassName name of the character class to configure
 * @param invoke invoke setting; {@code 1} enables the flag
 * @param group group setting; {@code 1} enables the flag
 * @param length length setting; currently not used
 */
public void putInvokeDefinition(String characterClassName, int invoke, int group, int length) {
  final byte slot = CharacterDefinition.lookupCharacterClass(characterClassName);
  invokeMap[slot] = (1 == invoke);
  groupMap[slot] = (1 == group);
  // TODO: length def ignored
}
项目:Maskana-Gestor-de-Conocimiento    文件:CharacterDefinitionWriter.java   
/**
 * Constructor for building. TODO: remove write access
 */
public CharacterDefinitionWriter() {
  Arrays.fill(characterCategoryMap, CharacterDefinition.DEFAULT);
}
项目:Maskana-Gestor-de-Conocimiento    文件:CharacterDefinitionWriter.java   
/**
 * Records the invoke and group settings for one character class.
 * <p>
 * The stored booleans are {@code invoke == 1} and {@code group == 1}.
 *
 * @param characterClassName name of the character class to configure
 * @param invoke invoke setting; {@code 1} enables the flag
 * @param group group setting; {@code 1} enables the flag
 * @param length length setting; currently not used
 */
public void putInvokeDefinition(String characterClassName, int invoke, int group, int length) {
  final byte targetClass = CharacterDefinition.lookupCharacterClass(characterClassName);

  final boolean shouldInvoke = invoke == 1;
  invokeMap[targetClass] = shouldInvoke;

  final boolean shouldGroup = group == 1;
  groupMap[targetClass] = shouldGroup;
  // TODO: length def ignored
}