Java class org.apache.lucene.analysis.ja.dict.TokenInfoDictionary — example source code

Project: search    File: JapaneseTokenizer.java
/**
 * Create a new JapaneseTokenizer.
 *
 * @param factory the AttributeFactory to use
 * @param input Reader containing text
 * @param userDictionary Optional: if non-null, user dictionary.
 * @param discardPunctuation true if punctuation tokens should be dropped from the output.
 * @param mode tokenization mode.
 */
public JapaneseTokenizer
    (AttributeFactory factory, Reader input, UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
  super(factory, input);
  dictionary = TokenInfoDictionary.getInstance();
  fst = dictionary.getFST();
  unkDictionary = UnknownDictionary.getInstance();
  characterDefinition = unkDictionary.getCharacterDefinition();
  this.userDictionary = userDictionary;
  costs = ConnectionCosts.getInstance();
  fstReader = fst.getBytesReader();
  if (userDictionary != null) {
    userFST = userDictionary.getFST();
    userFSTReader = userFST.getBytesReader();
  } else {
    userFST = null;
    userFSTReader = null;
  }
  this.discardPunctuation = discardPunctuation;
  switch(mode){
    case SEARCH:
      searchMode = true;
      extendedMode = false;
      outputCompounds = true;
      break;
    case EXTENDED:
      searchMode = true;
      extendedMode = true;
      outputCompounds = false;
      break;
    default:
      searchMode = false;
      extendedMode = false;
      outputCompounds = false;
      break;
  }
  buffer.reset(this.input);

  resetState();

  dictionaryMap.put(Type.KNOWN, dictionary);
  dictionaryMap.put(Type.UNKNOWN, unkDictionary);
  dictionaryMap.put(Type.USER, userDictionary);
}
Project: search    File: TokenInfoDictionaryWriter.java
/**
 * Creates a writer for {@link TokenInfoDictionary} data.
 *
 * @param size sizing hint forwarded to the superclass writer — presumably an
 *             initial buffer capacity; confirm against the base class.
 */
public TokenInfoDictionaryWriter(int size) {
  super(TokenInfoDictionary.class, size);
}
Project: search    File: TokenInfoDictionaryWriter.java
/**
 * Writes the dictionary files via the superclass, then appends the FST file
 * for this dictionary under the same base file name.
 *
 * @param baseDir base output directory
 * @throws IOException if writing any of the files fails
 */
@Override
public void write(String baseDir) throws IOException {
  super.write(baseDir);
  final String baseName = getBaseFileName(baseDir);
  writeFST(baseName + TokenInfoDictionary.FST_FILENAME_SUFFIX);
}
Project: NYBC    File: JapaneseTokenizer.java
/**
 * Create a new JapaneseTokenizer.
 * 
 * @param input Reader containing text
 * @param userDictionary Optional: if non-null, user dictionary.
 * @param discardPunctuation true if punctuation tokens should be dropped from the output.
 * @param mode tokenization mode.
 */
public JapaneseTokenizer(Reader input, UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
  super(input);
  dictionary = TokenInfoDictionary.getInstance();
  fst = dictionary.getFST();
  unkDictionary = UnknownDictionary.getInstance();
  characterDefinition = unkDictionary.getCharacterDefinition();
  this.userDictionary = userDictionary;
  costs = ConnectionCosts.getInstance();
  fstReader = fst.getBytesReader();
  if (userDictionary != null) {
    userFST = userDictionary.getFST();
    userFSTReader = userFST.getBytesReader();
  } else {
    userFST = null;
    userFSTReader = null;
  }
  this.discardPunctuation = discardPunctuation;
  switch(mode){
    case SEARCH:
      searchMode = true;
      extendedMode = false;
      outputCompounds = true;
      break;
    case EXTENDED:
      searchMode = true;
      extendedMode = true;
      outputCompounds = false;
      break;
    default:
      searchMode = false;
      extendedMode = false;
      outputCompounds = false;
      break;
  }
  buffer.reset(null); // best effort NPE consumers that don't call reset()

  resetState();

  dictionaryMap.put(Type.KNOWN, dictionary);
  dictionaryMap.put(Type.UNKNOWN, unkDictionary);
  dictionaryMap.put(Type.USER, userDictionary);
}
Project: NYBC    File: TokenInfoDictionaryWriter.java
/**
 * Creates a writer for {@link TokenInfoDictionary} data.
 *
 * @param size sizing hint forwarded to the superclass writer — presumably an
 *             initial buffer capacity; confirm against the base class.
 */
public TokenInfoDictionaryWriter(int size) {
  super(TokenInfoDictionary.class, size);
}
Project: NYBC    File: TokenInfoDictionaryWriter.java
/**
 * Writes the dictionary files via the superclass, then appends the FST file
 * for this dictionary under the same base file name.
 *
 * @param baseDir base output directory
 * @throws IOException if writing any of the files fails
 */
@Override
public void write(String baseDir) throws IOException {
  super.write(baseDir);
  final String baseName = getBaseFileName(baseDir);
  writeFST(baseName + TokenInfoDictionary.FST_FILENAME_SUFFIX);
}
Project: read-open-source-code    File: JapaneseTokenizer.java
/**
 * Create a new JapaneseTokenizer.
 *
 * @param factory the AttributeFactory to use
 * @param input Reader containing text
 * @param userDictionary Optional: if non-null, user dictionary.
 * @param discardPunctuation true if punctuation tokens should be dropped from the output.
 * @param mode tokenization mode.
 */
public JapaneseTokenizer
    (AttributeFactory factory, Reader input, UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
  super(factory, input);
  dictionary = TokenInfoDictionary.getInstance();
  fst = dictionary.getFST();
  unkDictionary = UnknownDictionary.getInstance();
  characterDefinition = unkDictionary.getCharacterDefinition();
  this.userDictionary = userDictionary;
  costs = ConnectionCosts.getInstance();
  fstReader = fst.getBytesReader();
  if (userDictionary != null) {
    userFST = userDictionary.getFST();
    userFSTReader = userFST.getBytesReader();
  } else {
    userFST = null;
    userFSTReader = null;
  }
  this.discardPunctuation = discardPunctuation;
  switch(mode){
    case SEARCH:
      searchMode = true;
      extendedMode = false;
      outputCompounds = true;
      break;
    case EXTENDED:
      searchMode = true;
      extendedMode = true;
      outputCompounds = false;
      break;
    default:
      searchMode = false;
      extendedMode = false;
      outputCompounds = false;
      break;
  }
  buffer.reset(this.input);

  resetState();

  dictionaryMap.put(Type.KNOWN, dictionary);
  dictionaryMap.put(Type.UNKNOWN, unkDictionary);
  dictionaryMap.put(Type.USER, userDictionary);
}
Project: read-open-source-code    File: JapaneseTokenizer.java
/**
 * Create a new JapaneseTokenizer.
 *
 * @param factory the AttributeFactory to use
 * @param input Reader containing text
 * @param userDictionary Optional: if non-null, user dictionary.
 * @param discardPunctuation true if punctuation tokens should be dropped from the output.
 * @param mode tokenization mode.
 */
public JapaneseTokenizer
    (AttributeFactory factory, Reader input, UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
  super(factory, input);
  dictionary = TokenInfoDictionary.getInstance();
  fst = dictionary.getFST();
  unkDictionary = UnknownDictionary.getInstance();
  characterDefinition = unkDictionary.getCharacterDefinition();
  this.userDictionary = userDictionary;
  costs = ConnectionCosts.getInstance();
  fstReader = fst.getBytesReader();
  if (userDictionary != null) {
    userFST = userDictionary.getFST();
    userFSTReader = userFST.getBytesReader();
  } else {
    userFST = null;
    userFSTReader = null;
  }
  this.discardPunctuation = discardPunctuation;
  switch(mode){
    case SEARCH:
      searchMode = true;
      extendedMode = false;
      outputCompounds = true;
      break;
    case EXTENDED:
      searchMode = true;
      extendedMode = true;
      outputCompounds = false;
      break;
    default:
      searchMode = false;
      extendedMode = false;
      outputCompounds = false;
      break;
  }
  buffer.reset(this.input);

  resetState();

  dictionaryMap.put(Type.KNOWN, dictionary);
  dictionaryMap.put(Type.UNKNOWN, unkDictionary);
  dictionaryMap.put(Type.USER, userDictionary);
}
Project: Maskana-Gestor-de-Conocimiento    File: JapaneseTokenizer.java
/**
 * Create a new JapaneseTokenizer.
 *
 * @param factory the AttributeFactory to use
 * @param input Reader containing text
 * @param userDictionary Optional: if non-null, user dictionary.
 * @param discardPunctuation true if punctuation tokens should be dropped from the output.
 * @param mode tokenization mode.
 */
public JapaneseTokenizer
    (AttributeFactory factory, Reader input, UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
  super(factory, input);
  dictionary = TokenInfoDictionary.getInstance();
  fst = dictionary.getFST();
  unkDictionary = UnknownDictionary.getInstance();
  characterDefinition = unkDictionary.getCharacterDefinition();
  this.userDictionary = userDictionary;
  costs = ConnectionCosts.getInstance();
  fstReader = fst.getBytesReader();
  if (userDictionary != null) {
    userFST = userDictionary.getFST();
    userFSTReader = userFST.getBytesReader();
  } else {
    userFST = null;
    userFSTReader = null;
  }
  this.discardPunctuation = discardPunctuation;
  switch(mode){
    case SEARCH:
      searchMode = true;
      extendedMode = false;
      outputCompounds = true;
      break;
    case EXTENDED:
      searchMode = true;
      extendedMode = true;
      outputCompounds = false;
      break;
    default:
      searchMode = false;
      extendedMode = false;
      outputCompounds = false;
      break;
  }
  buffer.reset(this.input);

  resetState();

  dictionaryMap.put(Type.KNOWN, dictionary);
  dictionaryMap.put(Type.UNKNOWN, unkDictionary);
  dictionaryMap.put(Type.USER, userDictionary);
}
Project: Maskana-Gestor-de-Conocimiento    File: TokenInfoDictionaryWriter.java
/**
 * Creates a writer for {@link TokenInfoDictionary} data.
 *
 * @param size sizing hint forwarded to the superclass writer — presumably an
 *             initial buffer capacity; confirm against the base class.
 */
public TokenInfoDictionaryWriter(int size) {
  super(TokenInfoDictionary.class, size);
}
Project: Maskana-Gestor-de-Conocimiento    File: TokenInfoDictionaryWriter.java
/**
 * Writes the dictionary files via the superclass, then appends the FST file
 * for this dictionary under the same base file name.
 *
 * @param baseDir base output directory
 * @throws IOException if writing any of the files fails
 */
@Override
public void write(String baseDir) throws IOException {
  super.write(baseDir);
  final String baseName = getBaseFileName(baseDir);
  writeFST(baseName + TokenInfoDictionary.FST_FILENAME_SUFFIX);
}