Example usages of the Java class org.apache.lucene.analysis.ja.dict.ConnectionCosts, collected from open-source projects.

Project: search — File: TestJapaneseTokenizer.java
/** Verifies that the Graphviz (dot) lattice dump produced during analysis contains an expected cost label. */
public void testLatticeToDot() throws Exception {
  final GraphvizFormatter gv2 = new GraphvizFormatter(ConnectionCosts.getInstance());
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      JapaneseTokenizer tokenizer = new JapaneseTokenizer(newAttributeFactory(), reader, readDict(), false, Mode.SEARCH);
      // Attach the formatter so tokenization records the lattice into gv2.
      tokenizer.setGraphvizFormatter(gv2);
      return new TokenStreamComponents(tokenizer, tokenizer);
    }
  };

  String input = "スペースステーションに行きます。うたがわしい。";
  String[] surfaceForms = {
      "スペース", "ステーション", "に", "行き", "ます", "。",
      "うたがわしい", "。"
  };
  assertAnalyzesTo(analyzer,
                   input,
                   surfaceForms);

  // contains() is clearer than indexOf(...) != -1; "22.0" is a node label expected in the dot output.
  assertTrue(gv2.finish().contains("22.0"));
}
Project: NYBC — File: TestJapaneseTokenizer.java
/** Verifies that the Graphviz (dot) lattice dump produced during analysis contains an expected cost label. */
public void testLatticeToDot() throws Exception {
  final GraphvizFormatter gv2 = new GraphvizFormatter(ConnectionCosts.getInstance());
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      JapaneseTokenizer tokenizer = new JapaneseTokenizer(reader, readDict(), false, Mode.SEARCH);
      // Attach the formatter so tokenization records the lattice into gv2.
      tokenizer.setGraphvizFormatter(gv2);
      return new TokenStreamComponents(tokenizer, tokenizer);
    }
  };

  String input = "スペースステーションに行きます。うたがわしい。";
  String[] surfaceForms = {
      "スペース", "ステーション", "に", "行き", "ます", "。",
      "うたがわしい", "。"
  };
  assertAnalyzesTo(analyzer,
                   input,
                   surfaceForms);

  // contains() is clearer than indexOf(...) != -1; "22.0" is a node label expected in the dot output.
  assertTrue(gv2.finish().contains("22.0"));
}
Project: Maskana-Gestor-de-Conocimiento — File: TestJapaneseTokenizer.java
/** Verifies that the Graphviz (dot) lattice dump produced during analysis contains an expected cost label. */
public void testLatticeToDot() throws Exception {
  final GraphvizFormatter gv2 = new GraphvizFormatter(ConnectionCosts.getInstance());
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      JapaneseTokenizer tokenizer = new JapaneseTokenizer(reader, readDict(), false, Mode.SEARCH);
      // Attach the formatter so tokenization records the lattice into gv2.
      tokenizer.setGraphvizFormatter(gv2);
      return new TokenStreamComponents(tokenizer, tokenizer);
    }
  };

  String input = "スペースステーションに行きます。うたがわしい。";
  String[] surfaceForms = {
      "スペース", "ステーション", "に", "行き", "ます", "。",
      "うたがわしい", "。"
  };
  assertAnalyzesTo(analyzer,
                   input,
                   surfaceForms);

  // contains() is clearer than indexOf(...) != -1; "22.0" is a node label expected in the dot output.
  assertTrue(gv2.finish().contains("22.0"));
}
Project: search — File: GraphvizFormatter.java
/**
 * Sets up the formatter: stores the connection costs, allocates the
 * best-path map, and seeds the output buffer with the graph header plus
 * an invisible "init" node pointing at the BOS node (0.0).
 */
public GraphvizFormatter(ConnectionCosts costs) {
  this.costs = costs;
  this.bestPathMap = new HashMap<>();
  sb.append(formatHeader())
    .append("  init [style=invis]\n")
    .append("  init -> 0.0 [label=\"" + BOS_LABEL + "\"]\n");
}
Project: NYBC — File: GraphvizFormatter.java
/**
 * Creates a formatter that renders the tokenization lattice as Graphviz (dot) output.
 *
 * @param costs connection costs used when labeling the rendered graph
 */
public GraphvizFormatter(ConnectionCosts costs) {
  this.costs = costs;
  this.bestPathMap = new HashMap<>();
  // Emit the graph header and a hidden "init" node with an edge to the BOS (0.0) node.
  sb.append(formatHeader());
  sb.append("  init [style=invis]\n");
  // append(BOS_LABEL) avoids building an intermediate concatenated String.
  sb.append("  init -> 0.0 [label=\"").append(BOS_LABEL).append("\"]\n");
}
Project: read-open-source-code — File: GraphvizFormatter.java
/**
 * Creates a formatter that renders the tokenization lattice as Graphviz (dot) output.
 *
 * @param costs connection costs used when labeling the rendered graph
 */
public GraphvizFormatter(ConnectionCosts costs) {
  this.costs = costs;
  this.bestPathMap = new HashMap<>();
  // Emit the graph header and a hidden "init" node with an edge to the BOS (0.0) node.
  sb.append(formatHeader());
  sb.append("  init [style=invis]\n");
  // append(BOS_LABEL) avoids building an intermediate concatenated String.
  sb.append("  init -> 0.0 [label=\"").append(BOS_LABEL).append("\"]\n");
}
Project: read-open-source-code — File: GraphvizFormatter.java
/**
 * Creates a formatter that renders the tokenization lattice as Graphviz (dot) output.
 *
 * @param costs connection costs used when labeling the rendered graph
 */
public GraphvizFormatter(ConnectionCosts costs) {
  this.costs = costs;
  this.bestPathMap = new HashMap<>();
  // Emit the graph header and a hidden "init" node with an edge to the BOS (0.0) node.
  sb.append(formatHeader());
  sb.append("  init [style=invis]\n");
  // append(BOS_LABEL) avoids building an intermediate concatenated String.
  sb.append("  init -> 0.0 [label=\"").append(BOS_LABEL).append("\"]\n");
}
Project: Maskana-Gestor-de-Conocimiento — File: GraphvizFormatter.java
/**
 * Creates a formatter that renders the tokenization lattice as Graphviz (dot) output.
 *
 * @param costs connection costs used when labeling the rendered graph
 */
public GraphvizFormatter(ConnectionCosts costs) {
  this.costs = costs;
  this.bestPathMap = new HashMap<>();
  // Emit the graph header and a hidden "init" node with an edge to the BOS (0.0) node.
  sb.append(formatHeader());
  sb.append("  init [style=invis]\n");
  // append(BOS_LABEL) avoids building an intermediate concatenated String.
  sb.append("  init -> 0.0 [label=\"").append(BOS_LABEL).append("\"]\n");
}
Project: search — File: JapaneseTokenizer.java
/**
 * Create a new JapaneseTokenizer.
 *
 * @param factory the AttributeFactory to use
 * @param input Reader containing text
 * @param userDictionary Optional: if non-null, user dictionary.
 * @param discardPunctuation true if punctuation tokens should be dropped from the output.
 * @param mode tokenization mode.
 */
public JapaneseTokenizer
    (AttributeFactory factory, Reader input, UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
  super(factory, input);
  // Look up the shared system dictionaries once, up front.
  dictionary = TokenInfoDictionary.getInstance();
  unkDictionary = UnknownDictionary.getInstance();
  costs = ConnectionCosts.getInstance();
  characterDefinition = unkDictionary.getCharacterDefinition();
  fst = dictionary.getFST();
  fstReader = fst.getBytesReader();
  // The user dictionary is optional; its FST and reader stay null when absent.
  this.userDictionary = userDictionary;
  userFST = (userDictionary == null) ? null : userDictionary.getFST();
  userFSTReader = (userFST == null) ? null : userFST.getBytesReader();
  this.discardPunctuation = discardPunctuation;
  // Mode flags mirror the original switch: SEARCH -> search + compounds,
  // EXTENDED -> search + extended, anything else -> all three flags off.
  searchMode = (mode == Mode.SEARCH || mode == Mode.EXTENDED);
  extendedMode = (mode == Mode.EXTENDED);
  outputCompounds = (mode == Mode.SEARCH);
  buffer.reset(this.input);

  resetState();

  // Route each token type to the dictionary that produced it.
  dictionaryMap.put(Type.KNOWN, dictionary);
  dictionaryMap.put(Type.UNKNOWN, unkDictionary);
  dictionaryMap.put(Type.USER, userDictionary);
}
Project: NYBC — File: JapaneseTokenizer.java
/**
 * Create a new JapaneseTokenizer.
 *
 * @param input Reader containing text
 * @param userDictionary Optional: if non-null, user dictionary.
 * @param discardPunctuation true if punctuation tokens should be dropped from the output.
 * @param mode tokenization mode.
 */
public JapaneseTokenizer(Reader input, UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
  super(input);
  // Look up the shared system dictionaries once, up front.
  dictionary = TokenInfoDictionary.getInstance();
  unkDictionary = UnknownDictionary.getInstance();
  costs = ConnectionCosts.getInstance();
  characterDefinition = unkDictionary.getCharacterDefinition();
  fst = dictionary.getFST();
  fstReader = fst.getBytesReader();
  // The user dictionary is optional; its FST and reader stay null when absent.
  this.userDictionary = userDictionary;
  userFST = (userDictionary == null) ? null : userDictionary.getFST();
  userFSTReader = (userFST == null) ? null : userFST.getBytesReader();
  this.discardPunctuation = discardPunctuation;
  // Mode flags mirror the original switch: SEARCH -> search + compounds,
  // EXTENDED -> search + extended, anything else -> all three flags off.
  searchMode = (mode == Mode.SEARCH || mode == Mode.EXTENDED);
  extendedMode = (mode == Mode.EXTENDED);
  outputCompounds = (mode == Mode.SEARCH);
  buffer.reset(null); // best-effort NPE for consumers that forget to call reset()

  resetState();

  // Route each token type to the dictionary that produced it.
  dictionaryMap.put(Type.KNOWN, dictionary);
  dictionaryMap.put(Type.UNKNOWN, unkDictionary);
  dictionaryMap.put(Type.USER, userDictionary);
}
Project: read-open-source-code — File: JapaneseTokenizer.java
/**
 * Create a new JapaneseTokenizer.
 *
 * @param factory the AttributeFactory to use
 * @param input Reader containing text
 * @param userDictionary Optional: if non-null, user dictionary.
 * @param discardPunctuation true if punctuation tokens should be dropped from the output.
 * @param mode tokenization mode.
 */
public JapaneseTokenizer
    (AttributeFactory factory, Reader input, UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
  super(factory, input);
  // Look up the shared system dictionaries once, up front.
  dictionary = TokenInfoDictionary.getInstance();
  unkDictionary = UnknownDictionary.getInstance();
  costs = ConnectionCosts.getInstance();
  characterDefinition = unkDictionary.getCharacterDefinition();
  fst = dictionary.getFST();
  fstReader = fst.getBytesReader();
  // The user dictionary is optional; its FST and reader stay null when absent.
  this.userDictionary = userDictionary;
  userFST = (userDictionary == null) ? null : userDictionary.getFST();
  userFSTReader = (userFST == null) ? null : userFST.getBytesReader();
  this.discardPunctuation = discardPunctuation;
  // Mode flags mirror the original switch: SEARCH -> search + compounds,
  // EXTENDED -> search + extended, anything else -> all three flags off.
  searchMode = (mode == Mode.SEARCH || mode == Mode.EXTENDED);
  extendedMode = (mode == Mode.EXTENDED);
  outputCompounds = (mode == Mode.SEARCH);
  buffer.reset(this.input);

  resetState();

  // Route each token type to the dictionary that produced it.
  dictionaryMap.put(Type.KNOWN, dictionary);
  dictionaryMap.put(Type.UNKNOWN, unkDictionary);
  dictionaryMap.put(Type.USER, userDictionary);
}
Project: read-open-source-code — File: JapaneseTokenizer.java
/**
 * Create a new JapaneseTokenizer.
 *
 * @param factory the AttributeFactory to use
 * @param input Reader containing text
 * @param userDictionary Optional: if non-null, user dictionary.
 * @param discardPunctuation true if punctuation tokens should be dropped from the output.
 * @param mode tokenization mode.
 */
public JapaneseTokenizer
    (AttributeFactory factory, Reader input, UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
  super(factory, input);
  // Look up the shared system dictionaries once, up front.
  dictionary = TokenInfoDictionary.getInstance();
  unkDictionary = UnknownDictionary.getInstance();
  costs = ConnectionCosts.getInstance();
  characterDefinition = unkDictionary.getCharacterDefinition();
  fst = dictionary.getFST();
  fstReader = fst.getBytesReader();
  // The user dictionary is optional; its FST and reader stay null when absent.
  this.userDictionary = userDictionary;
  userFST = (userDictionary == null) ? null : userDictionary.getFST();
  userFSTReader = (userFST == null) ? null : userFST.getBytesReader();
  this.discardPunctuation = discardPunctuation;
  // Mode flags mirror the original switch: SEARCH -> search + compounds,
  // EXTENDED -> search + extended, anything else -> all three flags off.
  searchMode = (mode == Mode.SEARCH || mode == Mode.EXTENDED);
  extendedMode = (mode == Mode.EXTENDED);
  outputCompounds = (mode == Mode.SEARCH);
  buffer.reset(this.input);

  resetState();

  // Route each token type to the dictionary that produced it.
  dictionaryMap.put(Type.KNOWN, dictionary);
  dictionaryMap.put(Type.UNKNOWN, unkDictionary);
  dictionaryMap.put(Type.USER, userDictionary);
}
Project: Maskana-Gestor-de-Conocimiento — File: JapaneseTokenizer.java
/**
 * Create a new JapaneseTokenizer.
 *
 * @param factory the AttributeFactory to use
 * @param input Reader containing text
 * @param userDictionary Optional: if non-null, user dictionary.
 * @param discardPunctuation true if punctuation tokens should be dropped from the output.
 * @param mode tokenization mode.
 */
public JapaneseTokenizer
    (AttributeFactory factory, Reader input, UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
  super(factory, input);
  // Look up the shared system dictionaries once, up front.
  dictionary = TokenInfoDictionary.getInstance();
  unkDictionary = UnknownDictionary.getInstance();
  costs = ConnectionCosts.getInstance();
  characterDefinition = unkDictionary.getCharacterDefinition();
  fst = dictionary.getFST();
  fstReader = fst.getBytesReader();
  // The user dictionary is optional; its FST and reader stay null when absent.
  this.userDictionary = userDictionary;
  userFST = (userDictionary == null) ? null : userDictionary.getFST();
  userFSTReader = (userFST == null) ? null : userFST.getBytesReader();
  this.discardPunctuation = discardPunctuation;
  // Mode flags mirror the original switch: SEARCH -> search + compounds,
  // EXTENDED -> search + extended, anything else -> all three flags off.
  searchMode = (mode == Mode.SEARCH || mode == Mode.EXTENDED);
  extendedMode = (mode == Mode.EXTENDED);
  outputCompounds = (mode == Mode.SEARCH);
  buffer.reset(this.input);

  resetState();

  // Route each token type to the dictionary that produced it.
  dictionaryMap.put(Type.KNOWN, dictionary);
  dictionaryMap.put(Type.UNKNOWN, unkDictionary);
  dictionaryMap.put(Type.USER, unkDictionary == null ? null : userDictionary);
}