/**
 * Runs a SEARCH-mode analysis with a Graphviz formatter attached and checks that
 * the rendered lattice mentions an expected connection-cost label.
 */
public void testLatticeToDot() throws Exception {
  final GraphvizFormatter formatter = new GraphvizFormatter(ConnectionCosts.getInstance());
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      final JapaneseTokenizer tokenizer =
          new JapaneseTokenizer(newAttributeFactory(), reader, readDict(), false, Mode.SEARCH);
      tokenizer.setGraphvizFormatter(formatter);
      return new TokenStreamComponents(tokenizer, tokenizer);
    }
  };
  // Verify the expected segmentation of the sample sentence.
  final String text = "スペースステーションに行きます。うたがわしい。";
  final String[] expectedSurfaces = {
    "スペース", "ステーション", "に", "行き", "ます", "。", "うたがわしい", "。"
  };
  assertAnalyzesTo(analyzer, text, expectedSurfaces);
  // The dot output produced during analysis should contain this cost value.
  assertTrue(formatter.finish().contains("22.0"));
}
/**
 * Runs a SEARCH-mode analysis with a Graphviz formatter attached and checks that
 * the rendered lattice mentions an expected connection-cost label.
 */
public void testLatticeToDot() throws Exception {
  final GraphvizFormatter formatter = new GraphvizFormatter(ConnectionCosts.getInstance());
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      final JapaneseTokenizer tokenizer =
          new JapaneseTokenizer(reader, readDict(), false, Mode.SEARCH);
      tokenizer.setGraphvizFormatter(formatter);
      return new TokenStreamComponents(tokenizer, tokenizer);
    }
  };
  // Verify the expected segmentation of the sample sentence.
  final String text = "スペースステーションに行きます。うたがわしい。";
  final String[] expectedSurfaces = {
    "スペース", "ステーション", "に", "行き", "ます", "。", "うたがわしい", "。"
  };
  assertAnalyzesTo(analyzer, text, expectedSurfaces);
  // The dot output produced during analysis should contain this cost value.
  assertTrue(formatter.finish().contains("22.0"));
}
/**
 * Sole constructor.
 *
 * @param costs connection costs used to label lattice arcs
 */
public GraphvizFormatter(ConnectionCosts costs) {
  this.costs = costs;
  this.bestPathMap = new HashMap<>();
  // Emit the graph header and an invisible start node feeding the BOS arc.
  sb.append(formatHeader())
      .append(" init [style=invis]\n")
      .append(" init -> 0.0 [label=\"" + BOS_LABEL + "\"]\n");
}
/**
 * Sole constructor.
 *
 * @param costs connection costs used to label lattice arcs
 */
public GraphvizFormatter(ConnectionCosts costs) {
  this.costs = costs;
  // Diamond operator: the type arguments are inferred from the field declaration
  // (consistent with the rest of the file, which already uses Java 7 inference).
  this.bestPathMap = new HashMap<>();
  // Emit the graph header and an invisible start node feeding the BOS arc.
  sb.append(formatHeader());
  sb.append(" init [style=invis]\n");
  sb.append(" init -> 0.0 [label=\"" + BOS_LABEL + "\"]\n");
}
/**
 * Create a new JapaneseTokenizer.
 *
 * @param factory the AttributeFactory to use
 * @param input Reader containing text
 * @param userDictionary Optional: if non-null, user dictionary.
 * @param discardPunctuation true if punctuation tokens should be dropped from the output.
 * @param mode tokenization mode.
 */
public JapaneseTokenizer(AttributeFactory factory, Reader input, UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
  super(factory, input);
  // System dictionary, its FST, and the unknown-word dictionary are singletons.
  dictionary = TokenInfoDictionary.getInstance();
  fst = dictionary.getFST();
  unkDictionary = UnknownDictionary.getInstance();
  characterDefinition = unkDictionary.getCharacterDefinition();
  costs = ConnectionCosts.getInstance();
  fstReader = fst.getBytesReader();
  // The user dictionary (and its FST) is optional.
  this.userDictionary = userDictionary;
  userFST = (userDictionary == null) ? null : userDictionary.getFST();
  userFSTReader = (userFST == null) ? null : userFST.getBytesReader();
  this.discardPunctuation = discardPunctuation;
  // Mode flags: SEARCH and EXTENDED both enable search-mode heuristics;
  // only SEARCH emits compound tokens, only EXTENDED unigrams unknown words.
  searchMode = (mode == Mode.SEARCH) || (mode == Mode.EXTENDED);
  extendedMode = (mode == Mode.EXTENDED);
  outputCompounds = (mode == Mode.SEARCH);
  buffer.reset(this.input);
  resetState();
  dictionaryMap.put(Type.KNOWN, dictionary);
  dictionaryMap.put(Type.UNKNOWN, unkDictionary);
  dictionaryMap.put(Type.USER, userDictionary);
}
/**
 * Create a new JapaneseTokenizer.
 *
 * @param input Reader containing text
 * @param userDictionary Optional: if non-null, user dictionary.
 * @param discardPunctuation true if punctuation tokens should be dropped from the output.
 * @param mode tokenization mode.
 */
public JapaneseTokenizer(Reader input, UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
  super(input);
  // System dictionary, its FST, and the unknown-word dictionary are singletons.
  dictionary = TokenInfoDictionary.getInstance();
  fst = dictionary.getFST();
  unkDictionary = UnknownDictionary.getInstance();
  characterDefinition = unkDictionary.getCharacterDefinition();
  this.userDictionary = userDictionary;
  costs = ConnectionCosts.getInstance();
  fstReader = fst.getBytesReader();
  // The user dictionary (and its FST) is optional.
  if (userDictionary != null) {
    userFST = userDictionary.getFST();
    userFSTReader = userFST.getBytesReader();
  } else {
    userFST = null;
    userFSTReader = null;
  }
  this.discardPunctuation = discardPunctuation;
  // Mode flags: SEARCH and EXTENDED both enable search-mode heuristics;
  // only SEARCH emits compound tokens, only EXTENDED handles unknown words specially.
  switch(mode){
    case SEARCH:
      searchMode = true;
      extendedMode = false;
      outputCompounds = true;
      break;
    case EXTENDED:
      searchMode = true;
      extendedMode = true;
      outputCompounds = false;
      break;
    default:
      searchMode = false;
      extendedMode = false;
      outputCompounds = false;
      break;
  }
  // Deliberately passes null: a best-effort NPE for consumers that don't call reset()
  // before using the stream. NOTE(review): this relies on reset() re-priming the
  // buffer with the real reader — confirm against Tokenizer's reset() contract.
  buffer.reset(null); // best effort NPE for consumers that don't call reset()
  resetState();
  dictionaryMap.put(Type.KNOWN, dictionary);
  dictionaryMap.put(Type.UNKNOWN, unkDictionary);
  dictionaryMap.put(Type.USER, userDictionary);
}