/**
 * Resets the wrapped tokenizer, first pushing a newer user dictionary into it when one is available.
 *
 * <p>After {@code updateUserDictionary()} runs, the dictionary timestamp is compared with the
 * tokenizer timestamp. If the dictionary is newer and a user dictionary is present, the wrapped
 * tokenizer's user dictionary, user FST, user FST reader and its {@code Type.USER} dictionary-map
 * entry are replaced through the reflective field handles. Finally the pending input reader (unless
 * it is the {@code ILLEGAL_STATE_READER} sentinel) is handed to the tokenizer and the tokenizer is reset.
 *
 * @throws IOException declared by the overridden method; the calls made here may raise it
 * @throws IllegalStateException if any step of the reflective update fails (the cause is preserved)
 */
@Override
public void reset() throws IOException {
    updateUserDictionary();

    final boolean dictionaryIsNewer = dictionaryTimestamp > tokenizerTimestamp;

    if (dictionaryIsNewer && VERBOSE) {
        System.out.println("Update KuromojiTokenizer (" + tokenizerTimestamp + "," + dictionaryTimestamp + ")");
    }

    if (dictionaryIsNewer && userDictionary != null) {
        try {
            // The timestamp is recorded before the reflective writes, so it advances even if one of them fails.
            tokenizerTimestamp = dictionaryTimestamp;

            userDictionaryField.set(tokenizer, userDictionary);

            final TokenInfoFST fst = userDictionary.getFST();
            userFSTField.set(tokenizer, fst);
            userFSTReaderField.set(tokenizer, fst.getBytesReader());

            // The tokenizer's own map is mutated in place rather than replaced.
            @SuppressWarnings("unchecked")
            final EnumMap<Type, Dictionary> tokenizerDictionaries =
                    (EnumMap<Type, Dictionary>) dictionaryMapField.get(tokenizer);
            tokenizerDictionaries.put(Type.USER, userDictionary);
        } catch (final Exception e) {
            throw new IllegalStateException("Failed to update the tokenizer.", e);
        }
    }

    final Reader pendingReader = getInputPending();
    if (pendingReader != ILLEGAL_STATE_READER) {
        tokenizer.setReader(pendingReader);
    }
    tokenizer.reset();
}
/**
 * Creates a token by storing every argument in the field of the same name.
 *
 * @param wordId      word identifier
 * @param surfaceForm character array for the surface form; kept by reference, not copied
 * @param offset      offset value (presumably the start of this token within {@code surfaceForm} - confirm against callers)
 * @param length      length value (presumably the number of characters of {@code surfaceForm} used - confirm against callers)
 * @param type        token type
 * @param position    position value for the token
 * @param dictionary  dictionary associated with the token
 */
public Token(int wordId, char[] surfaceForm, int offset, int length, Type type, int position, Dictionary dictionary) {
    // Surface text and the slice values that accompany it.
    this.surfaceForm = surfaceForm;
    this.offset = offset;
    this.length = length;
    this.position = position;

    // Dictionary-related attributes.
    this.dictionary = dictionary;
    this.wordId = wordId;
    this.type = type;
}
private void add(Dictionary dict, Position fromPosData, int endPos, int wordID, Type type, boolean addPenalty) throws IOException { final int wordCost = dict.getWordCost(wordID); final int leftID = dict.getLeftId(wordID); int leastCost = Integer.MAX_VALUE; int leastIDX = -1; assert fromPosData.count > 0; for(int idx=0;idx<fromPosData.count;idx++) { // Cost is path cost so far, plus word cost (added at // end of loop), plus bigram cost: final int cost = fromPosData.costs[idx] + costs.get(fromPosData.lastRightID[idx], leftID); if (VERBOSE) { System.out.println(" fromIDX=" + idx + ": cost=" + cost + " (prevCost=" + fromPosData.costs[idx] + " wordCost=" + wordCost + " bgCost=" + costs.get(fromPosData.lastRightID[idx], leftID) + " leftID=" + leftID); } if (cost < leastCost) { leastCost = cost; leastIDX = idx; if (VERBOSE) { System.out.println(" **"); } } } leastCost += wordCost; if (VERBOSE) { System.out.println(" + cost=" + leastCost + " wordID=" + wordID + " leftID=" + leftID + " leastIDX=" + leastIDX + " toPos=" + endPos + " toPos.idx=" + positions.get(endPos).count); } if ((addPenalty || (!outputCompounds && searchMode)) && type != Type.USER) { final int penalty = computePenalty(fromPosData.pos, endPos - fromPosData.pos); if (VERBOSE) { if (penalty > 0) { System.out.println(" + penalty=" + penalty + " cost=" + (leastCost+penalty)); } } leastCost += penalty; } //positions.get(endPos).add(leastCost, dict.getRightId(wordID), fromPosData.pos, leastIDX, wordID, type); assert leftID == dict.getRightId(wordID); positions.get(endPos).add(leastCost, leftID, fromPosData.pos, leastIDX, wordID, type); }
/**
 * Looks up the dictionary registered for the given type.
 *
 * @param type dictionary type used as the key into {@code dictionaryMap}
 * @return whatever {@code dictionaryMap.get(type)} yields for that key
 */
Dictionary getDict(Type type) {
    final Dictionary registered = dictionaryMap.get(type);
    return registered;
}