public String[] formatEntry(String[] features) { if (this.format == DictionaryFormat.IPADIC) { return features; } else { String[] features2 = new String[13]; features2[0] = features[0]; features2[1] = features[1]; features2[2] = features[2]; features2[3] = features[3]; features2[4] = features[4]; features2[5] = features[5]; features2[6] = features[6]; features2[7] = features[7]; features2[8] = features[8]; features2[9] = features[9]; features2[10] = features[11]; // If the surface reading is non-existent, use surface form for reading and pronunciation. // This happens with punctuation in UniDic and there are possibly other cases as well if (features[13].length() == 0) { features2[11] = features[0]; features2[12] = features[0]; } else { features2[11] = features[13]; features2[12] = features[13]; } return features2; } }
/**
 * Creates a builder for the given dictionary format.
 *
 * <p>The three arguments are stored on the instance. A {@code Normalizer2} is obtained via
 * {@code Normalizer2.getInstance(null, "nfkc", Normalizer2.Mode.COMPOSE)} only when
 * {@code normalizeEntries} is {@code true}; otherwise the normalizer field is set to
 * {@code null}.
 *
 * @param format the dictionary format; stored in {@code this.format}
 * @param encoding the encoding name; stored in {@code this.encoding}
 * @param normalizeEntries whether a normalizer should be set up for this builder
 */
public TokenInfoDictionaryBuilder(DictionaryFormat format, String encoding, boolean normalizeEntries) {
  this.format = format;
  this.encoding = encoding;
  this.normalizeEntries = normalizeEntries;

  if (normalizeEntries) {
    this.normalizer = Normalizer2.getInstance(null, "nfkc", Normalizer2.Mode.COMPOSE);
  } else {
    // No normalizer is needed when entries are not normalized.
    this.normalizer = null;
  }
}