/** Builds the NormalizeCharMap; call this once you * are done calling {@link #add}. */ public NormalizeCharMap build() { final FST<CharsRef> map; try { final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton(); final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE2, outputs); final IntsRefBuilder scratch = new IntsRefBuilder(); for(Map.Entry<String,String> ent : pendingPairs.entrySet()) { builder.add(Util.toUTF16(ent.getKey(), scratch), new CharsRef(ent.getValue())); } map = builder.finish(); pendingPairs.clear(); } catch (IOException ioe) { // Bogus FST IOExceptions!! (will never happen) throw new RuntimeException(ioe); } return new NormalizeCharMap(map); }
/**
 * Reads {@code num} conversion lines from {@code reader} and compiles them into an FST
 * that maps each input string to its replacement.
 *
 * <p>Every line must split on whitespace into exactly three fields. The second field is
 * used as the input and the third as the output; the first field is not inspected.
 *
 * @param reader source of the conversion lines; its current line number is reported in
 *     parse errors
 * @param num number of lines to consume
 * @return an FST built with {@code INPUT_TYPE.BYTE2} from the UTF-16 form of each input,
 *     with the replacement as output
 * @throws IOException if reading from {@code reader} fails
 * @throws ParseException if the input ends before {@code num} lines were read, or a line
 *     does not have exactly three fields
 * @throws IllegalStateException if the same input is mapped more than once
 */
private FST<CharsRef> parseConversions(LineNumberReader reader, int num)
    throws IOException, ParseException {
  // TreeMap iterates its keys in sorted order; presumably the FST builder
  // requires its inputs to be added in that order.
  Map<String,String> mappings = new TreeMap<>();

  for (int i = 0; i < num; i++) {
    String line = reader.readLine();
    if (line == null) {
      // readLine() returns null at end of stream: report a truncated file as a
      // parse error instead of failing with a NullPointerException below.
      throw new ParseException(
          "unexpected end of file: expected " + num + " conversion lines but found " + i,
          reader.getLineNumber());
    }
    String[] parts = line.split("\\s+");
    if (parts.length != 3) {
      throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
    }
    if (mappings.put(parts[1], parts[2]) != null) {
      throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
    }
  }

  Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
  Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
  // One scratch buffer is reused for the UTF-16 form of every key.
  IntsRefBuilder scratchInts = new IntsRefBuilder();
  for (Map.Entry<String,String> entry : mappings.entrySet()) {
    Util.toUTF16(entry.getKey(), scratchInts);
    builder.add(scratchInts.get(), new CharsRef(entry.getValue()));
  }

  return builder.finish();
}
/** Builds the NormalizeCharMap; call this once you * are done calling {@link #add}. */ public NormalizeCharMap build() { final FST<CharsRef> map; try { final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton(); final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<CharsRef>(FST.INPUT_TYPE.BYTE2, outputs); final IntsRef scratch = new IntsRef(); for(Map.Entry<String,String> ent : pendingPairs.entrySet()) { builder.add(Util.toUTF16(ent.getKey(), scratch), new CharsRef(ent.getValue())); } map = builder.finish(); pendingPairs.clear(); } catch (IOException ioe) { // Bogus FST IOExceptions!! (will never happen) throw new RuntimeException(ioe); } return new NormalizeCharMap(map); }
public void testReplacements() throws Exception { Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton(); Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs); IntsRefBuilder scratchInts = new IntsRefBuilder(); // a -> b Util.toUTF16("a", scratchInts); builder.add(scratchInts.get(), new CharsRef("b")); // ab -> c Util.toUTF16("ab", scratchInts); builder.add(scratchInts.get(), new CharsRef("c")); // c -> de Util.toUTF16("c", scratchInts); builder.add(scratchInts.get(), new CharsRef("de")); // def -> gh Util.toUTF16("def", scratchInts); builder.add(scratchInts.get(), new CharsRef("gh")); FST<CharsRef> fst = builder.finish(); StringBuilder sb = new StringBuilder("atestanother"); Dictionary.applyMappings(fst, sb); assertEquals("btestbnother", sb.toString()); sb = new StringBuilder("abtestanother"); Dictionary.applyMappings(fst, sb); assertEquals("ctestbnother", sb.toString()); sb = new StringBuilder("atestabnother"); Dictionary.applyMappings(fst, sb); assertEquals("btestcnother", sb.toString()); sb = new StringBuilder("abtestabnother"); Dictionary.applyMappings(fst, sb); assertEquals("ctestcnother", sb.toString()); sb = new StringBuilder("abtestabcnother"); Dictionary.applyMappings(fst, sb); assertEquals("ctestcdenother", sb.toString()); sb = new StringBuilder("defdefdefc"); Dictionary.applyMappings(fst, sb); assertEquals("ghghghde", sb.toString()); }