@Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { char text[] = termAtt.buffer(); int length = termAtt.length(); for (int i = 0; i < length; i++) { final char ch = text[i]; if (ch >= 0xFF01 && ch <= 0xFF5E) { // Fullwidth ASCII variants text[i] -= 0xFEE0; } else if (ch >= 0xFF65 && ch <= 0xFF9F) { // Halfwidth Katakana variants if ((ch == 0xFF9E || ch == 0xFF9F) && i > 0 && combine(text, i, ch)) { length = StemmerUtil.delete(text, i--, length); } else { text[i] = KANA_NORM[ch - 0xFF65]; } } } termAtt.setLength(length); return true; } else { return false; } }
/**
 * Advances the wrapped stream by one token and rewrites its term buffer in
 * place with a small state machine over the states {@code N}, {@code U} and
 * {@code V} (declared outside this method).
 *
 * <p>Visible effects on the term text:
 * <ul>
 *   <li>{@code 'ä'}, {@code 'ö'}, {@code 'ü'} become {@code 'a'}, {@code 'o'},
 *       {@code 'u'};</li>
 *   <li>{@code 'ß'} is expanded to {@code "ss"}, growing the buffer by one;</li>
 *   <li>an {@code 'e'} encountered while the state is {@code U} is deleted.
 *       State {@code U} is entered after {@code 'a'} or {@code 'o'}, and after
 *       {@code 'u'} only when the state before it was {@code N}.</li>
 * </ul>
 *
 * @return {@code true} if a token was produced, {@code false} once the
 *         wrapped stream is exhausted
 * @throws IOException if the wrapped stream fails to advance
 */
@Override
public boolean incrementToken() throws IOException {
  if (!input.incrementToken()) {
    return false;
  }

  int mode = N;
  char[] chars = termAtt.buffer();
  int len = termAtt.length();

  for (int pos = 0; pos < len; pos++) {
    final char current = chars[pos];

    if (current == 'a' || current == 'o') {
      mode = U;
    } else if (current == 'u') {
      // 'u' only enters U when coming from N; otherwise it moves to V.
      if (mode == N) {
        mode = U;
      } else {
        mode = V;
      }
    } else if (current == 'e') {
      if (mode == U) {
        // Remove this 'e' and step back so the loop increment revisits the
        // character that moved into its place.
        len = StemmerUtil.delete(chars, pos, len);
        pos--;
      }
      mode = V;
    } else if (current == 'i' || current == 'q' || current == 'y') {
      mode = V;
    } else if (current == 'ä') {
      chars[pos] = 'a';
      mode = V;
    } else if (current == 'ö') {
      chars[pos] = 'o';
      mode = V;
    } else if (current == 'ü') {
      chars[pos] = 'u';
      mode = V;
    } else if (current == 'ß') {
      // First 's' overwrites the 'ß'; the second is inserted right after it.
      chars[pos] = 's';
      pos++;
      // The buffer may be reallocated here, so re-take the reference.
      chars = termAtt.resizeBuffer(len + 1);
      if (pos < len) {
        // Shift the tail right by one to open a slot for the second 's'.
        System.arraycopy(chars, pos, chars, pos + 1, len - pos);
      }
      chars[pos] = 's';
      len++;
      mode = N;
    } else {
      mode = N;
    }
  }

  termAtt.setLength(len);
  return true;
}
/**
 * Advances the wrapped stream by one token and normalizes its term buffer in
 * place.
 *
 * <p>Single characters equal to {@code ae_se}, {@code AE_se}, {@code oe_se},
 * {@code OE_se} are replaced by {@code ae}, {@code AE}, {@code oe},
 * {@code OE} respectively (all constants declared outside this method).
 * Otherwise, when a following character exists, these two-character
 * sequences collapse into one character (second character matched
 * case-insensitively):
 * <ul>
 *   <li>{@code a} + (a|o) becomes {@code aa}; {@code A} + (a|o) becomes {@code AA}</li>
 *   <li>{@code a} + e becomes {@code ae}; {@code A} + e becomes {@code AE}</li>
 *   <li>{@code o} + (e|o) becomes {@code oe}; {@code O} + (e|o) becomes {@code OE}</li>
 * </ul>
 *
 * @return {@code true} if a token was produced, {@code false} once the
 *         wrapped stream is exhausted
 * @throws IOException if the wrapped stream fails to advance
 */
@Override
public boolean incrementToken() throws IOException {
  if (!input.incrementToken()) {
    return false;
  }

  final char[] chars = charTermAttribute.buffer();
  int len = charTermAttribute.length();

  for (int pos = 0; pos < len; pos++) {
    final char current = chars[pos];

    if (current == ae_se) {
      chars[pos] = ae;
    } else if (current == AE_se) {
      chars[pos] = AE;
    } else if (current == oe_se) {
      chars[pos] = oe;
    } else if (current == OE_se) {
      chars[pos] = OE;
    } else if (pos < len - 1) {
      // A successor exists, so a two-character sequence may start here.
      final char next = chars[pos + 1];
      final boolean nextIsA = next == 'a' || next == 'A';
      final boolean nextIsE = next == 'e' || next == 'E';
      final boolean nextIsO = next == 'o' || next == 'O';

      char merged = 0;
      boolean collapse = true;
      if (current == 'a' && (nextIsA || nextIsO)) {
        merged = aa;
      } else if (current == 'A' && (nextIsA || nextIsO)) {
        merged = AA;
      } else if (current == 'a' && nextIsE) {
        merged = ae;
      } else if (current == 'A' && nextIsE) {
        merged = AE;
      } else if (current == 'o' && (nextIsE || nextIsO)) {
        merged = oe;
      } else if (current == 'O' && (nextIsE || nextIsO)) {
        merged = OE;
      } else {
        collapse = false;
      }

      if (collapse) {
        // Drop the second character of the pair, then put the single
        // replacement character in the first slot.
        len = StemmerUtil.delete(chars, pos + 1, len);
        chars[pos] = merged;
      }
    }
  }

  charTermAttribute.setLength(len);
  return true;
}
/**
 * Advances the wrapped stream by one token and folds its term buffer in
 * place down to plain {@code a}/{@code A}/{@code o}/{@code O}.
 *
 * <p>Characters equal to {@code aa}, {@code ae_se} or {@code ae} become
 * {@code 'a'}; {@code AA}, {@code AE_se} or {@code AE} become {@code 'A'};
 * {@code oe} or {@code oe_se} become {@code 'o'}; {@code OE} or
 * {@code OE_se} become {@code 'O'} (all constants declared outside this
 * method). Otherwise, when a following character exists, the second
 * character of these pairs is dropped and the first is kept unchanged:
 * (a|A) followed by any of a, A, e, E, o, O; and (o|O) followed by any of
 * e, E, o, O.
 *
 * @return {@code true} if a token was produced, {@code false} once the
 *         wrapped stream is exhausted
 * @throws IOException if the wrapped stream fails to advance
 */
@Override
public boolean incrementToken() throws IOException {
  if (!input.incrementToken()) {
    return false;
  }

  final char[] chars = charTermAttribute.buffer();
  int len = charTermAttribute.length();

  for (int pos = 0; pos < len; pos++) {
    final char current = chars[pos];

    if (current == aa || current == ae_se || current == ae) {
      chars[pos] = 'a';
    } else if (current == AA || current == AE_se || current == AE) {
      chars[pos] = 'A';
    } else if (current == oe || current == oe_se) {
      chars[pos] = 'o';
    } else if (current == OE || current == OE_se) {
      chars[pos] = 'O';
    } else if (pos < len - 1) {
      // A successor exists, so a two-character sequence may start here.
      final char next = chars[pos + 1];
      final boolean nextIsA = next == 'a' || next == 'A';
      final boolean nextIsEOrO =
          next == 'e' || next == 'E' || next == 'o' || next == 'O';
      final boolean currentIsA = current == 'a' || current == 'A';
      final boolean currentIsO = current == 'o' || current == 'O';

      final boolean foldsAfterA = currentIsA && (nextIsA || nextIsEOrO);
      final boolean foldsAfterO = currentIsO && nextIsEOrO;
      if (foldsAfterA || foldsAfterO) {
        // Keep the leading vowel as-is and drop the one that follows it.
        len = StemmerUtil.delete(chars, pos + 1, len);
      }
    }
  }

  charTermAttribute.setLength(len);
  return true;
}