@Override protected void decompose() { // get the hyphenation points Hyphenation hyphens = hyphenator.hyphenate(termAtt.buffer(), 0, termAtt.length(), 1, 1); // No hyphen points found -> exit if (hyphens == null) { return; } final int[] hyp = hyphens.getHyphenationPoints(); for (int i = 0; i < hyp.length; ++i) { int remaining = hyp.length - i; int start = hyp[i]; CompoundToken longestMatchToken = null; for (int j = 1; j < remaining; j++) { int partLength = hyp[i + j] - start; // if the part is longer than maxSubwordSize we // are done with this round if (partLength > this.maxSubwordSize) { break; } // we only put subwords to the token stream // that are longer than minPartSize if (partLength < this.minSubwordSize) { // BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the // calculation above, and we rely upon minSubwordSize being >=0 to filter them out... continue; } // check the dictionary if (dictionary == null || dictionary.contains(termAtt.buffer(), start, partLength)) { if (this.onlyLongestMatch) { if (longestMatchToken != null) { if (longestMatchToken.txt.length() < partLength) { longestMatchToken = new CompoundToken(start, partLength); } } else { longestMatchToken = new CompoundToken(start, partLength); } } else { tokens.add(new CompoundToken(start, partLength)); } } else if (dictionary.contains(termAtt.buffer(), start, partLength - 1)) { // check the dictionary again with a word that is one character // shorter // to avoid problems with genitive 's characters and other binding // characters if (this.onlyLongestMatch) { if (longestMatchToken != null) { if (longestMatchToken.txt.length() < partLength - 1) { longestMatchToken = new CompoundToken(start, partLength - 1); } } else { longestMatchToken = new CompoundToken(start, partLength - 1); } } else { tokens.add(new CompoundToken(start, partLength - 1)); } } } if (this.onlyLongestMatch && longestMatchToken!=null) { tokens.add(longestMatchToken); } } }