/**
 * Instantiates the path-hierarchy tokenizer chosen by this factory's {@code reverse} flag.
 *
 * <p>Both variants are constructed with the same configured {@code bufferSize},
 * {@code delimiter}, {@code replacement} and {@code skip} values.
 *
 * @return a {@link PathHierarchyTokenizer} when {@code reverse} is {@code false},
 *         otherwise a {@link ReversePathHierarchyTokenizer}
 */
@Override
public Tokenizer create() {
    if (!reverse) {
        return new PathHierarchyTokenizer(bufferSize, delimiter, replacement, skip);
    }
    return new ReversePathHierarchyTokenizer(bufferSize, delimiter, replacement, skip);
}
/**
 * Produces the tokens for the host component of a URL.
 *
 * <p>The host is emitted as one {@link URLPart#HOST} token when {@code tokenizeHost} is off
 * or when {@link InetAddresses#isInetAddress(String)} accepts {@code partString}. In every
 * other case the host is split on {@code '.'} by a {@link ReversePathHierarchyTokenizer}.
 *
 * @param url           URL being tokenized
 * @param partStringRaw host exactly as it appears in {@code url} (not URL decoded); used to
 *                      compute the start and end offsets
 * @param partString    host text, possibly URL decoded; used as the token text
 * @return the host tokens
 * @throws IOException declared by the underlying tokenization call
 */
private List<Token> getHostTokens(String url, String partStringRaw, String partString) throws IOException {
    final int hostStart = getStartIndex(url, partStringRaw);

    final boolean splitOnDots = tokenizeHost && !InetAddresses.isInetAddress(partString);
    if (splitOnDots) {
        final ReversePathHierarchyTokenizer hostTokenizer = new ReversePathHierarchyTokenizer('.', '.');
        return tokenize(URLPart.HOST, addReader(hostTokenizer, new StringReader(partString)), hostStart);
    }

    // Whole host as a single token; offsets are measured on the raw (undecoded) text.
    final int hostEnd = getEndIndex(hostStart, partStringRaw);
    final Token wholeHost = new Token(partString, URLPart.HOST, hostStart, hostEnd);
    return Collections.singletonList(wholeHost);
}
/** * Tokenize the given email address based on the desired {@link EmailPart} and currently set tokenizer options. * @param email the email address to be tokenized * @param part the desired part of the email address * @return a list of {@link Token}s parsed from the given address * @throws IOException */ private List<Token> tokenize(final String email, final EmailPart part) throws IOException { String partString = getPart(email, part); if (Strings.isNullOrEmpty(partString)) { // desired part was not found return new ArrayList<>(); } int start = 0; int end = 0; switch (part) { case LOCALPART: return tokenizeLocalPart(partString); case DOMAIN: start = getStartIndex(email, partString); if (!tokenizeDomian) { end = getEndIndex(start, partString); return ImmutableList.of(new Token(partString, part, start, end)); } ReversePathHierarchyTokenizer tokenizer = new ReversePathHierarchyTokenizer('.', '.'); tokenizer.setReader(new StringReader(partString)); return tokenize(part, tokenizer, start); case WHOLE: end = partString.length(); break; } return ImmutableList.of(new Token(partString, part, start, end)); }
/**
 * Instantiates the path-hierarchy tokenizer chosen by this factory's {@code reverse} flag,
 * passing it the given reader.
 *
 * <p>Both variants are constructed with the same configured {@code delimiter},
 * {@code replacement} and {@code skip} values.
 *
 * @param input reader handed to the tokenizer's constructor
 * @return a {@link PathHierarchyTokenizer} when {@code reverse} is {@code false},
 *         otherwise a {@link ReversePathHierarchyTokenizer}
 */
@Override
public Tokenizer create(Reader input) {
    if (!reverse) {
        return new PathHierarchyTokenizer(input, delimiter, replacement, skip);
    }
    return new ReversePathHierarchyTokenizer(input, delimiter, replacement, skip);
}