boolean inSelectScope(String targetName) { for (int pos = stack.size() -1; pos >= 0; pos--) { Element el = stack.get(pos); String elName = el.nodeName(); if (elName.equals(targetName)) return true; if (!StringUtil.in(elName, TagSearchSelectScope)) // all elements except return false; } Validate.fail("Should not be reachable"); return false; }
/** Remove a list of allowed elements from a whitelist. (If a tag is not allowed, it will be removed from the HTML.) @param tags tag names to disallow @return this (for chaining) */ public Whitelist removeTags(String... tags) { Validate.notNull(tags); for(String tag: tags) { Validate.notEmpty(tag); TagName tagName = TagName.valueOf(tag); if(tagNames.remove(tagName)) { // Only look in sub-maps if tag was allowed attributes.remove(tagName); enforcedAttributes.remove(tagName); protocols.remove(tagName); } } return this; }
/**
 Add a list of allowed attributes to a tag. (If an attribute is not allowed on an element, it will be removed.)
 <p>
 E.g.: <code>addAttributes("a", "href", "class")</code> allows <code>href</code> and <code>class</code> attributes
 on <code>a</code> tags.
 </p>
 <p>
 To make an attribute valid for <b>all tags</b>, use the pseudo tag <code>:all</code>, e.g.
 <code>addAttributes(":all", "class")</code>.
 </p>

 @param tag  The tag the attributes are for. The tag will be added to the allowed tag list if necessary.
 @param keys List of valid attributes for the tag
 @return this (for chaining)
 */
public Whitelist addAttributes(String tag, String... keys) {
    Validate.notEmpty(tag);
    Validate.notNull(keys);
    Validate.isTrue(keys.length > 0, "No attributes supplied.");

    TagName allowedTag = TagName.valueOf(tag);
    if (!tagNames.contains(allowedTag)) {
        tagNames.add(allowedTag);
    }

    // validate and convert every key before touching the attribute map
    Set<AttributeKey> additions = new HashSet<AttributeKey>();
    for (int i = 0; i < keys.length; i++) {
        String key = keys[i];
        Validate.notEmpty(key);
        additions.add(AttributeKey.valueOf(key));
    }

    if (!attributes.containsKey(allowedTag)) {
        attributes.put(allowedTag, additions);
    } else {
        // merge into the set already registered for this tag
        attributes.get(allowedTag).addAll(additions);
    }
    return this;
}
/**
 Add an enforced attribute to a tag. An enforced attribute will always be added to the element. If the element
 already has the attribute set, it will be overridden.
 <p>
 E.g.: <code>addEnforcedAttribute("a", "rel", "nofollow")</code> will make all <code>a</code> tags output as
 <code>&lt;a href="..." rel="nofollow"&gt;</code>
 </p>

 @param tag   The tag the enforced attribute is for. The tag will be added to the allowed tag list if necessary.
 @param key   The attribute key
 @param value The enforced attribute value
 @return this (for chaining)
 */
public Whitelist addEnforcedAttribute(String tag, String key, String value) {
    Validate.notEmpty(tag);
    Validate.notEmpty(key);
    Validate.notEmpty(value);

    TagName tagName = TagName.valueOf(tag);
    if (!tagNames.contains(tagName)) {
        tagNames.add(tagName);
    }
    AttributeKey attrKey = AttributeKey.valueOf(key);
    AttributeValue attrVal = AttributeValue.valueOf(value);

    // find the per-tag map, creating and registering it on first use
    Map<AttributeKey, AttributeValue> enforced;
    if (enforcedAttributes.containsKey(tagName)) {
        enforced = enforcedAttributes.get(tagName);
    } else {
        enforced = new HashMap<AttributeKey, AttributeValue>();
        enforcedAttributes.put(tagName, enforced);
    }
    enforced.put(attrKey, attrVal);
    return this;
}
/** Remove a previously configured enforced attribute from a tag. @param tag The tag the enforced attribute is for. @param key The attribute key @return this (for chaining) */ public Whitelist removeEnforcedAttribute(String tag, String key) { Validate.notEmpty(tag); Validate.notEmpty(key); TagName tagName = TagName.valueOf(tag); if(tagNames.contains(tagName) && enforcedAttributes.containsKey(tagName)) { AttributeKey attrKey = AttributeKey.valueOf(key); Map<AttributeKey, AttributeValue> attrMap = enforcedAttributes.get(tagName); attrMap.remove(attrKey); if(attrMap.isEmpty()) // Remove tag from enforced attribute map if no enforced attributes are present enforcedAttributes.remove(tagName); } return this; }
void insertInFosterParent(Node in) { Element fosterParent; Element lastTable = getFromStack("table"); boolean isLastTableParent = false; if (lastTable != null) { if (lastTable.parent() != null) { fosterParent = lastTable.parent(); isLastTableParent = true; } else fosterParent = aboveOnStack(lastTable); } else { // no table == frag fosterParent = stack.get(0); } if (isLastTableParent) { Validate.notNull(lastTable); // last table cannot be null by this point. lastTable.before(in); } else fosterParent.appendChild(in); }
@Override protected boolean process(Token token) { // start tag, end tag, doctype, comment, character, eof switch (token.type) { case StartTag: insert(token.asStartTag()); break; case EndTag: popStackToClose(token.asEndTag()); break; case Comment: insert(token.asComment()); break; case Character: insert(token.asCharacter()); break; case Doctype: insert(token.asDoctype()); break; case EOF: // could put some normalisation here if desired break; default: Validate.fail("Unexpected token type: " + token.type); } return true; }
/**
 * Stores {@code token} as the pending emitted token. Fails validation if an earlier token is still pending.
 * <p>
 * For a start tag, its name is recorded in {@code lastStartTag}, and the self-closing acknowledgement
 * flag is reset when the tag is self-closing. For an end tag carrying attributes, an error is raised.
 * </p>
 *
 * @param token the token to emit
 */
void emit(Token token) {
    Validate.isFalse(isEmitPending, "There is an unread token pending!");

    emitPending = token;
    isEmitPending = true;

    if (token.type == Token.TokenType.StartTag) {
        Token.StartTag start = (Token.StartTag) token;
        lastStartTag = start.tagName;
        if (start.selfClosing) {
            selfClosingFlagAcknowledged = false;
        }
        return;
    }

    if (token.type == Token.TokenType.EndTag && ((Token.EndTag) token).attributes != null) {
        error("Attributes incorrectly present on end tag");
    }
}
/** * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything. * <p> * Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals(). * </p> * * @param tagName Name of tag, e.g. "p". Case insensitive. * @return The tag, either defined or new generic. */ public static Tag valueOf(String tagName) { Validate.notNull(tagName); Tag tag = tags.get(tagName); if (tag == null) { tagName = tagName.trim().toLowerCase(); Validate.notEmpty(tagName); tag = tags.get(tagName); if (tag == null) { // not defined: create default; go anywhere, do anything! (incl be inside a <p>) tag = new Tag(tagName); tag.isBlock = false; tag.canContainBlock = true; } } return tag; }
/** * Find elements matching selector. * * @param query CSS selector * @param roots root elements to descend into * @return matching elements, empty if none */ public static Elements select(String query, Iterable<Element> roots) { Validate.notEmpty(query); Validate.notNull(roots); Evaluator evaluator = QueryParser.parse(query); ArrayList<Element> elements = new ArrayList<Element>(); IdentityHashMap<Element, Boolean> seenElements = new IdentityHashMap<Element, Boolean>(); // dedupe elements by identity, not equality for (Element root : roots) { final Elements found = select(evaluator, root); for (Element el : found) { if (!seenElements.containsKey(el)) { elements.add(el); seenElements.put(el, Boolean.TRUE); } } } return new Elements(elements); }
protected boolean process(Token token) { switch (token.type) { case StartTag: insert(token.asStartTag()); break; case EndTag: popStackToClose(token.asEndTag()); break; case Comment: insert(token.asComment()); break; case Character: insert(token.asCharacter()); break; case Doctype: insert(token.asDoctype()); break; case EOF: // could put some normalisation here if desired break; default: Validate.fail("Unexpected token type: " + token.type); } return true; }
/**
 * Consumes the token queue and builds the evaluator for it.
 * <p>
 * Each iteration first checks that further evaluators are still permitted ({@code noEvalAllow} is false).
 * An OR combinator hands the rest of the queue to {@code combineXPathEvaluator} and returns immediately.
 * A hierarchy combinator is passed to {@code combinator}. Anything else goes to {@code findElements}.
 * </p>
 *
 * @return the combined evaluator if an OR combinator is met, otherwise the collected evaluator
 */
public XPathEvaluator parse() {
    while (!tq.isEmpty()) {
        Validate.isFalse(noEvalAllow, "XPath error! No operator allowed after attribute or function!" + tq);

        if (tq.matchChomp(OR_COMBINATOR)) {
            tq.consumeWhitespace();
            return combineXPathEvaluator(tq.remainder());
        }

        if (tq.matchesAny(HIERARCHY_COMBINATORS)) {
            combinator(tq.consumeAny(HIERARCHY_COMBINATORS));
        } else {
            findElements();
        }
        tq.consumeWhitespace();
    }
    return collectXPathEvaluator();
}
/**
 * Parses a {@code regex(...)} function call and stores the resulting operator in {@code elementOperator}.
 * <p>
 * Supported argument lists, as handled below:
 * </p>
 * <ul>
 * <li>one argument: the pattern;</li>
 * <li>two arguments: either {@code @attribute, pattern} or {@code pattern, number};</li>
 * <li>three arguments: {@code attribute, pattern, number}, where the first character of the attribute
 * argument is dropped.</li>
 * </ul>
 *
 * @param remainder the function text; must end with ")"
 * @throws Selector.SelectorParseException if the number of arguments is not 1, 2 or 3
 */
private void functionRegex(String remainder) {
    Validate.isTrue(remainder.endsWith(")"), "Unclosed bracket for function! " + remainder);

    String rawArgs = remainder.substring("regex(".length(), remainder.length() - 1);
    List<String> params = XTokenQueue.trimQuotes(XTokenQueue.parseFuncionParams(rawArgs));

    switch (params.size()) {
        case 1:
            elementOperator = new ElementOperator.Regex(params.get(0));
            break;
        case 2:
            if (params.get(0).startsWith("@")) {
                // "@attr", pattern
                elementOperator = new ElementOperator.Regex(params.get(1), params.get(0).substring(1));
            } else {
                // pattern, number
                elementOperator = new ElementOperator.Regex(params.get(0), null, Integer.parseInt(params.get(1)));
            }
            break;
        case 3:
            // NOTE(review): the leading character of the first argument is stripped without checking
            // that it is '@' - confirm this is intended.
            elementOperator = new ElementOperator.Regex(params.get(1), params.get(0).substring(1), Integer.parseInt(params.get(2)));
            break;
        default:
            throw new Selector.SelectorParseException("Unknown usage for regex()" + remainder);
    }
}
/**
 * Removes one pair of matching surrounding quotes (single or double) from a string.
 * <p>
 * A string that does not start with a quote character is returned unchanged. A string that starts with
 * a quote must be at least two characters long and end with the same quote character, otherwise
 * validation fails.
 * </p>
 *
 * @param str the text to unquote; must be non-null and non-empty
 * @return the text without its surrounding quotes, or the input itself if it was not quoted
 */
public static String trimQuotes(String str) {
    Validate.isTrue(str != null && str.length() > 0);
    String quote = str.substring(0, 1);
    if (StringUtil.in(quote, "\"", "'")) {
        // A lone quote character both starts and ends with itself; without the length check it would
        // reach substring(1, 0) and throw StringIndexOutOfBoundsException instead of a validation error.
        Validate.isTrue(str.length() > 1 && str.endsWith(quote), "Quote" + " for " + str + " is incomplete!");
        str = str.substring(1, str.length() - 1);
    }
    return str;
}
/**
 * Creates and loads the chat implementation that matches the prefix and suffix of {@code identity}:
 * <ul>
 * <li>{@code 19:...@thread.skype} gives {@code ChatGroup}</li>
 * <li>{@code 19:...@p2p.thread.skype} gives {@code ChatP2P}</li>
 * <li>{@code 8:...} gives {@code ChatIndividual}</li>
 * <li>{@code 28:...} gives {@code ChatBot}</li>
 * </ul>
 *
 * @param client   the owning client; must not be null
 * @param identity the chat identity; must not be null or empty
 * @return the loaded chat
 * @throws IllegalArgumentException if the identity matches none of the known forms
 */
public static ChatImpl createChat(SkypeImpl client, String identity) throws ConnectionException, ChatNotFoundException {
    Validate.notNull(client, "Client must not be null");
    Validate.notEmpty(identity, "Identity must not be null/empty");

    final ChatImpl chat;
    if (identity.startsWith("19:")) {
        if (identity.endsWith("@thread.skype")) {
            chat = new ChatGroup(client, identity);
        } else if (identity.endsWith("@p2p.thread.skype")) {
            chat = new ChatP2P(client, identity);
        } else {
            chat = null;
        }
    } else if (identity.startsWith("8:")) {
        chat = new ChatIndividual(client, identity);
    } else if (identity.startsWith("28:")) {
        chat = new ChatBot(client, identity);
    } else {
        chat = null;
    }

    if (chat == null) {
        throw new IllegalArgumentException(String.format("Unknown chat type with identity %s", identity));
    }
    chat.load();
    return chat;
}
/**
 * Creates the participant implementation that matches the prefix of {@code id}: {@code 8:} gives a
 * {@code UserImpl} and {@code 28:} gives a {@code BotImpl}.
 *
 * @param client the owning client; must not be null
 * @param chat   the chat the participant belongs to; must not be null
 * @param id     the participant id; must not be null or empty
 * @return the new participant
 * @throws IllegalArgumentException if the id has neither known prefix
 */
public static ParticipantImpl createParticipant(SkypeImpl client, ChatImpl chat, String id) throws ConnectionException {
    Validate.notNull(client, "Client must not be null");
    Validate.notNull(chat, "Chat must not be null");
    Validate.notEmpty(id, "Identity must not be null/empty");

    if (id.startsWith("8:")) {
        return new UserImpl(client, chat, id);
    }
    if (id.startsWith("28:")) {
        return new BotImpl(client, chat, id);
    }
    throw new IllegalArgumentException(String.format("Unknown participant type with id %s", id));
}
private void byTag() { String tagName = tq.consumeElementSelector(); Validate.notEmpty(tagName); // namespaces: if element name is "abc:def", selector must be "abc|def", so flip: if (tagName.contains("|")) tagName = tagName.replace("|", ":"); evals.add(new Evaluator.Tag(tagName.trim().toLowerCase())); }
private void byAttribute() { TokenQueue cq = new TokenQueue(tq.chompBalanced('[', ']')); // content queue String key = cq.consumeToAny(AttributeEvals); // eq, not, start, end, contain, match, (no val) Validate.notEmpty(key); cq.consumeWhitespace(); if (cq.isEmpty()) { if (key.startsWith("^")) evals.add(new Evaluator.AttributeStarting(key.substring(1))); else evals.add(new Evaluator.Attribute(key)); } else { if (cq.matchChomp("=")) evals.add(new Evaluator.AttributeWithValue(key, cq.remainder())); else if (cq.matchChomp("!=")) evals.add(new Evaluator.AttributeWithValueNot(key, cq.remainder())); else if (cq.matchChomp("^=")) evals.add(new Evaluator.AttributeWithValueStarting(key, cq.remainder())); else if (cq.matchChomp("$=")) evals.add(new Evaluator.AttributeWithValueEnding(key, cq.remainder())); else if (cq.matchChomp("*=")) evals.add(new Evaluator.AttributeWithValueContaining(key, cq.remainder())); else if (cq.matchChomp("~=")) evals.add(new Evaluator.AttributeWithValueMatching(key, Pattern.compile(cq.remainder()))); else throw new Selector.SelectorParseException( "Could not parse attribute query '%s': unexpected token at '%s'", query, cq.remainder()); } }
/**
 * Parses a {@code :contains(text)} or {@code :containsOwn(text)} pseudo selector and adds the matching
 * text evaluator. The bracketed text is unescaped and must not be empty.
 *
 * @param own true for the {@code :containsOwn} form, false for {@code :contains}
 */
private void contains(boolean own) {
    String keyword = own ? ":containsOwn" : ":contains";
    tq.consume(keyword);

    String needle = TokenQueue.unescape(tq.chompBalanced('(', ')'));
    Validate.notEmpty(needle, ":contains(text) query must not be empty");

    if (own) {
        evals.add(new Evaluator.ContainsOwnText(needle));
    } else {
        evals.add(new Evaluator.ContainsText(needle));
    }
}
private void matches(boolean own) { tq.consume(own ? ":matchesOwn" : ":matches"); String regex = tq.chompBalanced('(', ')'); // don't unescape, as regex bits will be escaped Validate.notEmpty(regex, ":matches(regex) query must not be empty"); if (own) evals.add(new Evaluator.MatchesOwn(Pattern.compile(regex))); else evals.add(new Evaluator.Matches(Pattern.compile(regex))); }
/**
 * Parses a {@code :not(selector)} pseudo selector. The bracketed sub-query is parsed on its own and
 * wrapped in a negating structural evaluator. An empty sub-query fails validation.
 */
private void not() {
    tq.consume(":not");

    String inner = tq.chompBalanced('(', ')');
    Validate.notEmpty(inner, ":not(selector) subselect must not be empty");

    Evaluator negated = parse(inner);
    evals.add(new StructuralEvaluator.Not(negated));
}
/**
 Add a list of allowed elements to a whitelist. (If a tag is not allowed, it will be removed from the HTML.)
 Each name must be non-empty.

 @param tags tag names to allow
 @return this (for chaining)
 */
public Whitelist addTags(String... tags) {
    Validate.notNull(tags);

    for (int i = 0; i < tags.length; i++) {
        String name = tags[i];
        Validate.notEmpty(name);
        tagNames.add(TagName.valueOf(name));
    }
    return this;
}
/**
 Add allowed URL protocols for an element's URL attribute. This restricts the possible values of the attribute to
 URLs with the defined protocol.
 <p>
 E.g.: <code>addProtocols("a", "href", "ftp", "http", "https")</code>
 </p>
 <p>
 To allow a link to an in-page URL anchor (i.e. <code>&lt;a href="#anchor"&gt;</code>), add a <code>#</code>:<br>
 E.g.: <code>addProtocols("a", "href", "#")</code>
 </p>

 @param tag       Tag the URL protocol is for
 @param key       Attribute key
 @param protocols List of valid protocols
 @return this, for chaining
 */
public Whitelist addProtocols(String tag, String key, String... protocols) {
    Validate.notEmpty(tag);
    Validate.notEmpty(key);
    Validate.notNull(protocols);

    TagName tagName = TagName.valueOf(tag);
    AttributeKey attrKey = AttributeKey.valueOf(key);

    // per-tag map, created and registered on first use
    Map<AttributeKey, Set<Protocol>> byAttribute;
    if (!this.protocols.containsKey(tagName)) {
        byAttribute = new HashMap<AttributeKey, Set<Protocol>>();
        this.protocols.put(tagName, byAttribute);
    } else {
        byAttribute = this.protocols.get(tagName);
    }

    // per-attribute protocol set, created and registered on first use
    Set<Protocol> allowed;
    if (!byAttribute.containsKey(attrKey)) {
        allowed = new HashSet<Protocol>();
        byAttribute.put(attrKey, allowed);
    } else {
        allowed = byAttribute.get(attrKey);
    }

    for (int i = 0; i < protocols.length; i++) {
        String protocol = protocols[i];
        Validate.notEmpty(protocol);
        allowed.add(Protocol.valueOf(protocol));
    }
    return this;
}
/** Remove allowed URL protocols for an element's URL attribute. <p> E.g.: <code>removeProtocols("a", "href", "ftp")</code> </p> @param tag Tag the URL protocol is for @param key Attribute key @param protocols List of invalid protocols @return this, for chaining */ public Whitelist removeProtocols(String tag, String key, String... protocols) { Validate.notEmpty(tag); Validate.notEmpty(key); Validate.notNull(protocols); TagName tagName = TagName.valueOf(tag); AttributeKey attrKey = AttributeKey.valueOf(key); if(this.protocols.containsKey(tagName)) { Map<AttributeKey, Set<Protocol>> attrMap = this.protocols.get(tagName); if(attrMap.containsKey(attrKey)) { Set<Protocol> protSet = attrMap.get(attrKey); for (String protocol : protocols) { Validate.notEmpty(protocol); Protocol prot = Protocol.valueOf(protocol); protSet.remove(prot); } if(protSet.isEmpty()) { // Remove protocol set if empty attrMap.remove(attrKey); if(attrMap.isEmpty()) // Remove entry for tag if empty this.protocols.remove(tagName); } } } return this; }
/**
 * Searches the stack from its last entry towards its first for an element whose name is one of
 * {@code targetNames}. The search stops with {@code false} at the first element whose name is in
 * {@code baseTypes} or, when supplied, in {@code extraTypes}.
 *
 * @param targetNames names that count as a hit
 * @param baseTypes   names that end the scope
 * @param extraTypes  further names that end the scope; may be null
 * @return true if a target is found before a scope boundary, false otherwise
 */
private boolean inSpecificScope(String[] targetNames, String[] baseTypes, String[] extraTypes) {
    int idx = stack.size() - 1;
    while (idx >= 0) {
        String name = stack.get(idx).nodeName();
        if (StringUtil.in(name, targetNames)) {
            return true;
        }
        boolean boundary = StringUtil.in(name, baseTypes)
                || (extraTypes != null && StringUtil.in(name, extraTypes));
        if (boundary) {
            return false;
        }
        idx--;
    }
    // the walk is expected to hit a target or a boundary before the stack runs out
    Validate.fail("Should not be reachable");
    return false;
}
/**
 * Re-creates entries of {@code formattingElements} that are no longer on the stack.
 * <p>
 * Does nothing when {@code lastFormattingElement()} returns null or an element that is on the stack.
 * Otherwise the first loop rewinds through the list until it reaches index 0 or an entry that is null
 * or on the stack. The second loop then moves forward to the end of the list; for each entry it inserts
 * a new start tag with the same node name, copies the entry's attributes onto it, and stores the new
 * element in the entry's slot.
 * </p>
 * <p>
 * NOTE(review): the step numbers in the comments presumably refer to the HTML specification's
 * "reconstruct the active formatting elements" algorithm - confirm against the spec.
 * </p>
 */
void reconstructFormattingElements() {
    Element last = lastFormattingElement();
    if (last == null || onStack(last))
        return;

    Element entry = last;
    int size = formattingElements.size();
    int pos = size - 1;
    // skip == true means the rewind hit the start of the list, so the forward loop must begin at the
    // current entry instead of advancing first
    boolean skip = false;
    while (true) {
        if (pos == 0) { // step 4. if none before, skip to 8
            skip = true;
            break;
        }
        entry = formattingElements.get(--pos); // step 5. one earlier than entry
        if (entry == null || onStack(entry)) // step 6 - stop rewinding at a null entry or one that is on the stack
            break; // jump to 8, else continue back to 4
    }
    while(true) {
        if (!skip) // step 7: one later than entry
            entry = formattingElements.get(++pos);
        Validate.notNull(entry); // should not occur, as we break at last element

        // 8. create new element from element, 9 insert into current node, onto stack
        skip = false; // can only skip increment from 4.
        Element newEl = insertStartTag(entry.nodeName()); // todo: avoid fostering here?
        // newEl.namespace(entry.namespace()); // todo: namespaces
        newEl.attributes().addAll(entry.attributes());

        // 10. replace entry with new entry
        formattingElements.set(pos, newEl);

        // 11. stop once the last slot has been rebuilt
        if (pos == size-1) // if not last entry in list, jump to 7
            break;
    }
}
/**
 * Resets the parse state for a new input: a fresh document, character reader, tokeniser and element
 * stack are created, and the base URI and error list are recorded.
 *
 * @param input   the text to parse; must not be null
 * @param baseUri the base URI for the new document; must not be null
 * @param errors  the error list handed to the tokeniser
 */
protected void initialiseParse(String input, String baseUri, ParseErrorList errors) {
    Validate.notNull(input, "String input must not be null");
    Validate.notNull(baseUri, "BaseURI must not be null");

    // plain bookkeeping first
    this.baseUri = baseUri;
    this.errors = errors;

    // then the objects built from the arguments; the tokeniser needs the reader
    doc = new Document(baseUri);
    reader = new CharacterReader(input);
    tokeniser = new Tokeniser(reader, errors);
    stack = new ArrayList<Element>(32);
}
private Selector(String query, Element root) { Validate.notNull(query); query = query.trim(); Validate.notEmpty(query); Validate.notNull(root); this.evaluator = QueryParser.parse(query); this.root = root; }
/**
 * Builds a selector from an already parsed evaluator.
 *
 * @param evaluator the evaluator to apply; must not be null
 * @param root      the element to select from; must not be null
 */
private Selector(Evaluator evaluator, Element root) {
    Validate.notNull(evaluator);
    Validate.notNull(root);

    this.root = root;
    this.evaluator = evaluator;
}
/**
 * Perform a depth-first traversal on each of the selected elements.
 * <p>
 * A single traversor is created for the visitor and applied to every element in iteration order.
 * </p>
 *
 * @param nodeVisitor the visitor callbacks to perform on each node
 * @return this, for chaining
 */
public Elements traverse(NodeVisitor nodeVisitor) {
    Validate.notNull(nodeVisitor);

    NodeTraversor walker = new NodeTraversor(nodeVisitor);
    for (Element selected : this) {
        walker.traverse(selected);
    }
    return this;
}
public AttributeKeyPair(String key, String value) { Validate.notEmpty(key); Validate.notEmpty(value); this.key = key.trim().toLowerCase(); if (value.startsWith("\"") && value.endsWith("\"") || value.startsWith("'") && value.endsWith("'")) { value = value.substring(1, value.length()-1); } this.value = value.trim().toLowerCase(); }