/**
 * Reports whether any attribute of the given tag mentions an advertising-related keyword.
 *
 * <p>Each attribute is rendered with {@code toString()} and compared case-insensitively
 * against the keywords "advert", "werbung", "anzeige" and "adsense".
 *
 * @param tag the element to inspect; may be {@code null}
 * @return {@code true} if at least one attribute contains one of the keywords;
 *         {@code false} otherwise, including when {@code tag} or its attributes are {@code null}
 */
private boolean isIllegalStringInTag(final Element tag) {
    final String[] illegalWords = {"advert", "werbung", "anzeige", "adsense"};
    if (tag == null) {
        return false;
    }

    // The attribute set does not depend on the keyword, so fetch it once.
    final Attributes tagAttrs = tag.attributes();
    if (tagAttrs == null) {
        return false;
    }

    for (final Attribute attr : tagAttrs) {
        // Locale.ROOT keeps the comparison independent of the JVM default locale
        // (e.g. a Turkish locale would lower-case 'I' to a dotless 'ı' and miss "anzeige").
        final String attrText = attr.toString().toLowerCase(java.util.Locale.ROOT);
        for (final String word : illegalWords) {
            // The keywords are already lower-case literals.
            if (attrText.contains(word)) {
                return true;
            }
        }
    }
    return false;
}
public void setQuestion(ExamSectionQuestion esq) { Document doc = Jsoup.parse(esq.getQuestion().getQuestion()); Elements blanks = doc.select(CLOZE_SELECTOR); blanks.forEach(b -> { boolean isNumeric = isNumeric(b); Iterator<Attribute> it = b.attributes().iterator(); while (it.hasNext()) { Attribute a = it.next(); if (!a.getKey().equals("id")) { it.remove(); } } b.tagName("input"); b.text(""); b.attr("type", isNumeric ? "number" : "text"); b.attr("class", "cloze-input"); if (isNumeric) { b.attr("step", "any"); // Should allow for using both comma and period as decimal separator b.attr( "lang", "en-150"); } }); this.question = doc.body().children().toString(); }
/**
 * Guesses the return type of an expression from the attribute that holds it.
 * The guessed type can be overridden by adding a Cast to the desired type at the
 * beginning of the expression.
 *
 * @param attribute The attribute the expression is in
 * @return {@code "void"} for keys starting with {@code @} or {@code v-on:},
 *         {@code "boolean"} for {@code v-if} / {@code v-show}, otherwise the type of
 *         {@code currentProp} when one is set, else the canonical name of {@code Any}
 */
private String getExpressionReturnTypeForAttribute(Attribute attribute) {
    String name = attribute.getKey().toLowerCase();

    boolean isEventBinding = name.startsWith("@") || name.startsWith("v-on:");
    if (isEventBinding) {
        return "void";
    }

    boolean isConditional = "v-if".equals(name) || "v-show".equals(name);
    if (isConditional) {
        return "boolean";
    }

    return currentProp != null
        ? currentProp.getType().toString()
        : Any.class.getCanonicalName();
}
/** * Test if the supplied attribute is allowed by this whitelist for this tag * @param tagName tag to consider allowing the attribute in * @param el element under test, to confirm protocol * @param attr attribute under test * @return true if allowed */ protected boolean isSafeAttribute(String tagName, Element el, Attribute attr) { TagName tag = TagName.valueOf(tagName); AttributeKey key = AttributeKey.valueOf(attr.getKey()); if (attributes.containsKey(tag)) { if (attributes.get(tag).contains(key)) { if (protocols.containsKey(tag)) { Map<AttributeKey, Set<Protocol>> attrProts = protocols.get(tag); // ok if not defined protocol; otherwise test return !attrProts.containsKey(key) || testValidProtocol(el, attr, attrProts.get(key)); } else { // attribute found, no protocols defined, so OK return true; } } } // no attributes defined for tag, try :all tag return !tagName.equals(":all") && isSafeAttribute(":all", el, attr); }
/** * Finds any namespaces defined in this element. Returns any tag prefix. */ private String updateNamespaces(org.jsoup.nodes.Element el) { // scan the element for namespace declarations // like: xmlns="blah" or xmlns:prefix="blah" Attributes attributes = el.attributes(); for (Attribute attr : attributes) { String key = attr.getKey(); String prefix; if (key.equals(xmlnsKey)) { prefix = ""; } else if (key.startsWith(xmlnsPrefix)) { prefix = key.substring(xmlnsPrefix.length()); } else { continue; } namespaces.put(prefix, attr.getValue()); } // get the element prefix if any int pos = el.tagName().indexOf(":"); return pos > 0 ? el.tagName().substring(0, pos) : ""; }
@Test public void parsesBooleanAttributes() { String html = "<a normal=\"123\" boolean empty=\"\"></a>"; Element el = Jsoup.parse(html).select("a").first(); assertEquals("123", el.attr("normal")); assertEquals("", el.attr("boolean")); assertEquals("", el.attr("empty")); List<Attribute> attributes = el.attributes().asList(); assertEquals("There should be 3 attribute present", 3, attributes.size()); // Assuming the list order always follows the parsed html assertFalse("'normal' attribute should not be boolean", attributes.get(0) instanceof BooleanAttribute); assertTrue("'boolean' attribute should be boolean", attributes.get(1) instanceof BooleanAttribute); assertFalse("'empty' attribute should not be boolean", attributes.get(2) instanceof BooleanAttribute); assertEquals(html, el.outerHtml()); }
private static void renameAllAttributeKeys( ImmutableMap<String, String> renameMap, Element element) { Attributes attributes = element.attributes(); for (Attribute attribute : attributes) { String key = attribute.getKey(); // Polymer events are referenced as strings. As a result they do not participate in renaming. // Additionally, it is not valid to have a Polymer property start with "on". if (!key.startsWith("on-")) { String renamedProperty = renameMap.get( CaseFormat.LOWER_HYPHEN.to(CaseFormat.LOWER_CAMEL, key)); if (renamedProperty != null) { attribute.setKey(CaseFormat.LOWER_CAMEL.to(CaseFormat.LOWER_HYPHEN, renamedProperty)); } } } }
/**
 * Creates a new element with the same tag and base URI as the source element, carrying
 * only the source attributes accepted by the whitelist plus the attributes the whitelist
 * enforces for that tag.
 *
 * @param sourceEl element to copy from
 * @return the new element together with the number of source attributes that were dropped
 */
private ElementMeta createSafeElement(Element sourceEl) {
    final String tagName = sourceEl.tagName();
    final Attributes safeAttrs = new Attributes();
    final Element safeEl = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), safeAttrs);

    int discarded = 0;
    for (Attribute candidate : sourceEl.attributes()) {
        if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
            discarded++;
            continue;
        }
        safeAttrs.put(candidate);
    }

    safeAttrs.addAll(whitelist.getEnforcedAttributes(tagName));
    return new ElementMeta(safeEl, discarded);
}
private boolean testValidProtocol(Element el, Attribute attr, Set<Protocol> protocols) { // try to resolve relative urls to abs, and optionally update the attribute so output html has abs. // rels without a baseuri get removed String value = el.absUrl(attr.getKey()); if (value.length() == 0) value = attr.getValue(); // if it could not be made abs, run as-is to allow custom unknown protocols if (!preserveRelativeLinks) attr.setValue(value); for (Protocol protocol : protocols) { String prot = protocol.toString() + ":"; if (value.toLowerCase().startsWith(prot)) { return true; } } return false; }
/**
 * Walks from the given node up through its ancestors and returns the first node that has
 * an attribute whose html rendering contains {@code class=} (case-insensitively) and whose
 * value is not empty. The starting node itself is a candidate.
 *
 * @param n node to start from; may be {@code null}
 * @return the first matching node, or {@code null} if the chain is exhausted without a match
 */
public Node searchDirectParentWithAttribute(Node n) {
    Node current = n;
    while (current != null) {
        for (Attribute attribute : current.attributes().asList()) {
            boolean looksLikeClass = attribute.html().toLowerCase().contains("class=");
            if (looksLikeClass && attribute.getValue().length() > 0) {
                return current;
            }
        }
        // nothing suitable on this node: continue with its parent
        current = current.parent();
    }
    return null;
}
boolean isSafeAttribute(String tagName, Element el, Attribute attr) { TagName tag = TagName.valueOf(tagName); AttributeKey key = AttributeKey.valueOf(attr.getKey()); if (attributes.containsKey(tag)) { if (attributes.get(tag).contains(key)) { if (protocols.containsKey(tag)) { Map<AttributeKey, Set<Protocol>> attrProts = protocols.get(tag); // ok if not defined protocol; otherwise test return !attrProts.containsKey(key) || testValidProtocol(el, attr, attrProts.get(key)); } else { // attribute found, no protocols defined, so OK return true; } } } else { // no attributes defined for tag, try :all tag return !tagName.equals(":all") && isSafeAttribute(":all", el, attr); } return false; }
/**
 * Produce predictable html (attributes in alphabetical order), always
 * include close tags.
 *
 * @param producedElem element to serialise, including its element and text children
 * @param sb buffer the markup is appended to
 * @return the complete content of {@code sb} after the element has been appended
 */
private String elementToHtml(Element producedElem, StringBuilder sb) {
    appendElementHtml(producedElem, sb);
    // Materialise the string once, instead of once per nested element.
    return sb.toString();
}

/** Appends the predictable markup of {@code element} and its descendants to {@code sb}. */
private void appendElementHtml(Element element, StringBuilder sb) {
    ArrayList<String> names = new ArrayList<String>();
    for (Attribute a : element.attributes().asList()) {
        names.add(a.getKey());
    }
    Collections.sort(names);

    sb.append("<").append(element.tagName());
    for (String attrName : names) {
        sb.append(" ").append(attrName).append("=").append("\'")
            .append(element.attr(attrName)).append("\'");
    }
    sb.append(">");

    for (Node child : element.childNodes()) {
        if (child instanceof Element) {
            appendElementHtml((Element) child, sb);
        } else if (child instanceof TextNode) {
            // text is trimmed; other node types are ignored
            String text = ((TextNode) child).text();
            sb.append(text.trim());
        }
    }
    sb.append("</").append(element.tagName()).append(">");
}
/** * Test if the supplied attribute is allowed by this whitelist for this tag * @param tagName tag to consider allowing the attribute in * @param el element under test, to confirm protocol * @param attr attribute under test * @return true if allowed */ boolean isSafeAttribute(String tagName, Element el, Attribute attr) { TagName tag = TagName.valueOf(tagName); AttributeKey key = AttributeKey.valueOf(attr.getKey()); if (attributes.containsKey(tag)) { if (attributes.get(tag).contains(key)) { if (protocols.containsKey(tag)) { Map<AttributeKey, Set<Protocol>> attrProts = protocols.get(tag); // ok if not defined protocol; otherwise test return !attrProts.containsKey(key) || testValidProtocol(el, attr, attrProts.get(key)); } else { // attribute found, no protocols defined, so OK return true; } } } // no attributes defined for tag, try :all tag return !tagName.equals(":all") && isSafeAttribute(":all", el, attr); }
/** * Remove the comments of the page * * @param node */ private void removeMalformedAttributes(Node node) { // as we are removing child nodes while iterating, we cannot use a normal foreach over children, // or will get a concurrent list modification error. int i = 0; while (i < node.childNodes().size()) { Node child = node.childNode(i); for (Attribute attr : child.attributes()) { if (attr.getKey().startsWith("\"") && attr.getKey().endsWith("\"")) { child.removeAttr(attr.getKey()); } } removeMalformedAttributes(child); i++; } }
/**
 * Checks a single element for the captcha keyword.
 *
 * @param element element to inspect
 * @return {@code false} for the html and body elements; otherwise whether the element's
 *         own text or the value of one of its attributes contains the captcha keyword
 *         (case-insensitively). Ancestors are not inspected by this method.
 */
private boolean parseAttributeToExtractCaptcha(Element element) {
    String nodeName = element.nodeName();
    if (nodeName.equalsIgnoreCase(HTML_ELEMENT) || nodeName.equalsIgnoreCase(BODY_ELEMENT)) {
        return false;
    }

    if (StringUtils.containsIgnoreCase(element.ownText(), CAPTCHA_KEY)) {
        return true;
    }

    for (Attribute attr : element.attributes()) {
        if (StringUtils.containsIgnoreCase(attr.getValue(), CAPTCHA_KEY)) {
            return true;
        }
    }
    return false;
}
/**
 * Adds the absolute-URL form of attribute values of the element to {@code ret}.
 *
 * @param allAttr when {@code true}, every attribute of the element contributes one node
 * @param attrKey key of the single attribute to read when {@code allAttr} is {@code false}
 * @param element element whose attributes are read
 * @param ret list the resulting nodes are appended to
 */
@Override
public void handle(boolean allAttr, String attrKey, Element element, List<SIPNode> ret) {
    if (!allAttr) {
        // single attribute: only a non-blank result is recorded
        String value = element.absUrl(attrKey);
        if (StringUtils.isNotBlank(value)) {
            ret.add(SIPNode.t(value));
        }
        return;
    }

    // NOTE(review): unlike the single-attribute case, results are not filtered for
    // blankness here — confirm this is intended.
    for (Attribute attribute : element.attributes()) {
        String resolved = element.absUrl(attribute.getKey());
        ret.add(SIPNode.t(resolved));
    }
}
/**
 * Stores the contributed extensions and html transformers and sets up the whitelist.
 * The whitelist accepts every attribute whose key starts with {@code data-} in addition
 * to the tags, attributes and protocols configured below, and keeps relative links.
 *
 * @param contributedExtensions extensions stored on this manager
 * @param htmlTransformers html transformers stored on this manager
 */
@Inject
public DefaultMarkdownManager(Set<Extension> contributedExtensions, Set<HtmlTransformer> htmlTransformers) {
    this.contributedExtensions = contributedExtensions;
    this.htmlTransformers = htmlTransformers;

    whiteList = new Whitelist() {

        @Override
        protected boolean isSafeAttribute(String tagName, Element el, Attribute attr) {
            // data-* attributes are always accepted; everything else follows the regular rules
            return attr.getKey().startsWith("data-") || super.isSafeAttribute(tagName, el, attr);
        }

    };

    whiteList
        .addTags(SAFE_TAGS)
        .addAttributes("a", "href", "title")
        .addAttributes("img", "align", "alt", "height", "src", "title", "width")
        .addAttributes("div", "itemscope", "itemtype")
        .addAttributes(":all", SAFE_ATTRIBUTES)
        .addProtocols("a", "href", SAFE_ANCHOR_SCHEMES)
        .addProtocols("blockquote", "cite", "http", "https")
        .addProtocols("cite", "cite", "http", "https")
        .addProtocols("img", "src", "http", "https")
        .addProtocols("q", "cite", "http", "https")
        .preserveRelativeLinks(true);
}
/**
 * Parses the question markup of the given section question, converts every element
 * matched by {@code CLOZE_SELECTOR} into an {@code input} element marked as correct or
 * incorrect, tallies the outcome in a fresh {@code score}, and stores the resulting body
 * markup in {@code this.question}.
 *
 * @param esq section question whose question text is transformed
 */
public void setQuestionWithResults(ExamSectionQuestion esq) {
    Map<String, String> answers = asMap(new Gson());
    Document doc = Jsoup.parse(esq.getQuestion().getQuestion());
    Elements blanks = doc.select(CLOZE_SELECTOR);
    score = new Score();

    blanks.forEach(blank -> {
        // Everything read from the blank is captured before its attributes are stripped.
        boolean numeric = isNumeric(blank);
        boolean correct = isCorrectAnswer(blank, answers);
        String precision = blank.attr("precision");

        if (!correct) {
            score.incorrectAnswers++;
        } else {
            score.correctAnswers++;
        }

        // Drop every attribute except "id".
        for (Iterator<Attribute> attrs = blank.attributes().iterator(); attrs.hasNext();) {
            Attribute current = attrs.next();
            if (current.getKey().equals("id")) {
                continue;
            }
            attrs.remove();
        }

        blank.tagName("input");
        blank.text("");
        String resultClass = correct ? "cloze-correct" : "cloze-incorrect";
        blank.attr("class", resultClass);
        String inputType = numeric ? "number" : "text";
        blank.attr("type", inputType);
        if (numeric) {
            blank.append("<span class=\"cloze-precision\">[±" + precision + "]</span>");
        }
    });

    this.question = doc.body().children().toString();
}
/** * Process Element node to check for vue attributes. * @param element Current node being processed */ private void processElementNode(Element element) { Optional<LocalComponent> localComponent = context.getLocalComponent(element.tagName()); // Iterate on element attributes Set<LocalComponentProp> foundProps = new HashSet<>(); for (Attribute attribute : element.attributes()) { String attributeName = attribute.getKey().toLowerCase(); if ("v-for".equals(attributeName) || "v-model".equals(attributeName)) continue; Optional<LocalComponentProp> optionalProp = localComponent.flatMap(lc -> lc.getPropForAttribute(attributeName)); optionalProp.ifPresent(foundProps::add); if (!VUE_ATTR_PATTERN.matcher(attributeName).matches()) { optionalProp.ifPresent(this::validateStringPropBinding); continue; } currentAttribute = attribute; currentProp = optionalProp.orElse(null); currentExpressionReturnType = getExpressionReturnTypeForAttribute(attribute); attribute.setValue(processExpression(attribute.getValue())); } localComponent.ifPresent(lc -> validateRequiredProps(lc, foundProps)); }
/**
 * Prepends a hidden {@code input} element to the form.
 *
 * @param form element that receives the new first child
 * @param formIdAttrName value of the input's {@code name} attribute
 * @param formIdAttrValue value of the input's {@code value} attribute
 */
private void addHiddenInputTag(Element form, String formIdAttrName, String formIdAttrValue) {
    Attribute typeAttr = new Attribute("type", "hidden");
    Attribute nameAttr = new Attribute("name", formIdAttrName);
    Attribute valueAttr = new Attribute("value", formIdAttrValue);

    Attributes hiddenInputAttrs = new Attributes();
    hiddenInputAttrs.put(typeAttr);
    hiddenInputAttrs.put(nameAttr);
    hiddenInputAttrs.put(valueAttr);

    Element hiddenInput = new Element(Tag.valueOf("input"), "/", hiddenInputAttrs);
    form.prependChild(hiddenInput);
}
/**
 * Builds the params object from the given attribute.
 *
 * @param paramsAttribute attribute holding the params as text; may be {@code null}
 * @return a {@code JsonObject} created from the attribute value, or an empty
 *         {@code JsonObject} when the attribute is {@code null} or its value is empty
 */
private JsonObject getParams(Attribute paramsAttribute) {
    boolean hasParams =
        paramsAttribute != null && !StringUtils.isEmpty(paramsAttribute.getValue());
    return hasParams ? new JsonObject(paramsAttribute.getValue()) : new JsonObject();
}
/**
 * Factory method that creates a context from the {@link Fragment}. The attributes of the
 * first element in the fragment's body are scanned; those whose key matches
 * {@code DATA_SERVICE} become service entries, each paired with the attribute matching
 * {@code DATA_PARAMS} that shares its namespace (if any).
 *
 * @param fragment - fragment from which the context will be created.
 * @return a FragmentContext that wraps given fragment.
 */
public static FragmentContext from(Fragment fragment) {
    Element scriptTag = Jsoup.parseBodyFragment(fragment.content()).body().child(0);
    List<Attribute> attributes = scriptTag.attributes().asList();

    // service attributes, keyed by the namespace extracted from their key
    Map<String, Attribute> serviceAttributes = attributes.stream()
        .filter(attr -> attr.getKey().matches(DATA_SERVICE))
        .collect(Collectors.toMap(
            attr -> ServiceAttributeUtil.extractNamespace(attr.getKey()),
            Function.identity()));

    // params attributes, keyed the same way
    Map<String, Attribute> paramsAttributes = attributes.stream()
        .filter(attr -> attr.getKey().matches(DATA_PARAMS))
        .collect(Collectors.toMap(
            attr -> ServiceAttributeUtil.extractNamespace(attr.getKey()),
            Function.identity()));

    return new FragmentContext()
        .fragment(fragment)
        .services(
            serviceAttributes.entrySet().stream()
                .map(service ->
                    new ServiceEntry(service.getValue(), paramsAttributes.get(service.getKey())))
                .collect(Collectors.toList()));
}
/**
 * After merging the first configured service's params into an entry whose params
 * attribute defines {@code path}, the entry's {@code path} is the one from the attribute.
 */
@Test
public void mergePayload_pathFromParamsAttribute() {
    Attribute serviceAttribute = new Attribute("data-knotx-service-first", "first-service");
    Attribute paramsAttribute =
        new Attribute("data-knotx-params-first", "{\"path\":\"first-service\"}");
    ServiceEntry serviceEntry = new ServiceEntry(serviceAttribute, paramsAttribute);

    serviceEntry.mergeParams(
        configWithDefaultParams.getServices().stream().findFirst().get().getParams());

    Assert.assertEquals("first-service", serviceEntry.getParams().getString("path"));
}
/**
 * After merging the first configured service's params into an entry whose params
 * attribute is an empty object, the entry's {@code path} is
 * {@code /service/mock/first.json}.
 */
@Test
public void mergePayload_pathFromConfigAttribute() {
    Attribute serviceAttribute = new Attribute("data-knotx-service-first", "first-service");
    Attribute paramsAttribute = new Attribute("data-knotx-params-first", "{}");
    ServiceEntry serviceEntry = new ServiceEntry(serviceAttribute, paramsAttribute);

    serviceEntry.mergeParams(
        configWithDefaultParams.getServices().stream().findFirst().get().getParams());

    Assert.assertEquals("/service/mock/first.json", serviceEntry.getParams().getString("path"));
}