/**
 * Builds a {@code <div>} containing five placeholder nodes and checks that the output of
 * {@code ArticleUtil.replacePlaceholderWithNode} contains the outer HTML of every original
 * node registered in the id map.
 */
@Test
public void replacePlaceholderWithNode() {
    Map<String, Node> originalsById = new HashMap<>();
    StringBuilder markup = new StringBuilder("<div>");
    for (int index = 0; index < 5; index++) {
        String nodeId = "id" + index;
        Attributes attributes = new Attributes();
        attributes.put("id", nodeId);

        Element original = new Element(Tag.valueOf("span"), "", attributes);
        original.append("The original node");
        originalsById.put(nodeId, original);

        // Only the placeholder goes into the markup; the original lives in the map.
        Element placeholder = ArticleUtil.generatePlaceholderNode(nodeId);
        markup.append(placeholder.outerHtml());
    }
    markup.append("</div>");

    String replaced = ArticleUtil.replacePlaceholderWithNode(originalsById, markup.toString());

    for (Node expected : originalsById.values()) {
        assertThat(replaced).contains(expected.outerHtml());
    }
}
/**
 * Creates a new element with the same tag name and base URI as {@code sourceEl}, copying
 * only the attributes that the whitelist reports as safe and then adding the attributes the
 * whitelist enforces for that tag.
 *
 * @param sourceEl the element to copy from
 * @return an {@code ElementMeta} holding the new element and the number of source
 *         attributes that were not copied
 */
private ElementMeta createSafeElement(Element sourceEl) {
    String tagName = sourceEl.tagName();
    Attributes keptAttrs = new Attributes();
    Element safeEl = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), keptAttrs);

    int discardedCount = 0;
    for (Attribute candidate : sourceEl.attributes()) {
        if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
            discardedCount++;
            continue;
        }
        keptAttrs.put(candidate);
    }

    // Enforced attributes are added after the copy loop.
    keptAttrs.addAll(whitelist.getEnforcedAttributes(tagName));
    return new ElementMeta(safeEl, discardedCount);
}
@Test public void handlesBaseUri() { Tag tag = Tag.valueOf("a"); Attributes attribs = new Attributes(); attribs.put("relHref", "/foo"); attribs.put("absHref", "http://bar/qux"); Element noBase = new Element(tag, "", attribs); assertEquals("", noBase.absUrl("relHref")); // with no base, should NOT fallback to href attrib, whatever it is assertEquals("http://bar/qux", noBase.absUrl("absHref")); // no base but valid attrib, return attrib Element withBase = new Element(tag, "http://foo/", attribs); assertEquals("http://foo/foo", withBase.absUrl("relHref")); // construct abs from base + rel assertEquals("http://bar/qux", withBase.absUrl("absHref")); // href is abs, so returns that assertEquals("", withBase.absUrl("noval")); Element dodgyBase = new Element(tag, "wtf://no-such-protocol/", attribs); assertEquals("http://bar/qux", dodgyBase.absUrl("absHref")); // base fails, but href good, so get that assertEquals("", dodgyBase.absUrl("relHref")); // base fails, only rel href, so return nothing }
/**
 * Maps one empty element per semantic tag name through {@code SemanticHtml} and checks that
 * the first collected annotation is an instance of the expected class for that tag.
 */
@Test
public void testMain() throws UIMAException {
    JCas jCas = JCasSingleton.getJCasInstance();
    SemanticHtml mapper = new SemanticHtml();

    Map<String, Class<?>> expectedByTag = new HashMap<>();
    expectedByTag.put("time", Temporal.class);
    expectedByTag.put("meter", Quantity.class);
    expectedByTag.put("dfn", Buzzword.class);
    expectedByTag.put("address", Location.class);
    expectedByTag.put("abbr", Buzzword.class);
    expectedByTag.put("cite", DocumentReference.class);

    for (Map.Entry<String, Class<?>> entry : expectedByTag.entrySet()) {
        Class<?> expectedType = entry.getValue();
        Element element = new Element(Tag.valueOf(entry.getKey()), "");
        AnnotationCollector collector = new AnnotationCollector();

        mapper.map(jCas, element, collector);

        // A null expectation means no annotations should have been collected at all.
        if (expectedType == null) {
            assertNull(collector.getAnnotations());
            continue;
        }
        assertTrue(expectedType.isInstance(collector.getAnnotations().get(0)));
    }
}
/**
 * Maps {@code h1} to {@code h4} elements through {@code StructuralAnnotations} using one
 * shared collector and checks that the collected {@code Heading} annotations carry levels
 * 1 to 4 in that order.
 */
@Test
public void testHeadings() throws UIMAException {
    final JCas jCas = JCasSingleton.getJCasInstance();
    final StructuralAnnotations mapper = new StructuralAnnotations();
    final int levelCount = 4;

    final Element[] headingElements = new Element[levelCount];
    for (int i = 0; i < levelCount; i++) {
        headingElements[i] = new Element(Tag.valueOf("h" + (i + 1)), "");
    }

    final AnnotationCollector collector = new AnnotationCollector();
    for (final Element headingElement : headingElements) {
        mapper.map(jCas, headingElement, collector);
    }

    // Cast every annotation before asserting, so a wrong type surfaces before any level check.
    final Heading[] headings = new Heading[levelCount];
    for (int i = 0; i < levelCount; i++) {
        headings[i] = (Heading) collector.getAnnotations().get(i);
    }

    for (int i = 0; i < levelCount; i++) {
        assertEquals(i + 1, headings[i].getLevel());
    }
}
/**
 * Maps two anchors (one with an absolute href, one with a rooted path) through
 * {@code StructuralAnnotations} and checks that each yields a {@code Link} whose target
 * equals the href value as written.
 */
@Test
public void testLink() throws UIMAException {
    final JCas jCas = JCasSingleton.getJCasInstance();
    final StructuralAnnotations mapper = new StructuralAnnotations();

    final Element absoluteAnchor = new Element(Tag.valueOf("a"), "");
    absoluteAnchor.attr("href", "http://example.com");
    final Element rootedAnchor = new Element(Tag.valueOf("a"), "");
    rootedAnchor.attr("href", "/example.com");

    final AnnotationCollector collector = new AnnotationCollector();
    mapper.map(jCas, absoluteAnchor, collector);
    mapper.map(jCas, rootedAnchor, collector);

    Annotation first = collector.getAnnotations().get(0);
    assertTrue(first instanceof Link);
    Link firstLink = (Link) first;
    assertEquals("http://example.com", firstLink.getTarget());

    Annotation second = collector.getAnnotations().get(1);
    assertTrue(second instanceof Link);
    Link secondLink = (Link) second;
    assertEquals("/example.com", secondLink.getTarget());
}
/**
 * Maps {@code <main>} elements with different {@code class} values through
 * {@code StructuralAnnotations} and checks that the first collected annotation is an
 * instance of the class expected for that value.
 */
@Test
public void testMain() throws UIMAException {
    final JCas jCas = JCasSingleton.getJCasInstance();
    final StructuralAnnotations mapper = new StructuralAnnotations();

    final Map<String, Class<?>> expectedByCssClass = new HashMap<>();
    expectedByCssClass.put("Document", Document.class);
    expectedByCssClass.put("SlideShow", SlideShow.class);
    expectedByCssClass.put("SpreadSheet", SpreadSheet.class);
    expectedByCssClass.put("Another", Document.class);

    for (final Map.Entry<String, Class<?>> entry : expectedByCssClass.entrySet()) {
        final Class<?> expectedType = entry.getValue();
        final Element mainElement = new Element(Tag.valueOf("main"), "");
        mainElement.attr("class", entry.getKey());

        final AnnotationCollector collector = new AnnotationCollector();
        mapper.map(jCas, mainElement, collector);

        // A null expectation means no annotations should have been collected at all.
        if (expectedType == null) {
            assertNull(collector.getAnnotations());
            continue;
        }
        assertTrue(expectedType.isInstance(collector.getAnnotations().get(0)));
    }
}
/**
 * Maps {@code <article>} elements with different {@code class} values through
 * {@code StructuralAnnotations} and checks that the first collected annotation is an
 * instance of the class expected for that value.
 */
@Test
public void testArticle() throws UIMAException {
    final JCas jCas = JCasSingleton.getJCasInstance();
    final StructuralAnnotations mapper = new StructuralAnnotations();

    final Map<String, Class<?>> expectedByCssClass = new HashMap<>();
    expectedByCssClass.put("Sheet", Sheet.class);
    expectedByCssClass.put("Slide", Slide.class);
    expectedByCssClass.put("Page", Page.class);
    expectedByCssClass.put("Another", Page.class);

    for (final Map.Entry<String, Class<?>> entry : expectedByCssClass.entrySet()) {
        final Class<?> expectedType = entry.getValue();
        final Element articleElement = new Element(Tag.valueOf("article"), "");
        articleElement.attr("class", entry.getKey());

        final AnnotationCollector collector = new AnnotationCollector();
        mapper.map(jCas, articleElement, collector);

        // A null expectation means no annotations should have been collected at all.
        if (expectedType == null) {
            assertNull(collector.getAnnotations());
            continue;
        }
        assertTrue(expectedType.isInstance(collector.getAnnotations().get(0)));
    }
}
/**
 * Maps a {@code <meta name="key" content="value">} element through {@code MetaTags} and
 * checks that the first collected annotation is a {@code Metadata} with that key and value.
 */
@Test
public void testNameContent() throws UIMAException {
    JCas jCas = JCasSingleton.getJCasInstance();
    MetaTags mapper = new MetaTags();

    Element metaElement = new Element(Tag.valueOf("meta"), "");
    metaElement.attr("name", "key");
    metaElement.attr("content", "value");

    AnnotationCollector collector = new AnnotationCollector();
    mapper.map(jCas, metaElement, collector);

    Metadata metadata = (Metadata) collector.getAnnotations().get(0);
    assertEquals("key", metadata.getKey());
    assertEquals("value", metadata.getValue());
}
/**
 * Copies {@code source} into the destination tree when it is considered safe.
 *
 * <ul>
 *   <li>Does nothing while {@code skipChildren} is set.</li>
 *   <li>An element passing {@code isSafeTag} is re-created with the same tag, base URI, and a
 *       clone of its attributes, appended to {@code destination}, and becomes the new
 *       {@code destination}.</li>
 *   <li>An element failing {@code isSafeTag} sets {@code skipChildren}, unless it is
 *       {@code root}.</li>
 *   <li>Text nodes are always copied; data nodes are copied only when their parent passes
 *       {@code isSafeTag}.</li>
 * </ul>
 *
 * @param source the node being visited
 * @param depth  unused by this method
 */
public void head(Node source, int depth) {
    if (skipChildren) {
        return;
    }

    if (source instanceof Element) {
        Element element = (Element) source;
        if (isSafeTag(element)) {
            Attributes copiedAttributes = element.attributes().clone();
            Element copy = new Element(Tag.valueOf(element.tagName()), element.baseUri(), copiedAttributes);
            destination.appendChild(copy);
            // Later nodes are appended beneath the copy just created.
            destination = copy;
        } else if (source != root) {
            skipChildren = true;
        }
        return;
    }

    if (source instanceof TextNode) {
        TextNode text = (TextNode) source;
        destination.appendChild(new TextNode(text.getWholeText(), source.baseUri()));
        return;
    }

    if (source instanceof DataNode && isSafeTag(source.parent())) {
        DataNode data = (DataNode) source;
        destination.appendChild(new DataNode(data.getWholeData(), source.baseUri()));
    }
}
/**
 * Prepends a hidden {@code <input>} element to the given form.
 *
 * @param form            the element that receives the new first child
 * @param formIdAttrName  value for the input's {@code name} attribute
 * @param formIdAttrValue value for the input's {@code value} attribute
 */
private void addHiddenInputTag(Element form, String formIdAttrName, String formIdAttrValue) {
    Attributes inputAttributes = new Attributes();
    inputAttributes.put(new Attribute("type", "hidden"));
    inputAttributes.put(new Attribute("name", formIdAttrName));
    inputAttributes.put(new Attribute("value", formIdAttrValue));

    Element hiddenInput = new Element(Tag.valueOf("input"), "/", inputAttributes);
    form.prependChild(hiddenInput);
}
/**
 * Builds an {@link org.jsoup.nodes.Element} made of an {@code <a>} tag wrapping an
 * {@code <img>} tag. The anchor's {@code href} and the image's {@code src} are both set to
 * the supplied {@link ImageAction} link representation.
 *
 * @param imageActionStringRepresentation the value used for both {@code href} and {@code src}
 * @return the {@code <a>} element containing the {@code <img>} child
 */
public static Element getImageActionElement(String imageActionStringRepresentation) {
    Element image = new Element(Tag.valueOf("img"), "");
    image.attr("src", imageActionStringRepresentation);

    Element anchor = new Element(Tag.valueOf("a"), "");
    anchor.attr("href", imageActionStringRepresentation);
    return anchor.appendChild(image);
}
/**
 * Loads the sample AJAX response fixture, removes the first {@code div.image-show-container}
 * from the post body, and appends an {@code <img>} whose {@code src} is
 * {@code expectedImageSource} with a {@code .gif} suffix.
 *
 * @return the outer HTML of the modified document
 */
private String getResourceWithExternalGif() {
    Document document = Jsoup.parse(loader.readTestResourceFile("SampleImageCommentAJAXCallResponse.html"));
    Element body = document.select(".communication__body__text").first();

    Element imageContainer = body.select("div.image-show-container").first();
    imageContainer.remove();

    Element gifImage = new Element(Tag.valueOf("img"), "");
    gifImage.attr("src", expectedImageSource + ".gif");
    body.appendChild(gifImage);

    return document.outerHtml();
}
/**
 * Rebuilds a new Element from the original Element: same tag name, same base URI, and every
 * attribute of the source put into the new element's attribute set. Child nodes are not
 * copied.
 *
 * @param sourceEl the element to rebuild from
 * @return the newly created element
 */
private static Element createSafeElement(Element sourceEl) {
    Attributes copiedAttrs = new Attributes();
    Element rebuilt = new Element(Tag.valueOf(sourceEl.tagName()), sourceEl.baseUri(), copiedAttrs);
    for (Attribute attribute : sourceEl.attributes()) {
        copiedAttrs.put(attribute);
    }
    return rebuilt;
}
/**
 * Walks from {@code node} up through its parents and returns the Tag of the first one that
 * is a heading (h1, h2..). The search starts at {@code node} itself, so a heading node
 * yields its own tag.
 *
 * @param node the node to start from; may be null
 * @return the heading Tag found, or null if neither the node nor any ancestor is a heading
 */
public static Tag findHeadingAncestor(Node node) {
    for (Node current = node; current != null; current = current.parent()) {
        Tag heading = getHeadingTag(current);
        if (heading != null) {
            return heading;
        }
    }
    return null;
}
/**
 * Returns the Tag of the given node when it is an Element whose tag name is classified as
 * {@code TagType.BLOCKLEVEL_TITLE} (a heading: h1, h2..).
 *
 * @param elem the node to inspect; may be null
 * @return the element's Tag if it is a heading, otherwise null
 */
public static Tag getHeadingTag(Node elem) {
    // instanceof is false for null, so this also covers a null argument.
    if (!(elem instanceof Element)) {
        return null;
    }
    Tag tag = ((Element) elem).tag();
    if (TagsType.get(tag.getName()) == TagType.BLOCKLEVEL_TITLE) {
        return tag;
    }
    return null;
}
/** * 将所有 {@code <p>} 标签转换为 {@code <div>} 标签。 */ private static void convertNodePToDiv(Element element) { Elements elements = element.select("p"); for (Element e : elements) { Element div = new Element(Tag.valueOf("div"), ""); div.html(e.html()); // 尝试将 <p> 的样式复制到 <div> if (!StringUtils.isBlank(e.attr("style"))) { div.attr("style", e.attr("style")); } e.replaceWith(div); logger.trace("Replaced {} with {}", e.nodeName(), div.nodeName()); } }
/**
 * Generates a non-translatable placeholder: an empty {@code <var>} element carrying the
 * given id and {@code translate="no"}.
 *
 * @param id value for the element's {@code id} attribute
 * @return the new placeholder element
 */
public static Element generatePlaceholderNode(@NotNull String id) {
    Attributes placeholderAttrs = new Attributes();
    placeholderAttrs.put("id", id);
    placeholderAttrs.put("translate", "no");

    Tag varTag = Tag.valueOf("var");
    return new Element(varTag, "", placeholderAttrs);
}
/**
 * Checks that a {@code TranslatableHTMLNode} built from a single element exposes the
 * placeholder map it was given and the element's outer HTML.
 */
@Test
public void testConstructor() {
    Map<String, Node> placeholders = new HashMap<>();
    Element span = new Element(Tag.valueOf("span"), "", new Attributes());

    TranslatableHTMLNode translatable = new TranslatableHTMLNode(Lists.newArrayList(span), placeholders);

    assertThat(translatable.getPlaceholderIdMap()).isEqualTo(placeholders);
    assertThat(translatable.getHtml()).isEqualTo(span.outerHtml());
}
/**
 * Builds a {@code SubstitutionSchedule} from {@code scheduleData} and fills it by parsing
 * each document in one of three ways:
 * <ol>
 *   <li>if the document has {@code .svp} elements, each of them is parsed as a day;</li>
 *   <li>otherwise, if it has {@code .Trennlinie} elements, copies of the body's child nodes
 *       are gathered into a synthetic {@code <div>}; a {@code .Trennlinie} element closes
 *       the current group (which is then parsed) only when that group already contains a
 *       table, and the final group is always parsed;</li>
 *   <li>otherwise the whole document is parsed as a single day.</li>
 * </ol>
 * Finally the schedule's classes and teachers are set.
 *
 * @param docs the documents to parse
 * @return the populated schedule
 */
@NotNull
SubstitutionSchedule parseSVPlanSchedule(List<Document> docs) throws IOException, JSONException {
    SubstitutionSchedule schedule = SubstitutionSchedule.fromData(scheduleData);

    for (Document doc : docs) {
        if (!doc.select(".svp").isEmpty()) {
            for (Element svp : doc.select(".svp")) {
                parseSvPlanDay(schedule, svp, doc);
            }
            continue;
        }

        if (doc.select(".Trennlinie").isEmpty()) {
            parseSvPlanDay(schedule, doc, doc);
            continue;
        }

        Element group = new Element(Tag.valueOf("div"), "");
        for (Node child : doc.body().childNodesCopy()) {
            boolean isSeparator = child instanceof Element && ((Element) child).hasClass("Trennlinie");
            if (isSeparator && !group.select("table").isEmpty()) {
                parseSvPlanDay(schedule, group, doc);
                group = new Element(Tag.valueOf("div"), "");
            } else {
                // A separator seen before any table is kept as ordinary content of the group.
                group.appendChild(child);
            }
        }
        parseSvPlanDay(schedule, group, doc);
    }

    schedule.setClasses(getAllClasses());
    schedule.setTeachers(getAllTeachers());
    return schedule;
}
/**
 * Finds elements, including and recursively under this element, with the specified tag name.
 * <p>
 * The name is lower-cased with a fixed locale rather than the JVM default, so matching does
 * not change under locales with special casing rules (for example Turkish, where
 * {@code "I"} lower-cases to a dotless {@code "ı"}).
 *
 * @param tagName The tag name to search for (case insensitively). Must not be empty.
 * @return the matching elements. Will be empty if this element and none of its children match.
 */
public Elements getElementsByTag(String tagName) {
    Validate.notEmpty(tagName);
    // Fully qualified so the fix does not depend on an import being present.
    String normalizedName = tagName.toLowerCase(java.util.Locale.ENGLISH).trim();
    return Collector.collect(new Evaluator.Tag(normalizedName), this);
}
/**
 * Checks that {@code :root} selects exactly one element: the {@code html} element when
 * applied to the document fixture, and the {@code body} element when applied to a
 * {@code body} selection.
 */
@Test
public void root() {
    Elements documentRoot = html.select(":root");
    assertEquals(1, documentRoot.size());
    Element rootOfDocument = documentRoot.get(0);
    assertNotNull(rootOfDocument);
    assertEquals(Tag.valueOf("html"), rootOfDocument.tag());

    Elements bodyRoot = html.select("body").select(":root");
    assertEquals(1, bodyRoot.size());
    Element rootOfBody = bodyRoot.get(0);
    assertNotNull(rootOfBody);
    assertEquals(Tag.valueOf("body"), rootOfBody.tag());
}
@Test public void insertChildrenAtPosition() { Document doc = Jsoup.parse("<div id=1>Text1 <p>One</p> Text2 <p>Two</p></div><div id=2>Text3 <p>Three</p></div>"); Element div1 = doc.select("div").get(0); Elements p1s = div1.select("p"); Element div2 = doc.select("div").get(1); assertEquals(2, div2.childNodeSize()); div2.insertChildren(-1, p1s); assertEquals(2, div1.childNodeSize()); // moved two out assertEquals(4, div2.childNodeSize()); assertEquals(3, p1s.get(1).siblingIndex()); // should be last List<Node> els = new ArrayList<Node>(); Element el1 = new Element(Tag.valueOf("span"), "").text("Span1"); Element el2 = new Element(Tag.valueOf("span"), "").text("Span2"); TextNode tn1 = new TextNode("Text4", ""); els.add(el1); els.add(el2); els.add(tn1); assertNull(el1.parent()); div2.insertChildren(-2, els); assertEquals(div2, el1.parent()); assertEquals(7, div2.childNodeSize()); assertEquals(3, el1.siblingIndex()); assertEquals(4, el2.siblingIndex()); assertEquals(5, tn1.siblingIndex()); }
@Test public void testHashcodeIsStableWithContentChanges() { Element root = new Element(Tag.valueOf("root"), ""); HashSet<Element> set = new HashSet<Element>(); // Add root node: set.add(root); root.appendChild(new Element(Tag.valueOf("a"), "")); assertTrue(set.contains(root)); }
/**
 * Checks that {@code before} inserts content immediately ahead of the target element, both
 * for a node argument and for an HTML string argument.
 */
@Test
public void before() {
    Document doc = Jsoup.parse("<p>One <b>two</b> three</p>");
    Element emphasis = new Element(Tag.valueOf("em"), "");
    emphasis.appendText("four");

    Element bold = doc.select("b").first();
    bold.before(emphasis);
    assertEquals("<p>One <em>four</em><b>two</b> three</p>", doc.body().html());

    bold.before("<i>five</i>");
    assertEquals("<p>One <em>four</em><i>five</i><b>two</b> three</p>", doc.body().html());
}
/**
 * Checks that {@code after} inserts content immediately behind the target element, both for
 * a node argument and for an HTML string argument.
 */
@Test
public void after() {
    Document doc = Jsoup.parse("<p>One <b>two</b> three</p>");
    Element emphasis = new Element(Tag.valueOf("em"), "");
    emphasis.appendText("four");

    Element bold = doc.select("b").first();
    bold.after(emphasis);
    assertEquals("<p>One <b>two</b><em>four</em> three</p>", doc.body().html());

    bold.after("<i>five</i>");
    assertEquals("<p>One <b>two</b><i>five</i><em>four</em> three</p>", doc.body().html());
}
/**
 * Checks sibling accessors on parentless nodes: sibling index is 0, sibling collections are
 * empty, and previous/next sibling lookups return null.
 */
@Test
public void orphanNodeReturnsNullForSiblingElements() {
    Node orphanNode = new Element(Tag.valueOf("p"), "");
    Element orphanElement = new Element(Tag.valueOf("p"), "");

    // Node-level accessors
    assertEquals(0, orphanNode.siblingIndex());
    assertEquals(0, orphanNode.siblingNodes().size());
    assertNull(orphanNode.previousSibling());
    assertNull(orphanNode.nextSibling());

    // Element-level accessors
    assertEquals(0, orphanElement.siblingElements().size());
    assertNull(orphanElement.previousElementSibling());
    assertNull(orphanElement.nextElementSibling());
}