/**
 * Reads the characters supplied by {@code reader}, parses them as HTML with jsoup and
 * returns the object assembled by a {@code JSoupHtmlNodeVisitor} while the parsed
 * document is traversed.
 *
 * @param reader source of the HTML text; it is not closed by this method
 * @param config not consulted by this implementation
 * @return the object reported by the visitor once traversal has finished
 * @throws RuntimeException wrapping any exception raised while reading from {@code reader}
 */
@Override
public _Object deserialize(Reader reader, Config config) {
    StringBuilder html = new StringBuilder();
    char[] chunk = new char[100];
    try {
        // Keep pulling chunks for as long as the reader hands back at least one character.
        int count = reader.read(chunk);
        while (count > 0) {
            html.append(chunk, 0, count);
            count = reader.read(chunk);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    Document parsed = Jsoup.parse(html.toString());
    JSoupHtmlNodeVisitor collector = new JSoupHtmlNodeVisitor();
    new NodeTraversor(collector).traverse(parsed);
    return collector.getObject();
}
/** * Format an Element to plain-text * @param element the root element to format * @return formatted text */ private static String getPlainText(Element element) { FormattingVisitor formatter = new FormattingVisitor(); NodeTraversor.traverse(formatter, element); // walk the DOM, and call .head() and .tail() for each node return formatter.toString(); }
/**
 * Strips HTML tags from a Google Calendar event's description. The description is
 * parsed with jsoup and the resulting document is fed through a {@code FormattingVisitor}.
 *
 * @param description an event description possibly containing HTML tags
 * @return an event description free of HTML tags
 */
public static String cleanDescription(String description) {
    FormattingVisitor stripper = new FormattingVisitor();
    NodeTraversor walker = new NodeTraversor(stripper);
    walker.traverse(Jsoup.parse(description));
    return stripper.toString();
}
/** * Format an Element to plain-text * @param element the root element to format * @return formatted text */ public String getPlainText(Element element) { FormattingVisitor formatter = new FormattingVisitor(); NodeTraversor traversor = new NodeTraversor(formatter); traversor.traverse(element); // walk the DOM, and call .head() and .tail() for each node return formatter.toString(); }
/** * Converts a jsoup document into the provided W3C Document. If required, you can set options on the output document * before converting. * @param in jsoup doc * @param out w3c doc * @see org.jsoup.helper.W3CDom#fromJsoup(org.jsoup.nodes.Document) */ public void convert(org.jsoup.nodes.Document in, Document out) { if (!StringUtil.isBlank(in.location())) out.setDocumentURI(in.location()); org.jsoup.nodes.Element rootEl = in.child(0); // skip the #root node NodeTraversor traversor = new NodeTraversor(new W3CBuilder(out)); traversor.traverse(rootEl); }
/**
 * Runs a depth-first traversal over this node and its descendants.
 *
 * @param nodeVisitor the visitor callbacks to perform on each node; checked with
 *                    {@code Validate.notNull} before the traversal starts
 * @return this node, for chaining
 */
public Node traverse(NodeVisitor nodeVisitor) {
    Validate.notNull(nodeVisitor);
    new NodeTraversor(nodeVisitor).traverse(this);
    return this;
}
@Override public Iterator<URL> getEmbeddedResourceURLs(String userAgent, byte[] html, URL baseUrl, URLCollection coll, String encoding) throws HTMLParseException { try { // TODO Handle conditional comments for IE String contents = new String(html,encoding); Document doc = Jsoup.parse(contents); JMeterNodeVisitor nodeVisitor = new JMeterNodeVisitor(new URLPointer(baseUrl), coll); new NodeTraversor(nodeVisitor).traverse(doc); return coll.iterator(); } catch (Exception e) { throw new HTMLParseException(e); } }
/** * Format an Element to plain-text * * @param element the root element to format * @return formatted text */ public String getPlainText(Element element) { FormattingVisitor formatter = new FormattingVisitor(); NodeTraversor traversor = new NodeTraversor(formatter); traversor.traverse(element); // walk the DOM, and call .head() and .tail() for each node return formatter.toString(); }
/** * Format an Element to LaTeX * * @param element * the root element to format * @return formatted text */ public static String getLatexText(Element element) { FormattingVisitor formatter = new FormattingVisitor(); NodeTraversor traversor = new NodeTraversor(formatter); traversor.traverse(element); // walk the DOM, and call .head() and // .tail() for each node return formatter.toString(); }
/** * Converts a jsoup document into the provided W3C Document. If required, you can set options on the output document * before converting. * @param in jsoup doc * @param out w3c doc * @see org.jsoup.helper.W3CDom#fromJsoup(org.jsoup.nodes.Document) */ public void convert(org.jsoup.nodes.Document in, Document out) { if (!StringUtil.isBlank(in.location())) out.setDocumentURI(in.location()); org.jsoup.nodes.Element rootEl = in.child(0); // skip the #root node NodeTraversor.traverse(new W3CBuilder(out), rootEl); }
/** * Format an Element to plain-text * @param element the root element to format * @return formatted text */ public String getPlainText(Element element) { FormattingVisitor formatter = new FormattingVisitor(); NodeTraversor.traverse(formatter, element); // walk the DOM, and call .head() and .tail() for each node return formatter.toString(); }
/**
 * Decodes {@code html} with the given encoding, parses it with jsoup and walks the
 * document with a {@code JMeterNodeVisitor} constructed from the base URL and {@code coll}.
 *
 * @param html     raw HTML bytes
 * @param baseUrl  base URL, passed to the visitor inside a {@code URLPointer}
 * @param coll     collection passed to the visitor; its iterator is returned
 * @param encoding charset name used to decode {@code html}
 * @return {@code coll.iterator()} once the traversal has completed
 * @throws HTMLParseException wrapping any exception raised along the way
 */
@Override
public Iterator<URL> getEmbeddedResourceURLs(byte[] html, URL baseUrl, URLCollection coll,
        String encoding) throws HTMLParseException {
    try {
        final String markup = new String(html, encoding);
        final Document parsed = Jsoup.parse(markup);
        final URLPointer basePointer = new URLPointer(baseUrl);
        final JMeterNodeVisitor urlVisitor = new JMeterNodeVisitor(basePointer, coll);
        final NodeTraversor walker = new NodeTraversor(urlVisitor);
        walker.traverse(parsed);
        return coll.iterator();
    } catch (Exception e) {
        throw new HTMLParseException(e);
    }
}
/**
 * Collects the text of a node by traversing it with a {@code ToTextNodeVisitor} that
 * writes into a buffer, then trims the result.
 *
 * @param node the node to convert; may be {@code null}
 * @return the trimmed buffer contents, or an empty string when {@code node} is {@code null}
 */
protected static String convertNodeToText(Node node) {
    if (node != null) {
        StringBuilder text = new StringBuilder();
        ToTextNodeVisitor textVisitor = new ToTextNodeVisitor(text);
        NodeTraversor walker = new NodeTraversor(textVisitor);
        walker.traverse(node);
        return text.toString().trim();
    }
    return "";
}
/**
 * Collects the text of several elements into one buffer. A single traversor backed by
 * one {@code ToTextNodeVisitor} is reused for every element, in iteration order, and
 * the combined result is trimmed.
 *
 * @param elements the elements to convert; may be {@code null} or empty
 * @return the trimmed buffer contents, or an empty string when there are no elements
 */
protected static String convertElementsToText(Elements elements) {
    boolean hasElements = elements != null && !elements.isEmpty();
    if (!hasElements) {
        return "";
    }

    StringBuilder text = new StringBuilder();
    ToTextNodeVisitor textVisitor = new ToTextNodeVisitor(text);
    NodeTraversor walker = new NodeTraversor(textVisitor);
    for (Element current : elements) {
        walker.traverse(current);
    }

    String combined = text.toString();
    return combined.trim();
}
/** * Convert an HTML string to a plain-text string. * * @param htmlStr a string containing HTML markup * @return formatted text */ public static String toPlainText(String htmlStr) { Document doc = Jsoup.parse(htmlStr); PlainTextFormattingVisitor formatter = new PlainTextFormattingVisitor(); NodeTraversor traversor = new NodeTraversor(formatter); traversor.traverse(doc); // walk the DOM, and call .head() and .tail() for each node return formatter.toString(); }
/**
 * Traverses {@code source} with a {@code CleaningVisitor} constructed from
 * {@code source} and {@code destination}. Whatever is written into
 * {@code destination} is decided by that visitor, not by this method.
 *
 * @param source      root of the tree that is traversed
 * @param destination element handed to the visitor as its target
 */
private void copySafeNodes(Element source, Element destination) {
    CleaningVisitor cleaner = new CleaningVisitor(source, destination);
    new NodeTraversor(cleaner).traverse(source);
}
/**
 * Traverses {@code source} with a {@code CleaningVisitor} constructed from
 * {@code source} and {@code dest}, then reports the visitor's discard counter.
 *
 * @param source root of the tree that is traversed
 * @param dest   element handed to the visitor as its target
 * @return the visitor's {@code numDiscarded} value after the traversal
 */
private int copySafeNodes(Element source, Element dest) {
    CleaningVisitor cleaner = new CleaningVisitor(source, dest);
    new NodeTraversor(cleaner).traverse(source);
    int discarded = cleaner.numDiscarded;
    return discarded;
}
/**
 * Traverses from this node with an {@code OuterHtmlVisitor} built from {@code accum}
 * and the settings returned by {@code getOutputSettings()}.
 *
 * @param accum the appendable passed to the visitor
 */
protected void outerHtml(Appendable accum) {
    OuterHtmlVisitor htmlVisitor = new OuterHtmlVisitor(accum, getOutputSettings());
    NodeTraversor walker = new NodeTraversor(htmlVisitor);
    walker.traverse(this);
}
/**
 * Traverses from this node with an {@code OuterHtmlVisitor} built from {@code accum}
 * and the settings returned by {@code getOutputSettings()}.
 *
 * @param accum the string builder passed to the visitor
 */
protected void outerHtml(StringBuilder accum) {
    OuterHtmlVisitor htmlVisitor = new OuterHtmlVisitor(accum, getOutputSettings());
    NodeTraversor walker = new NodeTraversor(htmlVisitor);
    walker.traverse(this);
}