/**
 * Converts the namespaced XHTML fixture into a W3C DOM and verifies the namespace URI,
 * local name and qualified node name reported for three elements: the first child of
 * the document, an {@code epub:}-prefixed element and an {@code x:}-prefixed element.
 *
 * @throws IOException if the fixture file cannot be read
 */
@Test
public void namespacePreservation() throws IOException {
    File fixture = ParseTest.getFile("/htmltests/namespaces.xhtml");
    org.jsoup.nodes.Document parsed = Jsoup.parse(fixture, "UTF-8");
    Document w3cDoc = new org.jsoup.helper.W3CDom().fromJsoup(parsed);

    // First child node of the document; expected to be the <html> element.
    Node root = w3cDoc.getChildNodes().item(0);
    assertEquals("http://www.w3.org/1999/xhtml", root.getNamespaceURI());
    assertEquals("html", root.getLocalName());
    assertEquals("html", root.getNodeName());

    // Child node index 3 of the root's child node index 2.
    Node epubNode = root.getChildNodes().item(2).getChildNodes().item(3);
    assertEquals("http://www.idpf.org/2007/ops", epubNode.getNamespaceURI());
    assertEquals("title", epubNode.getLocalName());
    assertEquals("epub:title", epubNode.getNodeName());

    // Two siblings after the epub node.
    Node sectionNode = epubNode.getNextSibling().getNextSibling();
    assertEquals("urn:test", sectionNode.getNamespaceURI());
    assertEquals("section", sectionNode.getLocalName());
    assertEquals("x:section", sectionNode.getNodeName());
}
/**
 * Parses the invalid-table fixture, renders it with pretty-printing enabled, and checks
 * that the text "Why am I here?" occurs after the first occurrence of "Comment" in the
 * rendered output.
 *
 * @throws IOException if the fixture file cannot be read
 */
@Test
public void testInvalidTableContents() throws IOException {
    File fixture = ParseTest.getFile("/htmltests/table-invalid-elements.html");
    Document parsed = Jsoup.parse(fixture, "UTF-8");
    parsed.outputSettings().prettyPrint(true);
    String html = parsed.toString();

    int commentPos = html.indexOf("Comment");
    int searchTextPos = html.indexOf("Why am I here?");

    // indexOf yields -1 when the substring is absent.
    assertTrue("Comment not found", commentPos >= 0);
    assertTrue("Search text not found", searchTextPos >= 0);
    assertTrue("Search text did not come after comment", commentPos < searchTextPos);
}
/**
 * Checks key, value and {@code hasInputStream()} for a {@code KeyVal} created with an
 * input stream and for one created from a key/value pair only.
 */
@Test
public void inputStream() {
    // Three-argument factory: key, value (used as "thumb.jpg" here) and a stream.
    Connection.KeyVal withStream =
            HttpConnection.KeyVal.create("file", "thumb.jpg", ParseTest.inputStreamFrom("Check"));
    assertEquals("file", withStream.key());
    assertEquals("thumb.jpg", withStream.value());
    assertTrue(withStream.hasInputStream());

    // Two-argument factory: no stream attached.
    Connection.KeyVal withoutStream = HttpConnection.KeyVal.create("one", "two");
    assertEquals("one", withoutStream.key());
    assertEquals("two", withoutStream.value());
    assertFalse(withoutStream.hasInputStream());
}
/**
 * Converts the Google iPod fixture to a W3C DOM and checks that the first child of the
 * document is an {@code html} element with a {@code null} namespace URI, and that the
 * serialized output contains the text "ipod".
 *
 * @throws IOException if the fixture file cannot be read
 */
@Test
public void convertsGoogle() throws IOException {
    File fixture = ParseTest.getFile("/htmltests/google-ipod.html");
    org.jsoup.nodes.Document jsoupDoc = Jsoup.parse(fixture, "UTF8");

    W3CDom converter = new W3CDom();
    Document w3cDoc = converter.fromJsoup(jsoupDoc);

    Node root = w3cDoc.getChildNodes().item(0);
    assertEquals(null, root.getNamespaceURI());
    assertEquals("html", root.getLocalName());
    assertEquals("html", root.getNodeName());

    String serialized = converter.asString(w3cDoc);
    assertTrue(serialized.contains("ipod"));
}
/**
 * Verifies {@code location()} and {@code baseUri()} of documents parsed from a file with
 * an explicit base URI argument.
 *
 * <p>Two fixtures are covered: for the Yahoo JP page the expected base URI differs from
 * the location that was passed in; for the NYT article both are expected to equal the
 * URI that was passed in.
 *
 * @throws IOException if a fixture file cannot be read
 */
@Test
public void testLocation() throws IOException {
    // getFile is called statically, matching the other tests, instead of through a
    // throwaway ParseTest instance.
    File yahooFile = ParseTest.getFile("/htmltests/yahoo-jp.html");
    Document yahooDoc = Jsoup.parse(yahooFile, "UTF-8", "http://www.yahoo.co.jp/index.html");
    String yahooLocation = yahooDoc.location();
    String yahooBaseUri = yahooDoc.baseUri();
    assertEquals("http://www.yahoo.co.jp/index.html", yahooLocation);
    // NOTE(review): the base URI presumably comes from a <base> element in the fixture — confirm.
    assertEquals("http://www.yahoo.co.jp/_ylh=X3oDMTB0NWxnaGxsBF9TAzIwNzcyOTYyNjUEdGlkAzEyBHRtcGwDZ2Ex/", yahooBaseUri);

    // Second scenario passes null as the charset name.
    File nytFile = ParseTest.getFile("/htmltests/nyt-article-1.html");
    Document nytDoc = Jsoup.parse(nytFile, null, "http://www.nytimes.com/2010/07/26/business/global/26bp.html?hp");
    String nytLocation = nytDoc.location();
    String nytBaseUri = nytDoc.baseUri();
    assertEquals("http://www.nytimes.com/2010/07/26/business/global/26bp.html?hp", nytLocation);
    assertEquals("http://www.nytimes.com/2010/07/26/business/global/26bp.html?hp", nytBaseUri);
}
/**
 * Parses the Polymer template fixture and asserts that every {@code template} element
 * found under the body has more than one child node.
 *
 * @throws IOException if the fixture file cannot be read
 */
@Test
public void testTemplateInsideTable() throws IOException {
    File fixture = ParseTest.getFile("/htmltests/table-polymer-template.html");
    Document parsed = Jsoup.parse(fixture, "UTF-8");
    parsed.outputSettings().prettyPrint(true);

    Elements templateEls = parsed.body().getElementsByTag("template");
    // NOTE(review): if no template elements are found this loop asserts nothing.
    for (Element templateEl : templateEls) {
        int childCount = templateEl.childNodes().size();
        assertTrue(childCount > 1);
    }
}
/**
 * Converts the Google iPod fixture to a W3C DOM and checks that the W3C document's
 * {@code getDocumentURI()} equals the jsoup document's {@code location()}.
 *
 * @throws IOException if the fixture file cannot be read
 */
@Test
public void convertsGoogleLocation() throws IOException {
    File in = ParseTest.getFile("/htmltests/google-ipod.html");
    org.jsoup.nodes.Document doc = Jsoup.parse(in, "UTF8");

    W3CDom w3c = new W3CDom();
    Document wDoc = w3c.fromJsoup(doc);

    // The serialized string itself is not inspected, so it is no longer stored in an
    // unused local. The call is kept so that a failure during serialization still
    // fails this test.
    w3c.asString(wDoc);

    assertEquals(doc.location(), wDoc.getDocumentURI());
}