/**
 * Returns the value of the named attribute as a string.
 *
 * The pseudo attributes {@code "text"}, {@code "hRefIndex"} and
 * {@code "textIndex"} are answered from the corresponding getters. Any other
 * name is resolved through {@code findAttribute} and read from the attribute
 * set of the tag located by {@code findTag}; that lookup is handed to
 * {@code EventQueueWait.exec}.
 *
 * @param name the attribute name to look up
 * @return the attribute value as a string, or {@code null} when the name is
 *         not a known attribute or the tag does not define it
 */
@Override
public String getAttribute(final String name) {
    if ("text".equals(name)) {
        return getText();
    }
    if ("hRefIndex".equals(name)) {
        return String.valueOf(getHRefIndex());
    }
    if ("textIndex".equals(name)) {
        return String.valueOf(getTextIndex());
    }
    return EventQueueWait.exec(new Callable<String>() {
        @Override
        public String call() throws Exception {
            JEditorPane editor = (JEditorPane) parent.getComponent();
            HTMLDocument document = (HTMLDocument) editor.getDocument();
            Iterator tagIterator = findTag(document);
            AttributeSet tagAttributes = tagIterator.getAttributes();
            Attribute key = findAttribute(name);
            if (key == null || !tagAttributes.isDefined(key)) {
                return null;
            }
            return tagAttributes.getAttribute(key).toString();
        }
    });
}
/** * Get the URL location of the image to render. If this method returns null, * the "no image" icon is rendered instead. By defaul, url must be present as * the "src" property of the IMG tag. If it is missing, null is returned and * the "no image" icon is rendered. * * @return the URL location of the image to render. */ public URL getImageURL() { Element el = getElement(); String src = (String) el.getAttributes().getAttribute(Attribute.SRC); URL url = null; if (src != null) { URL base = ((HTMLDocument) getDocument()).getBase(); try { url = new URL(base, src); } catch (MalformedURLException ex) { // Return null. } } return url; }
/**
 * Refresh the cached width and height spans from the attribute set returned
 * by {@link #getAttributes}. Each span that is present is given the em/ex
 * font bases obtained from the style sheet for those attributes.
 */
protected void setPropertiesFromAttributes() {
    AttributeSet attributes = getAttributes();
    StyleSheet styleSheet = getStyleSheet();
    float emBase = styleSheet.getEMBase(attributes);
    float exBase = styleSheet.getEXBase(attributes);

    Length width = (Length) attributes.getAttribute(CSS.Attribute.WIDTH);
    spans[X_AXIS] = width;
    if (width != null) {
        width.setFontBases(emBase, exBase);
    }

    Length height = (Length) attributes.getAttribute(CSS.Attribute.HEIGHT);
    spans[Y_AXIS] = height;
    if (height != null) {
        height.setFontBases(emBase, exBase);
    }
}
/**
 * Generate a DCAT distribution for a dataset and add it to the store.
 *
 * The distribution URI is built from the row sequence and the language code.
 *
 * @param store RDF store
 * @param dataset dataset URI
 * @param access access URL of the dataset
 * @param link link elements; only the first one is used
 * @param i row sequence
 * @param lang language code
 * @throws MalformedURLException
 * @throws RepositoryException
 */
private void generateDist(Storage store, IRI dataset, URL access, Elements link,
        int i, String lang) throws MalformedURLException, RepositoryException {
    String href = link.first().attr(Attribute.HREF.toString());
    URL download = makeAbsURL(href);

    String distLocation = makeDistURL(i + "/" + lang).toString();
    IRI dist = store.getURI(distLocation);
    logger.debug("Generating distribution {}", dist.toString());

    // Link the distribution to its dataset, then describe the distribution itself.
    store.add(dataset, DCAT.HAS_DISTRIBUTION, dist);
    store.add(dist, RDF.TYPE, DCAT.DISTRIBUTION);
    store.add(dist, DCTERMS.LANGUAGE, MDR_LANG.MAP.get(lang));
    store.add(dist, DCTERMS.TITLE, link.first().ownText(), lang);
    store.add(dist, DCAT.ACCESS_URL, access);
    store.add(dist, DCAT.DOWNLOAD_URL, download);
    store.add(dist, DCAT.MEDIA_TYPE, getFileExt(href));
}
/** * Get the list of all the downloads (DCAT Dataset). * * @return List of URLs * @throws IOException */ @Override protected List<URL> scrapeDatasetList() throws IOException { List<URL> urls = new ArrayList<>(); URL base = getBase(); String front = makeRequest(base); // Get all the main themes Elements themes = Jsoup.parse(front).select(LINK_THEME); if (themes != null) { for (Element theme: themes) { String href = theme.attr(Attribute.HREF.toString()); urls.addAll(scrapeSubList(href)); sleep(); } } else { logger.error("No themes {} found", LINK_THEME); } return urls; }
/** * Generate DCAT Distribution. * * @param store RDF store * @param dataset dataset URI * @param access access URL * @param link link element * @param lang language code * @throws MalformedUrlException * @throws RepositoryException */ private void generateDist(Storage store, IRI dataset, URL access, Element link, String lang) throws MalformedURLException, RepositoryException { String href = link.attr(Attribute.HREF.toString()); URL download = makeAbsURL(href); // important for EDP: does not like different datasets pointing to same distribution String id = makeHashId(dataset.toString()) + "/" + makeHashId(download.toString()); IRI dist = store.getURI(makeDistURL(id).toString() + "/" + lang); logger.debug("Generating distribution {}", dist.toString()); store.add(dataset, DCAT.HAS_DISTRIBUTION, dist); store.add(dist, RDF.TYPE, DCAT.DISTRIBUTION); store.add(dist, DCTERMS.LANGUAGE, MDR_LANG.MAP.get(lang)); store.add(dist, DCTERMS.TITLE, link.ownText(), lang); store.add(dist, DCAT.ACCESS_URL, access); store.add(dist, DCAT.DOWNLOAD_URL, download); store.add(dist, DCAT.MEDIA_TYPE, getFileExt(href)); }
/** * Get the list of all the downloads (DCAT Dataset). * * @return List of URLs * @throws IOException */ @Override protected List<URL> scrapeDatasetList() throws IOException { List<URL> urls = new ArrayList<>(); URL base = getBase(); // Go through all the pages for(int i = 1; ; i++) { logger.info("Scraping page {}", i); String page = makeRequest(new URL(base + "?page=" + i)); Elements links = Jsoup.parse(page).select(VIEW_HREF); if (links == null || links.isEmpty()) { break; } for (Element link: links) { String href = link.attr(Attribute.HREF.toString()); urls.add(makeAbsURL(href)); } sleep(); } return urls; }
/**
 * Generate a DCAT Distribution for a dataset and add it to the store.
 *
 * The own text of the link is used both as the title and as the media type.
 *
 * @param store RDF store
 * @param dataset dataset URI
 * @param access access URL
 * @param link link element
 * @param lang language code
 * @throws MalformedURLException
 * @throws RepositoryException
 */
private void generateDist(Storage store, IRI dataset, URL access, Element link,
        String lang) throws MalformedURLException, RepositoryException {
    URL download = makeAbsURL(link.attr(Attribute.HREF.toString()));

    // Identifier combines a hash of the dataset with a hash of the download URL.
    String datasetHash = makeHashId(dataset.toString());
    String downloadHash = makeHashId(download.toString());
    String id = datasetHash + "/" + downloadHash;

    IRI dist = store.getURI(makeDistURL(id).toString() + "/" + lang);
    logger.debug("Generating distribution {}", dist.toString());

    store.add(dataset, DCAT.HAS_DISTRIBUTION, dist);
    store.add(dist, RDF.TYPE, DCAT.DISTRIBUTION);
    store.add(dist, DCTERMS.LANGUAGE, MDR_LANG.MAP.get(lang));
    store.add(dist, DCTERMS.TITLE, link.ownText(), lang);
    store.add(dist, DCAT.ACCESS_URL, access);
    store.add(dist, DCAT.DOWNLOAD_URL, download);
    store.add(dist, DCAT.MEDIA_TYPE, link.ownText());
}
/**
 * Generate a DCAT distribution for a dataset and add it to the store.
 *
 * The format label is the own text of the download link with slashes,
 * spaces and non-breaking spaces removed. It is used in the distribution
 * URI, as the title and as the media type.
 *
 * @param store RDF store
 * @param dataset dataset URI
 * @param name short name
 * @param access URL of the access page
 * @param link download link element
 * @param lang language code
 * @throws MalformedURLException
 * @throws RepositoryException
 */
private void generateDist(Storage store, IRI dataset, String name, String access,
        Element link, String lang) throws MalformedURLException, RepositoryException {
    String href = link.attr(Attribute.HREF.toString());

    // Literal replacements (no regex needed). The non-breaking space is written
    // as an escape so it stays visible and survives editors and re-encoding;
    // link text coming from "&nbsp;" contains U+00A0 rather than a plain space.
    String fmt = link.ownText()
            .replace("/", "")
            .replace(" ", "")
            .replace("\u00A0", "");

    URL u = makeDistURL(name + "/" + fmt);
    IRI dist = store.getURI(u.toString());
    logger.debug("Generating distribution {}", dist.toString());

    store.add(dataset, DCAT.HAS_DISTRIBUTION, dist);
    store.add(dist, RDF.TYPE, DCAT.DISTRIBUTION);
    store.add(dist, DCTERMS.LANGUAGE, MDR_LANG.MAP.get(lang));
    store.add(dist, DCTERMS.TITLE, fmt, lang);
    store.add(dist, DCAT.ACCESS_URL, makeAbsURL(access));
    store.add(dist, DCAT.DOWNLOAD_URL, makeAbsURL(href));
    store.add(dist, DCAT.MEDIA_TYPE, fmt);
}
/**
 * Generate a DCAT distribution for a dataset and add it to the store.
 *
 * @param store RDF store
 * @param dataset dataset URI
 * @param access access URL of the dataset
 * @param text text stored as the description of the distribution
 * @param link link element
 * @param lang language code
 * @throws MalformedURLException
 * @throws RepositoryException
 */
private void generateDist(Storage store, IRI dataset, URL access, String text,
        Element link, String lang) throws MalformedURLException, RepositoryException {
    String href = link.attr(Attribute.HREF.toString());
    URL download = makeAbsURL(href);

    // Identifier combines a hash of the dataset with a hash of the download URL.
    String datasetHash = makeHashId(dataset.toString());
    String downloadHash = makeHashId(download.toString());
    String distLocation = makeDistURL(datasetHash + "/" + downloadHash).toString();

    IRI dist = store.getURI(distLocation);
    logger.debug("Generating distribution {}", dist.toString());

    store.add(dataset, DCAT.HAS_DISTRIBUTION, dist);
    store.add(dist, RDF.TYPE, DCAT.DISTRIBUTION);
    store.add(dist, DCTERMS.LANGUAGE, MDR_LANG.MAP.get(lang));
    store.add(dist, DCTERMS.TITLE, link.ownText(), lang);
    store.add(dist, DCTERMS.DESCRIPTION, text, lang);
    store.add(dist, DCAT.ACCESS_URL, access);
    store.add(dist, DCAT.DOWNLOAD_URL, download);
    store.add(dist, DCAT.MEDIA_TYPE, getFileExt(href));
}
/** * Generate DCAT distribution. * * @param store RDF store * @param dataset URI * @param access access URL of the dataset * @param link link element * @param code dataset code * @param lang language code * @throws MalformedURLException * @throws RepositoryException */ private void generateDist(Storage store, IRI dataset, URL access, Elements link, String code, String lang) throws MalformedURLException, RepositoryException { String href = link.first().attr(Attribute.HREF.toString()); URL download = makeAbsURL(href); // file type e.g. in "Link (pdf)" String txt = link.first().text(); String ftype = txt.replaceAll("(\\w+\\s*\\()(\\w+)\\)", "$2"); URL u = makeDistURL(code + "/" + lang); IRI dist = store.getURI(u.toString()); logger.debug("Generating distribution {}", dist.toString()); store.add(dataset, DCAT.HAS_DISTRIBUTION, dist); store.add(dist, RDF.TYPE, DCAT.DISTRIBUTION); store.add(dist, DCTERMS.LANGUAGE, MDR_LANG.MAP.get(lang)); store.add(dist, DCTERMS.TITLE, link.first().ownText(), lang); store.add(dist, DCAT.ACCESS_URL, access); store.add(dist, DCAT.DOWNLOAD_URL, download); store.add(dist, DCAT.MEDIA_TYPE, ftype.toLowerCase()); }
/**
 * Inform view creation. Image views whose first "img" element has a string
 * "src" attribute ending in "gif" are added to {@code imageViews}; all other
 * views are ignored.
 *
 * @param factory the factory that created the view (not used here).
 * @param view the newly created view.
 */
protected void viewCreated(ViewFactory factory, View view) {
    if (!(view instanceof ImageView)) {
        return;
    }

    Element image = findFirstElement(view.getElement(), "img");
    if (image == null) {
        return;
    }

    // instanceof is false for null, so no separate null check is needed.
    Object source = image.getAttributes().getAttribute(Attribute.SRC);
    if (source instanceof String && ((String) source).endsWith("gif")) {
        imageViews.add((ImageView) view);
    }
}
private MutableAttributeSet filterAttributeSet(Tag t, MutableAttributeSet a) { for(Object aName: Collections.list((Enumeration<Object>)a.getAttributeNames())) { List<Attribute> lAttr = acceptedAttributes.get(t); if(lAttr == null || !lAttr.contains(aName)) { a.removeAttribute(aName); } else { if(aName == HTML.Attribute.STYLE) { //System.out.println(">> " + aName + ": " + a.getAttribute(aName)); if(t.isBlock()) { a.addAttribute(aName, DocumentUtil.ensureAcceptedCssProperties( (String) a.getAttribute(aName), acceptedBlockCssProperties)); } else { a.addAttribute(aName, DocumentUtil.ensureAcceptedCssProperties( (String) a.getAttribute(aName), acceptedInlineCssProperties)); } } } } return a; }
/**
 * Turns implied paragraphs at the start of table cells into real P
 * paragraphs when they define a text alignment.
 *
 * For every TD element of the document, the first child is inspected; if its
 * name attribute is {@code Tag.IMPLIED} and it defines
 * {@code CSS.Attribute.TEXT_ALIGN}, a name attribute of {@code Tag.P} is
 * applied through {@code setParagraphAttributes} without replacing the
 * existing attributes.
 *
 * @param doc the document to fix
 */
public static void corrigePImplied(ExtendedHTMLDocument doc) {
    for (Element cell : findElementByTag(doc, Tag.TD)) {
        if (cell.getElementCount() <= 0) {
            continue;
        }

        Element firstChild = cell.getElement(0);
        AttributeSet childAttrs = firstChild.getAttributes();
        boolean implied =
                childAttrs.containsAttribute(StyleConstants.NameAttribute, Tag.IMPLIED);
        if (implied && childAttrs.isDefined(CSS.Attribute.TEXT_ALIGN)) {
            SimpleAttributeSet paragraphName = new SimpleAttributeSet();
            paragraphName.addAttribute(StyleConstants.NameAttribute, Tag.P);
            // NOTE(review): the standard setParagraphAttributes(offset, length, ...)
            // documents its second argument as a length, but an end offset is
            // passed here - confirm this is intended for ExtendedHTMLDocument.
            doc.setParagraphAttributes(firstChild.getStartOffset(),
                    firstChild.getEndOffset(), paragraphName, false);
        }
    }
}
/**
 * Handles a start tag: counts table nesting, and forwards accepted tags to
 * the reader with a filtered attribute set.
 *
 * The table depth is incremented for every TABLE tag, accepted or not.
 * Accepted TABLE tags get a WIDTH of "100%" and accepted TD tags get a
 * VALIGN of "top" before being forwarded. Tags that are not accepted are
 * not forwarded.
 *
 * @param t the tag being opened
 * @param a the attributes of the tag
 * @param pos the position reported by the parser
 */
public void handleStartTag(Tag t, MutableAttributeSet a, int pos) {
    boolean isTable = (t == Tag.TABLE);
    if (isTable) {
        tableDepth++;
    }
    if (!isAccepted(t)) {
        return;
    }

    MutableAttributeSet filtered = filterAttributeSet(t, a);
    if (isTable) {
        filtered.addAttribute(Attribute.WIDTH, "100%");
    } else if (t == Tag.TD) {
        filtered.addAttribute(Attribute.VALIGN, "top");
    }
    reader.handleStartTag(t, filtered, pos);
}
/**
 * Tells whether the CLASS attribute of the given attribute set contains the
 * given class name as one of its whitespace-separated tokens.
 *
 * Tokens may be separated by any run of whitespace (spaces, tabs, line
 * breaks), and leading or trailing whitespace is ignored. The attribute
 * value is read through {@code toString()}, so a non-String value does not
 * cause a {@code ClassCastException}. The comparison is case-sensitive.
 *
 * @param attr the attribute set to inspect
 * @param className the class name to look for
 * @return {@code true} if the class name is present; {@code false} if it is
 *         absent, if there is no CLASS attribute, or if {@code className} is
 *         {@code null} or empty
 */
public static boolean hasClass(AttributeSet attr, String className) {
    Object rawValue = attr.getAttribute(Attribute.CLASS);
    if (rawValue == null || className == null || className.isEmpty()) {
        return false;
    }
    // Trim first so that leading whitespace does not produce an empty token.
    for (String token : rawValue.toString().trim().split("\\s+")) {
        if (token.equals(className)) {
            return true;
        }
    }
    return false;
}
/** * * @param element * @param attrName * @param matchStrings * @return */ private Element findFirstElement( Element element, HTML.Attribute attrName, String[] matchStrings) { String attr = (String) element.getAttributes().getAttribute(attrName); if(attr != null) for (String matchString : matchStrings) if (attr.startsWith(matchString)) return element; Element resultElement = null; // Count how many messages we have in the document. for (int i = 0; i < element.getElementCount(); i++) { resultElement = findFirstElement(element.getElement(i), attrName, matchStrings); if (resultElement != null) return resultElement; } return null; }
/**
 * Finds the known attribute whose name equals the given name, ignoring case.
 *
 * The comparison uses {@code equalsIgnoreCase}, so it does not depend on the
 * default locale of the JVM.
 *
 * @param attrName the attribute name to look for; may be {@code null}
 * @return the matching attribute from {@code allAttributes}, or {@code null}
 *         if the name is {@code null} or matches no attribute
 */
private Attribute findAttribute(String attrName) {
    if (attrName == null) {
        return null;
    }
    for (Attribute attr : allAttributes) {
        if (attrName.equalsIgnoreCase(attr.toString())) {
            return attr;
        }
    }
    return null;
}
/**
 * Counts how many anchors located before this one in the document carry the
 * same href value as this anchor.
 *
 * The A tags of the editor's document are walked in order; as soon as the
 * walk reaches position {@code index} the number of matching hrefs seen so
 * far is returned. The work is handed to {@code EventQueueWait.exec}.
 *
 * @return the number of preceding anchors with the same href, or -1 if the
 *         document has fewer than {@code index + 1} anchors
 */
public int getHRefIndex() {
    return EventQueueWait.exec(new Callable<Integer>() {
        @Override
        public Integer call() throws Exception {
            String ownHref = getAttribute("href");
            int sameHrefBefore = 0;

            JEditorPane editor = (JEditorPane) parent.getComponent();
            HTMLDocument document = (HTMLDocument) editor.getDocument();
            Iterator anchors = document.getIterator(Tag.A);

            for (int position = 0; anchors.isValid(); position++, anchors.next()) {
                if (position >= index) {
                    return sameHrefBefore;
                }
                AttributeSet attributes = anchors.getAttributes();
                Object hrefValue = (attributes == null)
                        ? null
                        : attributes.getAttribute(HTML.Attribute.HREF);
                if (hrefValue != null && hrefValue.toString().equals(ownHref)) {
                    sameHrefBefore++;
                }
            }
            return -1;
        }
    });
}
/** * Get the image alignment. This method works handling standart alignment * attributes in the HTML IMG tag (align = top bottom middle left right). * Depending from the parameter, either horizontal or vertical alingment * information is returned. * * @param axis - * either X_AXIS or Y_AXIS */ public float getAlignment(int axis) { AttributeSet attrs = getAttributes(); Object al = attrs.getAttribute(Attribute.ALIGN); // Default is top left aligned. if (al == null) return 0.0f; String align = al.toString(); if (axis == View.X_AXIS) { if (align.equals("middle")) return 0.5f; else if (align.equals("left")) return 0.0f; else if (align.equals("right")) return 1.0f; else return 0.0f; } else if (axis == View.Y_AXIS) { if (align.equals("middle")) return 0.5f; else if (align.equals("top")) return 0.0f; else if (align.equals("bottom")) return 1.0f; else return 0.0f; } else throw new IllegalArgumentException("axis " + axis); }
/**
 * Get the text that should be shown as the image replacement and also as the
 * image tool tip text. The method returns the value of the attribute having
 * the name {@link Attribute#ALT}. If there is no such attribute, the file
 * part of the image URL is returned. If the URL is not available, the empty
 * string is returned.
 *
 * @return the alternative text, never null unless the attribute's string
 *         form or the URL's file part is null
 */
public String getAltText() {
    Object alt = getAttributes().getAttribute(Attribute.ALT);
    if (alt != null) {
        return alt.toString();
    }

    URL imageUrl = getImageURL();
    return (imageUrl == null) ? "" : imageUrl.getFile();
}
/** * Get the attribute value, matching this key. If not found in this set, the * call is delegated to parent. * * @return the value, matching key (or null if none). */ public Object getAttribute(Object key) { // Null and HTML attributes or tags can be searched by direct comparison. if (key == null || key instanceof Attribute || key instanceof Tag) { for (int i = 0; i < keys.length; i++) { if (keys[i] == key) return values[i]; } } // Strings are case insensitive. Only string can be match the string. else if (key instanceof String) { String ks = (String) key; for (int i = 0; i < keys.length; i++) { if (keys[i] instanceof String) if (ks.equalsIgnoreCase((String) keys[i])) return values[i]; } } // Otherwise, defaults to .equals else { for (int i = 0; i < keys.length; i++) { if (key.equals(keys[i])) return values[i]; } } if (parent != null) return parent.getAttribute(key); else return null; }
/**
 * Initializes this object from the given element.
 *
 * The loading flag is raised under the object's monitor on entry and is
 * always lowered again in the {@code finally} clause. In between, the
 * element is stored and the HEIGHT and WIDTH attributes are read through
 * {@code getIntAttr} with a default of -1. The values computed here are kept
 * in locals only and are not used afterwards, hence the "unused" warning
 * suppression.
 *
 * @param elem the element to initialize from
 */
@SuppressWarnings("unused")
public void initialize(Element elem) {
    synchronized (this) {
        bLoading = true;
    }
    int width = 0;
    int height = 0;
    boolean customWidth = false;
    boolean customHeight = false;
    try {
        fElement = elem;
        // request image from document's cache
        // NOTE(review): only the attribute set is fetched here and it is not
        // used afterwards - confirm whether more was intended.
        AttributeSet attr = elem.getAttributes();
        // get height & width from params or image or defaults;
        // a value greater than zero counts as a custom size
        height = getIntAttr(HTML.Attribute.HEIGHT, -1);
        customHeight = (height > 0);
        width = getIntAttr(HTML.Attribute.WIDTH, -1);
        customWidth = (width > 0);
    } finally {
        synchronized (this) {
            bLoading = false; // NullnessAnalysis fails to show that $this is
                              // non-null
        }
    }
}
/**
 * Switch to another language.
 *
 * The front page is fetched and searched for an element of class
 * {@code HtmlFodMobilit.LANG_LINK} whose text equals the language; its href
 * is resolved against the base URL.
 *
 * @param lang text of the language link to look for
 * @return URL of the front page in that language, or the base URL when no
 *         matching link is found
 * @throws IOException
 */
private URL switchLanguage(String lang) throws IOException {
    URL base = getBase();
    Elements languageLinks = Jsoup.parse(makeRequest(base))
            .getElementsByClass(HtmlFodMobilit.LANG_LINK);

    for (Element languageLink : languageLinks) {
        if (languageLink.text().equals(lang)) {
            return new URL(base, languageLink.attr(Attribute.HREF.toString()));
        }
    }
    return base;
}
/**
 * Get the list of all the categories.
 *
 * The front page is fetched and the href of every element matching
 * {@code LINKS_DATASETS} is turned into a URL via {@code makeAbsURL}.
 *
 * @return list of category URLs
 * @throws IOException
 */
@Override
protected List<URL> scrapeDatasetList() throws IOException {
    List<URL> categoryUrls = new ArrayList<>();

    String frontPage = makeRequest(getBase());
    Elements categoryLinks = Jsoup.parse(frontPage).select(LINKS_DATASETS);

    for (int n = 0; n < categoryLinks.size(); n++) {
        String href = categoryLinks.get(n).attr(HTML.Attribute.HREF.toString());
        categoryUrls.add(makeAbsURL(href));
    }
    return categoryUrls;
}
/**
 * Generate DCAT Datasets from a scraped page.
 *
 * The page stored under the empty-string key is parsed; for every element
 * matching {@code LINK_DATASET} the name attribute of its
 * {@code NAME_DATASET} selection is used as the anchor when generating the
 * dataset, using the default language.
 *
 * @param store RDF store
 * @param id dataset id
 * @param page pages keyed by string; only the entry under "" is used
 * @throws MalformedURLException
 * @throws RepositoryException
 */
@Override
protected void generateDataset(Storage store, String id, Map<String, Page> page)
        throws MalformedURLException, RepositoryException {
    String lang = getDefaultLang();
    String html = page.get("").getContent();

    for (Element datasetElement : Jsoup.parse(html).select(LINK_DATASET)) {
        String anchor = datasetElement.select(NAME_DATASET)
                .attr(Attribute.NAME.toString());
        generateDataset(store, id, datasetElement, anchor, lang);
    }
}
/**
 * Switch to another language.
 *
 * The given page content is searched for an element of class
 * {@code LANG_LINK} whose text equals the language code and whose href is
 * not empty.
 *
 * @param page HTML content of the page
 * @param lang language code
 * @return URL of the page in the requested language, or {@code null} when
 *         no usable link is found (a warning is logged in that case)
 * @throws IOException
 */
private URL switchLanguage(String page, String lang) throws IOException {
    Elements lis = Jsoup.parse(page).getElementsByClass(LANG_LINK);

    for (Element li : lis) {
        if (li.text().equals(lang)) {
            String href = li.attr(Attribute.HREF.toString());
            if (href != null && !href.isEmpty()) {
                return makeAbsURL(href);
            }
        }
    }
    // "page" holds the whole HTML document, so it is deliberately kept out of
    // the log message.
    logger.warn("No {} translation for page", lang);
    return null;
}
/**
 * Get the list of all the statistics.
 *
 * The front page is fetched and the href of every element matching
 * {@code LIST_DATASETS} is turned into a URL via {@code makeAbsURL}.
 *
 * @return list of category URLs
 * @throws IOException
 */
@Override
protected List<URL> scrapeDatasetList() throws IOException {
    List<URL> statisticUrls = new ArrayList<>();

    String frontPage = makeRequest(getBase());
    Elements statisticLinks = Jsoup.parse(frontPage).select(LIST_DATASETS);

    for (int n = 0; n < statisticLinks.size(); n++) {
        String href = statisticLinks.get(n).attr(Attribute.HREF.toString());
        statisticUrls.add(makeAbsURL(href));
    }
    return statisticUrls;
}
/**
 * Switch to another language.
 *
 * The front page is fetched and searched for an element of class
 * {@code HtmlFodDiplomatie.LANG_LINK} whose text equals the language; its
 * href is turned into a URL via {@code makeAbsURL}.
 *
 * @param lang text of the language link to look for
 * @return URL of the front page in that language, or the base URL when no
 *         matching link is found
 * @throws IOException
 */
private URL switchLanguage(String lang) throws IOException {
    URL base = getBase();
    Elements languageLinks = Jsoup.parse(makeRequest(base))
            .getElementsByClass(HtmlFodDiplomatie.LANG_LINK);

    for (Element languageLink : languageLinks) {
        if (languageLink.text().equals(lang)) {
            return makeAbsURL(languageLink.attr(Attribute.HREF.toString()));
        }
    }
    return base;
}
/**
 * Get the URL of the page in another language.
 *
 * The given page content is searched for an element matching
 * {@code LANG_LINK} whose trimmed, lower-cased text equals the language code
 * and whose href is not empty. Lower-casing is done with the root locale so
 * the match does not depend on the default locale of the JVM.
 *
 * @param page HTML content of the page
 * @param lang language code, expected in lower case
 * @return URL of the page in another language, or {@code null} when no
 *         usable link is found (a warning is logged in that case)
 * @throws IOException
 */
private URL switchLanguage(String page, String lang) throws IOException {
    Elements hrefs = Jsoup.parse(page).select(LANG_LINK);

    for (Element href : hrefs) {
        String label = href.text().trim().toLowerCase(java.util.Locale.ROOT);
        if (label.equals(lang)) {
            String link = href.attr(HTML.Attribute.HREF.toString());
            if (link != null && !link.isEmpty()) {
                return makeAbsURL(link);
            }
        }
    }
    logger.warn("No {} translation for page", lang);
    return null;
}
/**
 * Checks HTML.getIntegerAttributeValue for: a numeric string value, a
 * numeric string stored under another attribute, a non-numeric string
 * (default expected), a missing attribute (default expected), and a
 * non-String value, which is expected to raise a ClassCastException.
 */
public void testGetIntegerAttributeValue() {
    // Numeric string under COLSPAN.
    MutableAttributeSet colspanSet = new SimpleAttributeSet();
    colspanSet.addAttribute(Attribute.COLSPAN, "11");
    assertEquals(11, HTML.getIntegerAttributeValue(colspanSet, Attribute.COLSPAN, -1));

    // Numeric string under HREF.
    MutableAttributeSet numericHref = new SimpleAttributeSet();
    numericHref.addAttribute(Attribute.HREF, "10101");
    assertEquals(10101, HTML.getIntegerAttributeValue(numericHref, Attribute.HREF, -1));

    // Non-numeric string falls back to the default.
    MutableAttributeSet textHref = new SimpleAttributeSet();
    textHref.addAttribute(Attribute.HREF, "not a number");
    assertEquals(-1, HTML.getIntegerAttributeValue(textHref, Attribute.HREF, -1));

    // Missing attribute falls back to the default.
    MutableAttributeSet empty = new SimpleAttributeSet();
    assertEquals(-1, HTML.getIntegerAttributeValue(empty, Attribute.HREF, -1));

    // A value that is not a String is expected to fail with a class cast.
    final MutableAttributeSet wrongValue = new SimpleAttributeSet();
    wrongValue.addAttribute(Attribute.HREF, Integer.valueOf("10101"));
    testExceptionalCase(new ClassCastCase() {
        public void exceptionalAction() throws Exception {
            HTML.getIntegerAttributeValue(wrongValue, Attribute.HREF, -1);
        }
    });
}