/**
 * Looks up a webtoon by its code and shows its title, author, description and thumbnail.
 *
 * <p>When {@code code} is {@code null} or empty nothing is fetched; instead an informational
 * alert asking for a webtoon code is scheduled through {@code Platform.runLater}.
 *
 * @param code webtoon code handed to {@code CommonService.getConnection}
 */
public void getWebtoon(String code) {
    // A null code used to slip past the "".equals(code) check and reach getConnection(null).
    if (code == null || code.isEmpty()) {
        Platform.runLater(new Runnable() {
            @Override
            public void run() {
                AlertSupport alert = new AlertSupport("웹툰코드를 입력하세요.");
                alert.alertInfoMsg(stage);
            }
        });
        return;
    }

    CommonService cs = new CommonService();
    Connection conn = cs.getConnection(code);
    conn.timeout(5000);
    codeInputField.setText(code);
    wDesc.setWrapText(true);

    try {
        Document doc = conn.get();

        // The page title is everything before the first "::" separator.
        String title = doc.select("title").text().split("::")[0];
        setTitle(title);

        String author = doc.select("div.detail h2 > span").text();
        wTitle.setText(title + "(" + author + ")");

        String desc = doc.select("div.detail p").text();
        wDesc.setText(desc);

        String img = doc.select("div.thumb > a img").attr("src");
        thumbnail.setImage(new Image(img, true));
    } catch (Exception e) {
        // Fetching or parsing failed; the UI keeps whatever it showed before.
        e.printStackTrace();
    }
}
/**
 * Fetches {@code url} on a newly started thread, applies the CSS selector {@code option}
 * to the parsed page and hands the selected text to {@code func}.
 *
 * <p>The callback receives {@code [text, null]} on success and {@code [null, exception]}
 * when the fetch fails with an {@link IOException}.
 *
 * @param url    page to download
 * @param option CSS selector applied to the downloaded document
 * @param func   callback invoked with the result
 * @throws IOException declared by the signature; the visible body does not throw it itself
 */
@JSStaticFunction
public static void parseToText(final String url, final String option, final Function func) throws IOException {
    Runnable fetchTask = new Runnable() {
        @Override
        public void run() {
            try {
                Elements selected = Jsoup.connect(url).get().select(option);
                func.call(context, scope, scope, new Object[] { selected.text(), null });
            } catch (IOException fetchError) {
                try {
                    func.call(context, scope, scope, new Object[] { null, fetchError });
                } catch (Exception ignored) {
                    // The error callback itself failed; nothing else is notified.
                }
            }
        }
    };
    new Thread(fetchTask).start();
}
@Override public List<String> getURLsFromPage(Document doc) { List<String> result = new ArrayList<>(); for (Element thumb : doc.select("div.picture_view > div.pictures_block > div.items > div.item-container > a > div.thumb_container > div.img > img")) { String image = thumb.attr("src"); // replace thumbnail urls with the urls to the full sized images image = image.replaceAll( "https://upt.xhcdn\\.", "http://up.xhamster."); image = image.replaceAll("ept\\.xhcdn", "ep.xhamster"); image = image.replaceAll( "_160\\.", "_1000."); // Xhamster has bad cert management and uses invalid certs for some cdns, so we change all our requests to http image = image.replaceAll("https", "http"); result.add(image); } return result; }
/** * 方法说明:绑定单cookie模拟浏览器,返回document对象 * * @param url 被访问url * @param cookieKey 绑定cookie的key * @param cookieValue 绑定cookie的value * @return Document 返回document对象 * @throws Exception */ public static Document getDocumentWithCookie(String url, String cookieKey, String cookieValue) throws Exception { Document doc = null; if (StringUtil.isEmpty(cookieKey) && StringUtil.isEmpty(cookieValue)) { doc = getDocument(url); } else if (!StringUtil.isEmpty(cookieKey) && !StringUtil.isEmpty(cookieValue)){ Map<String, String> cookiesMap = new HashMap<String, String>(); cookiesMap.put(cookieKey, cookieValue); doc = getDocumentWithCookies(url, cookiesMap); } else { // parameter is error. 参数が不正である、所传参数错误。 throw new IllegalArgumentException("key or value is err"); // TODO hard coding is fixing bluetata 2017/03/20 add } return doc; }
/**
 * Collects date candidates from the elements matched by each selector in
 * {@code ITEMPROP_SELECTORS}.
 *
 * <p>For every matched element the first non-empty attribute among {@code datetime},
 * {@code content} and {@code title} (in that order) is recorded together with the
 * selector that matched; elements with none of them are skipped.
 *
 * @param document parsed page to inspect
 * @return the collected matches, in selector order and then document order
 */
public static List<MatchedDate> extractFromProperties(Document document) {
    List<MatchedDate> matches = Lists.newArrayList();
    for (String selector : ITEMPROP_SELECTORS) {
        document.select(selector).forEach(element -> {
            String datetime = element.attr("datetime");
            String content = element.attr("content");
            String title = element.attr("title");

            String value = !Strings.isNullOrEmpty(datetime) ? datetime
                    : !Strings.isNullOrEmpty(content) ? content
                    : !Strings.isNullOrEmpty(title) ? title
                    : null;
            if (value != null) {
                matches.add(new MatchedDate(value, selector));
            }
        });
    }
    return matches;
}
/**
 * Parses an album listing page into {@code AlbumInfo} entries.
 *
 * @param baseUrl    base URL of the site (not used by this implementation)
 * @param currentUrl URL the page was loaded from; stored under {@code CURRENT_URL}
 * @param result     raw page bytes, decoded as UTF-8
 * @param resultMap  map that receives the current URL and the parsed albums
 * @return {@code resultMap}, after both entries have been put
 * @throws UnsupportedEncodingException declared for the charset-name based decoding
 */
@Override
public Map<ContentsActivity.parameter, Object> getContent(String baseUrl, String currentUrl, byte[] result, Map<ContentsActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    Document page = Jsoup.parse(new String(result, "utf-8"));
    List<AlbumInfo> albums = new ArrayList<>();

    for (Element albumBox : page.select("div.album")) {
        AlbumInfo info = new AlbumInfo();

        Elements names = albumBox.select("span.name");
        if (!names.isEmpty()) {
            info.setTitle(names.get(0).text());
        }

        Elements links = albumBox.select(".pic_box a");
        info.setAlbumUrl(links.attr("href"));

        Elements covers = links.select("img");
        if (!covers.isEmpty()) {
            info.setPicUrl(covers.get(0).attr("src"));
        }

        albums.add(info);
    }

    resultMap.put(ContentsActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(ContentsActivity.parameter.RESULT, albums);
    return resultMap;
}
/**
 * Looks up the term written between single quotes in {@code dico} on the Italian
 * Wikipedia and reports the text of the page's paragraphs through the GUI.
 *
 * <p>If {@code dico} does not contain a term delimited by two single quotes, the user is
 * reminded to quote the term and no request is made.
 *
 * @param dico user sentence containing the quoted search term
 */
static void Wikipedia(String dico) {
    int openingQuote = dico.indexOf("'");
    int closingQuote = dico.lastIndexOf("'");

    // The substring used to be taken outside the try block, so the
    // StringIndexOutOfBoundsException handler meant for missing quotes never ran.
    if (closingQuote <= openingQuote) {
        new GUI().giveResponse("Ricorda che, perché io cerchi informazioni riguardo a qualcosa, occorre che tu la definisca fra due virgolette!");
        return;
    }

    String cercowikipedia = dico.substring(openingQuote + 1, closingQuote);
    try {
        Document significatowikipedia = Jsoup.connect("https://it.wikipedia.org/wiki/" + cercowikipedia.replace(" ", "_"))
                .userAgent("Mozilla")
                .get();
        String divs = significatowikipedia.select("p").text();
        if (!divs.equals("")) {
            new GUI().giveResponse("La ricerca di " + cercowikipedia + " su wikipedia ha restituito il seguente risultato:" + '\n' + divs);
        } else {
            new GUI().giveResponse("Mi dispiace, non ho trovato informazioni su " + cercowikipedia + " su Wikipedia...");
        }
    } catch (HttpStatusException e) {
        // A non-success HTTP status is reported to the user as a missing entry.
        new GUI().giveResponse("Mi dispiace, Wikipedia sembra non avere una voce per '" + cercowikipedia +"'...");
    } catch (java.io.IOException f) {
        f.printStackTrace();
    }
}
/** * Test that {@link PawpedsDocumentParser#parseSearch(Document)} throws an * {@link IllegalArgumentException} if there is an jsoup parsing error. */ @Test(expected = IllegalArgumentException.class) public void testJsoupSelectorUnexpectedError() throws Exception { // Given Document document = mock(Document.class); Elements noErrorElement = mock(Elements.class); when(noErrorElement.text()).thenReturn(""); when(document.select("th.error")).thenReturn(noErrorElement); when(document.select("table.searchresult tr.searchresult:has(td.searchresult)")).thenThrow(SelectorParseException.class); // When pawpedsDocumentParser.parseSearch(document); // Then // the exception is expected }
public void loadAnuncios(final AnunciosCallback callback) { UAWebService.HttpWebGetRequest(context, ANUNCIOS_URL, new UAWebService.WebCallBack() { @Override public void onNavigationComplete(boolean isSuccessful, String body) { if (isSuccessful) { Document doc = Jsoup.parse(body); //Get Post data Element anuncios = doc.select(ANUNCIOS_LIST_BODY).first(); try { for (Element anuncio : anuncios.children()) { parseAnuncio(anuncio, ""); } callback.onResult(true, ""); } catch (NullPointerException e) { FirebaseCrash.log(body); FirebaseCrash.report(e); callback.onResult(false, ErrorManager.LOGIN_REJECTED); //Usually because session ended! } } else { callback.onResult(false, body); } } }); }
/**
 * Extracts the image list from {@code document} using the parser that matches the
 * current view's type.
 *
 * @param document parsed page
 * @return the images found by the matching parser, or an empty list when there is no
 *         view or the type is not one of the handled ones
 */
@Override
public List<ImageModel> getT(Document document) {
    if (view == null) {
        return new ArrayList<>();
    }

    final List<ImageModel> images;
    switch (view.getType()) {
        case ApiConfig.Type.DOU_BAN_MEI_ZI:
            images = JsoupDoubanManager.get(document).getImageList();
            break;
        case ApiConfig.Type.KK:
            images = JsoupKKManager.get(document).getImageList();
            break;
        case ApiConfig.Type.M_ZI_TU:
            images = JsoupMZiTuManager.get(document).getImageList();
            break;
        case ApiConfig.Type.MM:
            images = JsoupMMManager.get(document).getImageList();
            break;
        case ApiConfig.Type.MEIZITU:
            images = JsoupMeiZiTuManager.get(document).getImageList();
            break;
        default:
            images = new ArrayList<>();
            break;
    }
    return images;
}
@Override public List<String> getDescriptionsFromPage(Document page) { List<String> textURLs = new ArrayList<>(); // Iterate over all thumbnails for (Element thumb : page.select("div.zones-container span.thumb")) { logger.info(thumb.attr("href")); if (isStopped()) { break; } Element img = thumb.select("img").get(0); if (img.attr("transparent").equals("false")) { continue; // a.thumbs to other albums are invisible } textURLs.add(thumb.attr("href")); } return textURLs; }
/**
 * Signs in to Flickr: loads the sign-in page, re-posts its hidden form fields together
 * with the login fields to the form's action URL, and returns the resulting cookies.
 *
 * @return cookies of the response to the login POST
 * @throws IOException if either request fails
 */
@SuppressWarnings("unused")
private Map<String,String> signinToFlickr() throws IOException {
    Response signinPage = Jsoup.connect("http://www.flickr.com/signin/")
                               .userAgent(USER_AGENT)
                               .followRedirects(true)
                               .method(Method.GET)
                               .execute();
    Document form = signinPage.parse();

    // Start from the hidden inputs of the page, then overlay the login fields.
    Map<String,String> formFields = new HashMap<>();
    for (Element hidden : form.select("input[type=hidden]")) {
        formFields.put(hidden.attr("name"), hidden.attr("value"));
    }
    formFields.put("passwd_raw", "");
    formFields.put(".save", "");
    formFields.put("login", new String(Base64.decode("bGVmYWtlZGVmYWtl")));
    formFields.put("passwd", new String(Base64.decode("MUZha2V5ZmFrZQ==")));

    String postTarget = form.select("form[method=post]").get(0).attr("action");
    Response loginResponse = Jsoup.connect(postTarget)
                                  .cookies(signinPage.cookies())
                                  .data(formFields)
                                  .method(Method.POST)
                                  .execute();
    return loginResponse.cookies();
}
/**
 * Downloads the chapter catalog at {@code url} and stores it in {@code map}.
 *
 * <p>Every element with class {@code list-group-item} that contains a link becomes one
 * entry with the keys {@code catalog} (element text) and {@code href} (absolute link).
 * The map receives {@code data} (the entry list), {@code cover} (always empty here) and
 * {@code lastChapter} (text of the last entry, or an empty string when no chapter was
 * found). On an {@link IOException} the map is returned unchanged.
 *
 * @param map result map to fill
 * @param url catalog page URL
 * @return {@code map}
 */
private static Map meiyuxsCatalog(Map map, String url) {
    try {
        List<Map<String, Object>> chapters = new ArrayList<>();
        Document document = Jsoup
                .connect(url)
                .userAgent(FormatUtil.USER_AGENT_PC)
                .get();
        for (Element item : document.body().getElementsByClass("list-group-item")) {
            Elements anchors = item.getElementsByTag("a");
            if (anchors.isEmpty()) {
                continue;
            }
            Map<String, Object> chapter = new HashMap<>();
            chapter.put("catalog", item.text());
            chapter.put("href", "http://www.meiyuxs.com" + anchors.first().attr("href"));
            chapters.add(chapter);
        }

        // An empty catalog used to crash on chapters.get(-1); report no last chapter instead.
        String lastChapter = chapters.isEmpty()
                ? ""
                : chapters.get(chapters.size() - 1).get("catalog").toString();

        map.put("data", chapters);
        map.put("cover", "");
        map.put("lastChapter", lastChapter);
    } catch (IOException e) {
        e.printStackTrace();
    }
    return map;
}
/**
 * Parses a detail page into a single {@code PicInfo} holding the picture URL, the title
 * and the tags.
 *
 * @param baseUrl    prefix prepended to the image {@code src}
 * @param currentUrl URL the page was loaded from; stored under {@code CURRENT_URL}
 * @param result     raw page bytes, decoded as UTF-8
 * @param resultMap  map that receives the current URL and a one-element picture list
 * @return {@code resultMap}
 * @throws UnsupportedEncodingException declared for the charset-name based decoding
 */
@Override
public Map<DetailActivity.parameter, Object> getDetailContent(String baseUrl, String currentUrl, byte[] result, Map<DetailActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    Document page = Jsoup.parse(new String(result, "utf-8"));
    PicInfo picture = new PicInfo();

    // If several images match, the last one wins.
    for (Element image : page.select("#bigpic img")) {
        picture.setPicUrl(baseUrl + image.attr("src"));
    }

    Elements heading = page.select("#entry h1");
    if (!heading.isEmpty()) {
        picture.setTitle(heading.text());
    }

    Elements tagLinks = page.select(".postinfo a");
    if (!tagLinks.isEmpty()) {
        List<String> tagNames = new ArrayList<>();
        for (Element tagLink : tagLinks) {
            tagNames.add(tagLink.text());
        }
        picture.setTags(tagNames);
    }

    List<PicInfo> pictures = new ArrayList<>();
    pictures.add(picture);

    resultMap.put(DetailActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(DetailActivity.parameter.RESULT, pictures);
    return resultMap;
}
/**
 * Downloads the chapter catalog at {@code url} and stores it in {@code map}.
 *
 * <p>Every {@code dd} below the element with id {@code list} that contains a link becomes
 * one entry with the keys {@code catalog} (element text) and {@code href} ({@code url}
 * followed by the link's href). The map receives {@code data} (the entry list),
 * {@code cover} (the {@code src} of the first image below id {@code fmimg}) and
 * {@code lastChapter} (text of the last entry).
 *
 * <p>Missing page parts no longer abort the call: an absent list yields an empty
 * {@code data}, an absent cover image yields an empty {@code cover}, and an empty catalog
 * yields an empty {@code lastChapter}. On an {@link IOException} the map is returned
 * unchanged.
 *
 * @param map result map to fill
 * @param url catalog page URL
 * @return {@code map}
 */
private static Map qulaCatalog(Map map, String url) {
    try {
        List<Map<String, Object>> chapters = new ArrayList<>();
        Document document = Jsoup
                .connect(url)
                .userAgent(FormatUtil.USER_AGENT_PC)
                .get();
        Element body = document.body();

        // getElementById returns null when the id is absent; treat that as "no chapters".
        Element list = body.getElementById("list");
        if (list != null) {
            for (Element item : list.getElementsByTag("dd")) {
                Elements anchors = item.getElementsByTag("a");
                if (anchors.isEmpty()) {
                    continue;
                }
                Map<String, Object> chapter = new HashMap<>();
                chapter.put("catalog", item.text());
                chapter.put("href", url + anchors.first().attr("href"));
                chapters.add(chapter);
            }
        }

        Element coverBox = body.getElementById("fmimg");
        Element coverImage = coverBox == null ? null : coverBox.getElementsByTag("img").first();
        String cover = coverImage == null ? "" : coverImage.attr("src");

        // An empty catalog used to crash on chapters.get(-1); report no last chapter instead.
        String lastChapter = chapters.isEmpty()
                ? ""
                : chapters.get(chapters.size() - 1).get("catalog").toString();

        map.put("data", chapters);
        map.put("cover", cover);
        map.put("lastChapter", lastChapter);
    } catch (IOException e) {
        e.printStackTrace();
    }
    return map;
}
/**
 * Builds a {@code StudentAndParent} mock backed by an HTML fixture.
 *
 * <p>{@code getSnPPageDocument} is stubbed to return the parsed fixture for any string,
 * while {@code getSemesters}, {@code getCurrentSemester} and {@code getRowDataChildValue}
 * are delegated to their real implementations.
 *
 * @param fixtureFileName resource name of the HTML fixture, resolved against this class
 * @return the configured mock
 * @throws Exception if the fixture cannot be read
 */
protected StudentAndParent getSnp(String fixtureFileName) throws Exception {
    String fixtureHtml = FixtureHelper.getAsString(getClass().getResourceAsStream(fixtureFileName));
    Document fixturePage = Jsoup.parse(fixtureHtml);

    StudentAndParent mockedSnp = Mockito.mock(StudentAndParent.class);

    // Any page request is answered with the fixture document.
    Mockito.when(mockedSnp.getSnPPageDocument(Mockito.anyString())).thenReturn(fixturePage);

    // The parsing helpers run their real code against that document.
    Mockito.when(mockedSnp.getSemesters(Mockito.any(Document.class))).thenCallRealMethod();
    Mockito.when(mockedSnp.getCurrentSemester(Mockito.<Semester>anyList())).thenCallRealMethod();
    Mockito.when(mockedSnp.getRowDataChildValue(Mockito.any(Element.class), Mockito.anyInt()))
            .thenCallRealMethod();

    return mockedSnp;
}
/**
 * Downloads the calendar page of a league and groups the match results by match day.
 *
 * <p>Every {@code .table} element is one day, numbered from 1 in document order; reading
 * stops at the first table that has a child matching {@code .greyfoot}. Each
 * {@code .match} contributes two results: children 0 and 1 are the first team's name and
 * score, children 3 and 2 the second team's name and score. Scores use a decimal comma.
 *
 * @param leagueName league identifier inserted into the calendar URL
 * @return results per match day
 * @throws IOException if the page cannot be downloaded
 */
private Map<Integer, List<TeamResult>> getResults(String leagueName) throws IOException{
    String calendarUrl = GlobalConfiguration.baseURL + leagueName + GlobalConfiguration.calendarSuffix;
    Document calendar = Jsoup.connect(calendarUrl).get();

    Map<Integer, List<TeamResult>> resultsByDay = new HashMap<>();
    int day = 1;
    for (Element calendarDay : calendar.select(".table")) {
        if (calendarDay.children().is(".greyfoot")) {
            break;
        }

        List<TeamResult> dayResults = new ArrayList<>();
        for (Element match : calendarDay.select(".match")) {
            Elements cells = match.children();
            String firstTeam = cells.get(0).text();
            double firstScore = Double.parseDouble(cells.get(1).text().replace(",", "."));
            dayResults.add(new TeamResult(firstTeam, firstScore));

            String secondTeam = cells.get(3).text();
            double secondScore = Double.parseDouble(cells.get(2).text().replace(",", "."));
            dayResults.add(new TeamResult(secondTeam, secondScore));
        }

        resultsByDay.put(day++, dayResults);
    }
    return resultsByDay;
}
/**
 * Parses a gallery detail page into one {@code PicInfo} per gallery item; every entry
 * shares the page title and the page's tag list.
 *
 * @param baseUrl    base URL of the site (not used by this implementation)
 * @param currentUrl URL the page was loaded from; stored under {@code CURRENT_URL}
 * @param result     raw page bytes, decoded as UTF-8
 * @param resultMap  map that receives the current URL and the parsed pictures
 * @return {@code resultMap}
 * @throws UnsupportedEncodingException declared for the charset-name based decoding
 */
@Override
public Map<DetailActivity.parameter, Object> getDetailContent(String baseUrl, String currentUrl, byte[] result, Map<DetailActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    Document page = Jsoup.parse(new String(result, "utf-8"));

    Elements headings = page.select("#header h1");
    String pageTitle = headings.isEmpty() ? "" : headings.get(0).text();

    List<String> tagNames = new ArrayList<>();
    for (Element tagLink : page.select("ul.tagList a")) {
        tagNames.add(tagLink.text());
    }

    List<PicInfo> pictures = new ArrayList<>();
    for (Element item : page.select("ul.gallery li:has(img)")) {
        pictures.add(new PicInfo(item.attr("data-src")).setTitle(pageTitle).setTags(tagNames));
    }

    resultMap.put(DetailActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(DetailActivity.parameter.RESULT, pictures);
    return resultMap;
}
/**
 * Creates a new OVH application by posting to https://eu.api.ovh.com/createApp/ and
 * writes the returned Application Key and Application Secret to the log at WARN level.
 *
 * @param nic      account identifier sent as the {@code nic} form field
 * @param password account password sent as the {@code password} form field
 * @throws IOException if the request fails
 */
public void createApplication(String nic, String password) throws IOException {
    final String endpoint = "https://eu.api.ovh.com/createApp/";
    Document response = Jsoup.connect(endpoint)
            .data("nic", nic)
            .data("password", password)
            .data("applicationName", "One Shoot Token")
            .data("applicationDescription", "One Shoot Token")
            .post();

    // The result page lists "Application Key" / "Application Secret" followed by the value.
    Matcher entries = Pattern.compile(" Application (\\w+)<pre><name>([^<]+)</name></pre>")
            .matcher(response.toString());

    String key = null;
    String secret = null;
    while (entries.find()) {
        String label = entries.group(1);
        String value = entries.group(2);
        if (label.equals("Key")) {
            key = value;
        }
        if (label.equals("Secret")) {
            secret = value;
        }
    }
    log.warn("Key:{} Secret:{}", key, secret);
}
/** * 方法说明:绑定单data(parameter)模拟浏览器,并返回document对象 * * @param url 被访问的url * @param dataKey parameter的key * @param dataValue parameter的value * @return Document 返回document对象 * @throws Exception */ public static Document getDocumentWithData(String url, String dataKey, String dataValue) throws Exception { Document doc = null; if (StringUtil.isEmpty(dataKey) && StringUtil.isEmpty(dataValue)) { doc = getDocument(url); } else if (!StringUtil.isEmpty(dataKey) && !StringUtil.isEmpty(dataValue)){ Map<String, String> dataMap = new HashMap<String, String>(); dataMap.put(dataKey, dataValue); doc = getDocumentWithData(url, dataMap); } else { // parameter is error. 参数が不正である、所传参数错误。 throw new IllegalArgumentException("key or value is err"); // TODO hard coding is fixing bluetata 2017/03/20 add } return doc; }
private Document getLargestImagePageDocument(URL url) throws IOException { // Get current page Document doc = Http.url(url).get(); // Look for larger image page String largestImagePage = this.url.toExternalForm(); for (Element olSize : doc.select("ol.sizes-list > li > ol > li")) { Elements ola = olSize.select("a"); if (ola.size() == 0) { largestImagePage = this.url.toExternalForm(); } else { String candImage = ola.get(0).attr("href"); if (candImage.startsWith("/")) { candImage = "http://www.flickr.com" + candImage; } largestImagePage = candImage; } } if (!largestImagePage.equals(this.url.toExternalForm())) { // Found larger image page, get it. doc = Http.url(largestImagePage).get(); } return doc; }
/**
 * Requests an insult in the language selected in {@code cmbLanguage} and shows the
 * response body text in {@code txtPaneShow}.
 *
 * <p>Runtime exceptions are rethrown unchanged; any other exception replaces the pane
 * text with an outage notice.
 */
public void showWord() {
    try {
        Languages selected = (Languages) cmbLanguage.getSelectedItem();
        String requestUrl = "http://evilinsult.com/generate_insult.php?lang=" + selected.getLang();

        Document response = Jsoup.connect(requestUrl).get();
        for (Element body : response.select("body")) {
            txtPaneShow.setText("\n" + body.text());
        }
    } catch (RuntimeException e) {
        throw e;
    } catch (Exception ex) {
        txtPaneShow.setText("\n" + "Insult Outage! Please Check Your Internet Connection And Try Again In Three Minutes");
    }
}
/**
 * Resolves the largest-size page for {@code this.url} and queues its image for download,
 * optionally prefixing the file name with the zero-padded {@code index}.
 *
 * <p>A page without {@code div#allsizes-photo img} and any {@link IOException} are
 * logged as errors.
 */
@Override
public void run() {
    try {
        Document sizePage = getLargestImagePageDocument(this.url);
        Elements fullsizeImages = sizePage.select("div#allsizes-photo img");
        if (fullsizeImages.isEmpty()) {
            logger.error("Could not find flickr image at " + sizePage.location() + " - missing 'div#allsizes-photo img'");
            return;
        }

        String prefix = Utils.getConfigBoolean("download.save_order", true)
                ? String.format("%03d_", index)
                : "";
        synchronized (flickrThreadPool) {
            addURLToDownload(new URL(fullsizeImages.first().attr("src")), prefix);
        }
    } catch (IOException e) {
        logger.error("[!] Exception while loading/parsing " + this.url, e);
    }
}
private static void downloadSummary(String name, Document doc) { // 写文件 try { String summary = doc.select(".summary p").first().text(); logger.debug(summary); FileUtils.write(new File(rootPath + "Summary.data"), name + "\n" + summary + "\n", "utf-8", true); } catch (Exception e) { logger.error("个人描述信息 写入:【" + name + "】\t失败!"); try { FileUtils.write(new File(rootPath + "ErrorSummary.data"), name + "\t" + e.toString() + "\n", "utf-8", true); } catch (IOException e1) { e1.printStackTrace(); } } }
/** * Tries to parse the image url out of the description. If it fails <code>null</code> will be returned. * @return Image url in the description. <code>Null</code> if no description was found. */ private String parseImage() { final Document doc = Jsoup.parse(getDescription()); final Elements imgs = doc.getElementsByTag("img"); for (final Element img : imgs) { String src = img.attr("src"); if(StringUtils.isNotBlank(src)) { if(src.startsWith("/")) { try { final URL feedUrl = new URL(document.getContentSource().getUrl()); src = "//" + feedUrl.getHost() + src; } catch (final MalformedURLException e) { // next continue; } } return src; } } return null; }
private void fetchImage() { try { Document doc = Http.url(url).get(); // Find image Elements images = doc.select(".image-container img"); if (images.size() == 0) { logger.warn("Image not found at " + this.url); return; } Element image = images.first(); String imgsrc = image.attr("src"); logger.info("Found URL " + imgsrc); // Provide prefix and let the AbstractRipper "guess" the filename String prefix = ""; if (Utils.getConfigBoolean("download.save_order", true)) { prefix = String.format("%03d_", index); } addURLToDownload(new URL(imgsrc), prefix); } catch (IOException e) { logger.error("[!] Exception while loading/parsing " + this.url, e); } }
/**
 * Downloads a box score page and checks that the second cell of the second row of the
 * line-score table reads "CLE".
 */
@Test
public void team_name_isValid() throws Exception {
    String url = "https://www.basketball-reference.com/boxscores/201706120GSW.html";
    Document doc = Jsoup.connect(url).get();

    Element lineScore = doc.getElementById("div_line_score");
    String awayTeamName = lineScore.getElementsByTag("tr").get(1).getElementsByTag("td").get(1).text();

    // assertEquals takes (expected, actual); the swapped order produced a misleading
    // failure message.
    assertEquals("CLE", awayTeamName);
}
/**
 * Converts an HTML fragment to XHTML with Jsoup.
 *
 * @param html    HTML body fragment to convert
 * @param charset charset used both for Jsoup's output settings and for encoding the result
 * @return the serialized document as bytes in {@code charset}
 */
private byte[] formatToXHtml(String html, Charset charset) {
    Document fragment = Jsoup.parseBodyFragment(html);
    fragment.outputSettings()
            .syntax(Document.OutputSettings.Syntax.xml)
            .charset(charset);
    String xhtml = fragment.toString();
    return xhtml.getBytes(charset);
}
/**
 * Collects the image URLs of all gallery thumbnails, with the thumbnail path segment
 * removed from each URL.
 *
 * @param doc parsed gallery page
 * @return rewritten image URLs in document order
 */
@Override
public List<String> getURLsFromPage(Document doc) {
    List<String> fullSizeUrls = new ArrayList<>();
    for (Element thumbnail : doc.select("div.ngg-gallery-thumbnail > a > img")) {
        // Dropping "thumbs/thumbs_" turns the thumbnail URL into the original image URL.
        fullSizeUrls.add(thumbnail.attr("src").replace("thumbs/thumbs_", ""));
    }
    return fullSizeUrls;
}
/**
 * Fetches {@code url} with a GET request using this instance's user agent.
 *
 * <p>The request follows redirects, does not fail on HTTP error statuses or on
 * unsupported content types, and uses a 12000 ms timeout.
 *
 * @param url          address to fetch
 * @param validateCert whether TLS certificates are validated
 * @return the parsed response
 * @throws IOException if the request fails
 */
private Document getDocument(String url, Boolean validateCert) throws IOException {
    final int timeoutMillis = 12000;
    final String referrer = "http://www.google.com";

    return Jsoup.connect(url)
            .userAgent(userAgent)
            .timeout(timeoutMillis)
            .referrer(referrer)
            .followRedirects(true)
            .ignoreHttpErrors(true)
            .ignoreContentType(true)
            .validateTLSCertificates(validateCert)
            .get();
}
/**
 * Converts a response body into {@code T}: the parsed Jsoup document itself when the
 * target type is {@code Document}, otherwise whatever {@code AJsoupReader.deserialize}
 * builds from that document.
 *
 * @param value response body to read; closed before returning
 * @return the converted value
 * @throws IOException if the body cannot be read
 */
@Override
public T convert(ResponseBody value) throws IOException {
    Document document = Jsoup.parse(value.string());
    try {
        if (mT != Document.class) {
            return AJsoupReader.deserialize(document, (Class<T>) mT);
        }
        return (T) document;
    } finally {
        value.close();
    }
}
/**
 * Returns the cached model, building it on first use.
 *
 * <p>The raw text is parsed as HTML against the base URL; if parsing yields nothing or
 * anything throws, the model is built from the raw text instead.
 *
 * @return the cached or newly built model
 */
@Override
public SipNodes createOrGetModel() {
    if (model != null) {
        return model;
    }

    try {
        Document parsed = Jsoup.parse(getRawText(), getBaseUrl());
        if (parsed == null) {
            // Routed through the catch below so the text fallback is used.
            throw new RuntimeException();
        }
        model = new SipNodes(SIPNode.e(parsed));
    } catch (Exception e) {
        model = new SipNodes(SIPNode.t(getRawText()));
    }
    return model;
}
@Override public List<String> getURLsFromPage(Document doc) { List<String> result = new ArrayList<>(); for (Element el : doc.select("div.single-post > div.gallery > dl > dt > a > img")) { String imageSource = el.attr("data-lazy-src"); // We remove the .md from images so we download the full size image // not the thumbnail ones imageSource = imageSource.replaceAll("-\\d\\d\\dx\\d\\d\\d", ""); result.add(imageSource); } return result; }
public Integer getData(String link,String searchWord) { int count = 0; try { Trie myTrie = new Trie(); //Using the jsoup to read through each webpage. Document document = Jsoup.connect(link).get(); //Reading only the paragraph tags from the specified link. Elements paragraph = document.select("p"); TreeMap<String, Integer> frequencyData = new TreeMap<String, Integer>(); //Looping through all the paragraphs for(Element para : paragraph) { String p = para.text(); p = p.replaceAll("[,.!?:;()-]", "\\s");//removing all the punctuations and replacing with blank spaces. //for each of the above paragraph extract individual words and insert them in trie data structure. for (String word : p.split(" ")) { if (StopWord.is(word.toLowerCase())) continue; myTrie.insert(word.toLowerCase()); count = PageRanking.getCount(word, frequencyData) + 1 ; frequencyData.put(word, count); } } int temp = PageRanking.getCount(searchWord, frequencyData); count = temp; // System.out.println(searchWord+" found in "+ link + " "+myTrie.search(searchWord)); // System.out.println(searchWord+" occurred "+temp+" times "); } catch(Exception e) { e.printStackTrace(); } return count; }
/**
 * Parses an HTML page into an article and records which matchers produced the title,
 * the text and the publication date.
 *
 * <p>Titles and texts are each joined with newlines. The publication date is the first
 * candidate with a non-null date; when there is one, only its value and match are
 * reported, otherwise the values and matches of all candidates are reported.
 *
 * @param html          raw page HTML
 * @param url           page URL, also used as the base URI for parsing
 * @param source        source configuration supplying URL, app ids, categories and
 *                      text normalizers
 * @param publishedHint hint forwarded to {@code extractPublicationDates}
 * @return the parse result holding the article and the match details
 */
public static HttpArticleParseResult extractArticleWithDetails(String html, String url, HttpSource source, String publishedHint) {
    Document document = Jsoup.parse(html, url);
    HttpArticleParseResult result = new HttpArticleParseResult();

    HttpArticle article = new HttpArticle();
    article.setUrl(url);
    article.setSource(source.getUrl());
    article.setAppIds(source.getAppIds());
    article.setCategories(source.getCategories());

    JsonLdParser.JsonLdArticle ldJsonArticle = JsonLdParser.parse(JsonLdParser.extractJsonLdParts(document));

    // Title
    List<MatchedString> titles = extractTitlesWithJsoup(document, ldJsonArticle, source);
    article.setTitle(titles.stream()
            .map(MatchedString::getValue)
            .collect(Collectors.joining("\n")));
    result.setTitleMatches(titles.stream()
            .map(MatchedString::getMatch)
            .collect(Collectors.toList()));

    // Text
    List<MatchedString> texts = extractTextsWithJsoup(document, source);
    article.setText(texts.stream()
            .map(MatchedString::getValue)
            .map(t -> TextFilters.normalizeText(t, source.getTextNormalizers()))
            .collect(Collectors.joining("\n")));
    result.setTextMatches(texts.stream()
            .map(MatchedString::getMatch)
            .distinct()
            .collect(Collectors.toList()));

    // Publication date
    List<MatchedDate> publicationDates = extractPublicationDates(html, document, ldJsonArticle, source, publishedHint);
    MatchedDate publicationDate = publicationDates.stream()
            .filter(d -> d.getDate() != null)
            .findFirst()
            .orElse(null);
    boolean dated = publicationDate != null;

    article.setPublished(dated ? publicationDate.getDate() : null);
    result.setPublishedPattern(dated ? publicationDate.getPattern() : null);

    List<String> publishedTexts;
    List<String> publishedMatches;
    if (dated) {
        publishedTexts = Lists.newArrayList(publicationDate.getValue());
        publishedMatches = Lists.newArrayList(publicationDate.getMatch());
    } else {
        publishedTexts = publicationDates.stream().map(MatchedDate::getValue).collect(Collectors.toList());
        publishedMatches = publicationDates.stream().map(MatchedDate::getMatch).collect(Collectors.toList());
    }
    result.setPublishedTexts(publishedTexts);
    result.setPublishedMatches(publishedMatches);

    result.setArticle(article);
    return result;
}
/**
 * Finds the link to the next page of a listing.
 *
 * @param baseUrl    prefix prepended to the link's {@code href}
 * @param currentUrl URL the page was loaded from (not used by this implementation)
 * @param result     raw page bytes, decoded as UTF-8
 * @return the next page's URL, or an empty string when the pager has no "next page" link
 * @throws UnsupportedEncodingException declared for the charset-name based decoding
 */
@Override
public String getContentNext(String baseUrl, String currentUrl, byte[] result) throws UnsupportedEncodingException {
    Document page = Jsoup.parse(new String(result, "utf-8"));
    // The pager link is identified by its own text, the Chinese label for "next page".
    Elements nextLinks = page.select("#pageNum a:containsOwn(下一页)");
    return nextLinks.isEmpty() ? "" : baseUrl + nextLinks.get(0).attr("href");
}
private ArrayList<ApsvOrder> findOrders(String html) { //logger.info("Html: {}", html); ArrayList<ApsvOrder> orders = new ArrayList<>(); Document doc = Jsoup.parse(html); Element ordersForm = doc.getElementById("J-submit-form"); if (ordersForm == null) { logger.error("Cannot find order list form, maybe cookie expires"); // 标记task status为异常 // TODO 弹窗提醒cookie异常 RunTasksModel.getInstance().MarkTaskException(task.id); return orders; } Elements tableBody = doc.select("#tradeRecordsIndex>tbody"); Elements orderRows = tableBody.select("tr"); orderRows.forEach(row -> { Elements timeNodes = row.select("td.time p"); String[] orderNoData = row.select("td.tradeNo p").text().split("\\|"); ApsvOrder order = new ApsvOrder(){ { taskId = task.id; time = timeNodes.get(0).text() + " " + timeNodes.get(timeNodes.size() - 1).text(); description = row.select(".memo-info").text(); memo = row.select("td.memo p").text(); tradeNo = orderNoData.length > 1 ? orderNoData[1].split(":")[1] : orderNoData[0].split(":")[1]; username = Unicode.unicodeToString(row.select("td.other p").text()); amount = Float.parseFloat(row.select("td.amount span").text().replaceAll("\\s+", "")); status = row.select("td.status p").text(); } }; order.sig = Order.Sign(order, task.pushSecret); orders.add(order); }); return orders; }
/**
 * Downloads {@code url} on the IO scheduler, turns every {@code li} below the elements
 * with class {@code list_line} into an {@code ArticleItem}, and shows the list on the
 * main thread.
 *
 * <p>The loading indicator is switched on before the request and switched off both when
 * the list arrives and when the request fails.
 *
 * @param url page to download and parse
 */
@Override
public void onHandleParseHTML(final String url) {
    mView.showLoading(true);
    Observable.create(new ObservableOnSubscribe<ArrayList<ArticleItem>>() {
        @Override
        public void subscribe(ObservableEmitter<ArrayList<ArticleItem>> e) throws Exception {
            ArrayList<ArticleItem> list = new ArrayList<>();
            Document doc = Jsoup.connect(url).get();
            Elements ul = doc.getElementsByClass("list_line");
            for (Element u : ul) {
                Elements li = u.getElementsByTag("li");
                for (Element l : li) {
                    String text = l.getElementsByTag("a").text();
                    String href = l.getElementsByTag("a").attr("href");
                    String time = l.getElementsByTag("span").text();
                    list.add(new ArticleItem(text, href, time));
                }
            }
            e.onNext(list);
            // Exactly one list is emitted, so the stream is finished explicitly.
            e.onComplete();
        }
    })
    .subscribeOn(Schedulers.io())
    .observeOn(AndroidSchedulers.mainThread())
    .subscribe(new Consumer<ArrayList<ArticleItem>>() {
        @Override
        public void accept(@NonNull ArrayList<ArticleItem> articleItems) throws Exception {
            mView.showList(articleItems);
            mView.showLoading(false);
        }
    }, new Consumer<Throwable>() {
        @Override
        public void accept(@NonNull Throwable error) throws Exception {
            // Without an error consumer a failed download surfaced as
            // OnErrorNotImplementedException and the loading indicator stayed visible.
            error.printStackTrace();
            mView.showLoading(false);
        }
    });
}
/**
 * Reads the volume number from the elements matched by {@code VOLUM_CSS_SELECTOR}.
 *
 * <p>The number is expected to start at {@code VOLUM_TEXT_OFFSET} within the matched text.
 *
 * @param dom parsed page
 * @return the parsed number, or 0 when the text is too short or not a number
 */
private int parseVolum(final Document dom) {
    final Elements matches = dom.select(VOLUM_CSS_SELECTOR);
    int parsed;
    try {
        parsed = Integer.parseInt(matches.text().substring(VOLUM_TEXT_OFFSET));
    } catch (Exception ignored) {
        // Missing or malformed text counts as volume 0.
        parsed = 0;
    }
    return parsed;
}
/**
 * Resolves the image URL of every post linked from the page.
 *
 * <p>Each link's {@code href} is appended to {@code urlBase} and passed to
 * {@code getImageFromPost}.
 *
 * @param page parsed listing page
 * @return the values returned by {@code getImageFromPost}, in document order
 */
@Override
public List<String> getURLsFromPage(Document page) {
    List<String> imageUrls = new ArrayList<>();
    for (Element postLink : page.select("figure.t-image > b > u > a")) {
        String postPath = postLink.select("a").first().attr("href");
        imageUrls.add(getImageFromPost(urlBase + postPath));
    }
    return imageUrls;
}