/**
 * Appends the "recently updated novels" section to {@code list}.
 *
 * <p>A title row ({@code TYPE_TITLE_LIST} / {@code TYPE_TITLE}) is added first, followed by one
 * {@code TYPE_UPDATE} entry for every anchor reached through the selector chain
 * {@code div#newscontent -> div.l -> span.s2 -> a}. The list is then handed to
 * {@code initAdd(list)}.
 *
 * @param list destination list; it is mutated in place
 */
private void initList(List<FictionModel> list) {
    FictionModel header = new FictionModel();
    header.title = TYPE_TITLE_LIST;
    header.type = TYPE_TITLE;
    list.add(header);

    Elements updateLinks = document.select("div#newscontent")
            .select("div.l")
            .select("span.s2")
            .select("a");
    for (int i = 0; i < updateLinks.size(); i++) {
        Element link = updateLinks.get(i);
        FictionModel entry = new FictionModel();
        entry.title = link.text();
        // "abs:" asks jsoup for the href resolved against the document's base URI.
        entry.detailUrl = link.attr("abs:href");
        entry.type = TYPE_UPDATE;
        list.add(entry);
    }

    initAdd(list);
}
/** * 每日推荐 * * @param str */ public List<IHistoryDailyPicks> takeDailyPick(String str) { Document document = Jsoup.parse(str); List<IHistoryDailyPicks> dailyPicksList = new ArrayList<>(); Elements element = document.getElementsByClass("tuijian").get(0) .getElementsByClass("box"); for (Element element2 : element) { IHistoryDailyPicks dailyPicks = new IHistoryDailyPicks(); Element info = element2.getElementsByClass("info").get(0); dailyPicks.setTitle(info.getElementsByTag("a").text());// title String time = info.getElementsByClass("time").text().trim(); dailyPicks.setTime(time.substring(0, time.length() - 1));// time dailyPicks.setDiscuss(info.getElementsByClass("pinglun").text());// Discuss dailyPicks.setDescribe(element2.getElementsByClass("info1").text());// Describe dailyPicks.setHref(AppUtils.Constants.URL_ILISHI + info.getElementsByTag("a").attr("href"));// Href dailyPicks.setImgHref(AppUtils.Constants.URL_ILISHI + element2.getElementsByTag("img").attr("src"));// imgHref dailyPicksList.add(dailyPicks); } return dailyPicksList; }
private String parseSelector(Field f) { String selector = f.getAnnotation(Selector.class).value(); Elements elems = doc.select(selector); if (elems.size() > 0) { final Element elem = elems.get(0); // Check which value annotation is present and retrieve data depending on the type of annotation if (f.isAnnotationPresent(TextValue.class)) { return elem.text(); } else if (f.isAnnotationPresent(HtmlValue.class)) { return elem.html(); } else if (f.isAnnotationPresent(AttributeValue.class)) { return elem.attr(f.getAnnotation(AttributeValue.class).name()); } else return elem.text(); } return null; }
/**
 * Collects the album links found on a band page.
 *
 * <p>Every anchor whose href contains {@code /album/} is considered. Hrefs containing
 * {@code .bandcamp.com/album} are kept as they are; hrefs starting with {@code /album/} are
 * joined onto {@code baseURL} via {@code HttpUtil.addPaths}. Anything else is ignored.
 *
 * @param bandURL page to fetch
 * @param baseURL prefix used for site-relative album links
 * @return the distinct album URLs
 * @throws Exception whatever {@code HttpUtil.getDocument} or {@code HttpUtil.addPaths} throw
 */
private Set<String> getAlbumLinkList(String bandURL, String baseURL) throws Exception {
    Set<String> found = new HashSet<String>();
    Document page = HttpUtil.getDocument(bandURL);
    for (Element anchor : page.select("a[href*=/album/]")) {
        String href = anchor.attr("href");
        if (!StringUtils.isNotBlank(href)) {
            continue;
        }
        if (href.contains(".bandcamp.com/album")) {
            found.add(href);
        } else if (href.startsWith("/album/")) {
            found.add(HttpUtil.addPaths(baseURL, href));
        }
    }
    return found;
}
/**
 * Collects the track links found on a band page.
 *
 * <p>Every anchor whose href contains {@code /track/} is considered. Hrefs containing
 * {@code .bandcamp.com/track} are kept as they are; hrefs starting with {@code /track/} are
 * joined onto {@code baseURL} via {@code HttpUtil.addPaths}. Anything else is ignored.
 *
 * @param bandURL page to fetch
 * @param baseURL prefix used for site-relative track links
 * @return the distinct track URLs
 * @throws Exception whatever {@code HttpUtil.getDocument} or {@code HttpUtil.addPaths} throw
 */
private Set<String> getTrackLinkList(String bandURL, String baseURL) throws Exception {
    Set<String> found = new HashSet<String>();
    Document page = HttpUtil.getDocument(bandURL);
    for (Element anchor : page.select("a[href*=/track/]")) {
        String href = anchor.attr("href");
        if (!StringUtils.isNotBlank(href)) {
            continue;
        }
        if (href.contains(".bandcamp.com/track")) {
            found.add(href);
        } else if (href.startsWith("/track/")) {
            found.add(HttpUtil.addPaths(baseURL, href));
        }
    }
    return found;
}
/**
 * Fills in the team's "points given to K" rank and average from the second table of
 * {@code kPointsAllowedURL}.
 *
 * <p>Rows are scanned from the third {@code tr} onward; the rank is the 1-based position of
 * the first row whose first cell contains the team name. The average is read from cell 10
 * when the third cell contains {@code "*"}, otherwise from cell 11. Nothing is set when no
 * row matches.
 *
 * @param team team to look up and update
 */
public void populatePointsGivenToK(Team team) {
    Elements rows = kPointsAllowedURL.select("table").get(1).select("tr");
    int rank = 0;
    for (int i = 2; i < rows.size(); i++) {
        Elements cells = rows.get(i).select("td");
        rank++;
        if (!cells.get(0).text().contains(team.getName())) {
            continue;
        }
        // A "*" in the third cell shifts the average one column to the left.
        int avgColumn = cells.get(2).text().contains("*") ? 10 : 11;
        team.setFpToKRank(rank);
        team.setFpToKAvg(Double.parseDouble(cells.get(avgColumn).text()));
        return;
    }
}
/**
 * Builds the album list from the {@code #sliding li} items of a GBK-encoded page.
 *
 * <p>For each item the title comes from the first {@code .p-title} element (if any), the
 * album URL is {@code baseUrl} plus the href of the {@code a:has(img)} match, and the picture
 * URL is the {@code src} of the first image inside that link (if any).
 *
 * @param baseUrl    prefix prepended to each album href
 * @param currentUrl stored unchanged under {@code CURRENT_URL}
 * @param result     raw page bytes, decoded as GBK
 * @param resultMap  map that receives {@code CURRENT_URL} and {@code RESULT}
 * @return {@code resultMap}
 * @throws UnsupportedEncodingException if the GBK charset is unavailable
 */
@Override
public Map<ContentsActivity.parameter, Object> getContent(String baseUrl, String currentUrl, byte[] result, Map<ContentsActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    String html = new String(result, "gbk");
    List<AlbumInfo> albums = new ArrayList<>();

    for (Element item : Jsoup.parse(html).select("#sliding li")) {
        AlbumInfo album = new AlbumInfo();

        Elements titles = item.select(".p-title");
        if (!titles.isEmpty()) {
            album.setTitle(titles.get(0).text());
        }

        Elements imageLinks = item.select("a:has(img)");
        album.setAlbumUrl(baseUrl + imageLinks.attr("href"));

        Elements images = imageLinks.select("img");
        if (!images.isEmpty()) {
            album.setPicUrl(images.get(0).attr("src"));
        }

        albums.add(album);
    }

    resultMap.put(ContentsActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(ContentsActivity.parameter.RESULT, albums);
    return resultMap;
}
/**
 * Loads the account page at {@code VOICEMAIL_SERVICE_URI} (sending the stored cookies) and
 * copies the {@code value} of the {@code pseudo}, {@code voip_num}, {@code login},
 * {@code email} and {@code uid} inputs into a {@link UserInfo}.
 *
 * <p>A missing input yields an empty string. If the request fails with an
 * {@link IOException} the stack trace is printed and the (unpopulated) object is returned.
 *
 * @return the user info, never {@code null}
 */
private UserInfo getUserInfo() {
    UserInfo info = new UserInfo();
    try {
        Document page = Jsoup.connect(VOICEMAIL_SERVICE_URI)
                .cookies(loadCookies())
                .execute()
                .parse();
        info.setPseudo(firstInputValue(page, "pseudo"));
        info.setPhoneNumber(firstInputValue(page, "voip_num"));
        info.setLogin(firstInputValue(page, "login"));
        info.setEmail(firstInputValue(page, "email"));
        info.setUid(firstInputValue(page, "uid"));
    } catch (IOException e) {
        e.printStackTrace();
    }
    return info;
}

/** Returns the {@code value} of the first {@code input[name=inputName]}, or "" if none exists. */
private String firstInputValue(Document page, String inputName) {
    Elements inputs = page.select("input[name=" + inputName + "]");
    if (inputs.size() > 0) {
        return inputs.get(0).attr("value");
    }
    return "";
}
public static ArrayList<String> getUrlsFromDoc(Document doc){ ArrayList<String> urls = new ArrayList<>(); Elements links = doc.getElementsByTag("a"); // Pattern pattern = Pattern.compile(""#(.*?)\\""); Pattern pattern = Pattern.compile(""#(.*?)\\\\");//should grab it without the annoying "\" at the end //the original is still there just in case ;) for (Element curr : links) { if(curr.text().equals("Add")){ Matcher matcher = pattern.matcher(curr.toString()); if(matcher.find()){ urls.add(matcher.group(1)); } } } return urls; }
/**
 * Finds the "next page" URL inside the page's inline scripts.
 *
 * <p>The page is decoded as GBK and every non-empty {@code script} body is searched for
 * {@code index_<digits>.htm">下一页}. The first hit, minus its five trailing characters
 * ({@code ">下一页}), is appended to {@code baseUrl + "rosimm/"}.
 *
 * @param baseUrl    site prefix
 * @param currentUrl not used by this implementation
 * @param result     raw page bytes, decoded as GBK
 * @return the next page URL, or an empty string when no script contains the marker
 * @throws UnsupportedEncodingException if the GBK charset is unavailable
 */
@Override
public String getContentNext(String baseUrl, String currentUrl, byte[] result) throws UnsupportedEncodingException {
    Document page = Jsoup.parse(new String(result, "gbk"));
    Pattern nextLink = Pattern.compile("index_\\d*.htm\">下一页");

    for (Element script : page.select("script")) {
        String code = script.html();
        if (code.equals("")) {
            continue;
        }
        Matcher hit = nextLink.matcher(code);
        if (!hit.find()) {
            continue;
        }
        String matched = hit.group();
        return baseUrl + "rosimm/" + matched.substring(0, matched.length() - 5);
    }
    return "";
}
private static Elements linkFilter(Document doc) { // ��ȡ����a[href] Elements links = doc.select("a[href]"); // ����Text �ַ�С��5�����ӣ�һ��Ϊ�������ӣ���Ŀ������ int linkNum = 0; //System.out.println("linkNumBefore: " + links.size()); for (linkNum = 0; linkNum < links.size(); linkNum++) { //String linkHref = links.get(linkNum).attr("href"); String linkText = links.get(linkNum).text(); if(linkText.length() <= 5) { links.remove(linkNum); linkNum--; continue; } } //System.out.println("linkNumAfter = " + links.size()); return links; }
/**
 * Downloads a meiyuxs.com catalog page and stores its chapter list in {@code map}.
 *
 * <p>Every {@code list-group-item} element containing an anchor becomes a
 * {@code {catalog, href}} entry. The map receives {@code data} (the entries), {@code cover}
 * (always empty) and {@code lastChapter} (the catalog text of the final entry, or an empty
 * string when the page has no chapters, which previously threw).
 *
 * <p>On {@link IOException} the stack trace is printed and {@code map} is returned unchanged.
 *
 * @param map result map to populate
 * @param url catalog page URL
 * @return {@code map}
 */
private static Map meiyuxsCatalog(Map map, String url) {
    try {
        List<Map<String, Object>> data = new ArrayList<>();
        Document document = Jsoup
                .connect(url)
                .userAgent(FormatUtil.USER_AGENT_PC)
                .get();
        Element body = document.body();
        for (Element catalogE : body.getElementsByClass("list-group-item")) {
            Elements anchors = catalogE.getElementsByTag("a");
            if (anchors.size() > 0) {
                Map<String, Object> entry = new HashMap<>();
                entry.put("catalog", catalogE.text());
                entry.put("href", "http://www.meiyuxs.com" + anchors.first().attr("href"));
                data.add(entry);
            }
        }
        map.put("data", data);
        map.put("cover", "");
        // Guard the empty catalog so get(size - 1) cannot throw.
        String lastChapter = data.isEmpty()
                ? ""
                : data.get(data.size() - 1).get("catalog").toString();
        map.put("lastChapter", lastChapter);
    } catch (IOException e) {
        e.printStackTrace();
    }
    return map;
}
/**
 * Appends the {@code ip:port} pairs found in the response's {@code #ip_list} table to
 * {@code D://iplist/<id>.txt}.
 *
 * <p>Rows with more than two {@code td} cells are considered. The non-blank cell texts are
 * collected, and the first two are taken as ip and port when more than two texts exist;
 * otherwise both stay empty and the line written is {@code ":"}. Each pair is also echoed to
 * standard output.
 *
 * <p>The writer is opened with try-with-resources so the file handle is released even when
 * the pipeline throws.
 *
 * @param response response to read the table from
 * @throws IOException if the output file cannot be opened
 */
@Override
public void store(TaskResponse response) throws IOException {
    try (PrintWriter writer = new PrintWriter(new FileWriter("D://iplist/" + id + ".txt", true), true)) {
        Elements els = response.select("#ip_list tr");
        els.stream()
                .filter(el -> el.select("td").size() > 2)
                .map(el -> {
                    List<String> texts = el.select("td").stream()
                            .map(td -> td.text())
                            .filter(text -> text.trim().length() > 0)
                            .collect(Collectors.toList());
                    String ip = "";
                    String port = "";
                    if (texts.size() > 2) {
                        ip = texts.get(0);
                        port = texts.get(1);
                    }
                    return new String[]{ip, port};
                })
                .forEach(name -> {
                    System.out.println(Arrays.toString(name));
                    writer.println(name[0] + ":" + name[1]);
                });
    }
}
/**
 * Appends the "recently updated novels" section to {@code list}.
 *
 * <p>A title row ({@code TYPE_TITLE_RETCENT} / {@code TYPE_TITLE}) is added first, followed
 * by one {@code TYPE_RECENT} entry for every anchor reached through the selector chain
 * {@code div#newscontent -> div.l -> span.s2 -> a}. The list is then handed to
 * {@code initAdd(list)}.
 *
 * @param list destination list; it is mutated in place
 */
private void initRecent(List<FictionModel> list) {
    FictionModel header = new FictionModel();
    header.title = TYPE_TITLE_RETCENT;
    header.type = TYPE_TITLE;
    list.add(header);

    Elements recentLinks = document.select("div#newscontent")
            .select("div.l")
            .select("span.s2")
            .select("a");
    for (int i = 0; i < recentLinks.size(); i++) {
        Element link = recentLinks.get(i);
        FictionModel entry = new FictionModel();
        entry.title = link.text();
        // "abs:" asks jsoup for the href resolved against the document's base URI.
        entry.detailUrl = link.attr("abs:href");
        entry.type = TYPE_RECENT;
        list.add(entry);
    }

    initAdd(list);
}
@Override public Result process(Request request, Page page) { Result result = new Result(); //解析HTML采用jsoup框架,详见:https://jsoup.org/ //解析页面标题 result.put("title", page.document().title()); //获取页面上的新的链接地址 Elements elements = page.document().select("a"); //获取所有a标签 for (int i = 0; i < elements.size(); i++) { String url = elements.get(i).absUrl("href"); //获取绝对url if (url != null && url.contains("baidu")) { page.addTargetRequest(url); //获取新url添加到任务队列 } } return result; }
/**
 * Validates the embedded answers of a cloze-test question.
 *
 * <p>Checks are applied in order and the first failure is reported:
 * <ol>
 *   <li>the question text contains {@code cloze="true"};</li>
 *   <li>the {@code id} attributes of the {@code span[cloze=true]} elements are distinct;</li>
 *   <li>every such span has a non-empty, parsable {@code precision};</li>
 *   <li>every span marked {@code numeric="true"} has parsable text.</li>
 * </ol>
 *
 * @param node JSON node whose {@code question} field holds the question markup
 * @return a short failure reason, or {@code null} when the content is valid
 */
@Transient
private String getClozeTestQuestionContentValidationResult(JsonNode node) {
    String questionText = node.get("question").asText();
    if (!questionText.contains("cloze=\"true\"")) {
        return "no embedded answers";
    }

    Elements answers = Jsoup.parse(questionText).select("span[cloze=true]");

    long distinctIdCount = answers.stream().map(a -> a.attr("id")).distinct().count();
    if (answers.size() != distinctIdCount) {
        return "duplicate ids found";
    }

    for (Element answer : answers) {
        String precision = answer.attr("precision");
        if (precision.isEmpty() || !NumberUtils.isParsable(precision)) {
            return "invalid precision found";
        }
    }

    for (Element answer : answers) {
        if (answer.attr("numeric").equals("true") && !NumberUtils.isParsable(answer.text())) {
            return "non-numeric correct answer for numeric question";
        }
    }
    return null;
}
/**
 * Fetches an insult in the language selected in {@code cmbLanguage} and shows it in
 * {@code txtPaneShow}.
 *
 * <p>The text of the response's {@code body} is displayed, prefixed with a newline. Runtime
 * exceptions are rethrown unchanged; any other exception replaces the pane text with an
 * outage message.
 */
public void showWord() {
    try {
        Languages selected = (Languages) cmbLanguage.getSelectedItem();
        String code = selected.getLang();
        Document page = Jsoup.connect("http://evilinsult.com/generate_insult.php?lang=" + code).get();
        for (Element body : page.select("body")) {
            txtPaneShow.setText("\n" + body.text());
        }
    } catch (RuntimeException e) {
        throw e;
    } catch (Exception ex) {
        txtPaneShow.setText("\n" + "Insult Outage! Please Check Your Internet Connection And Try Again In Three Minutes");
    }
}
/**
 * Builds the picture list for a detail page decoded as UTF-8.
 *
 * <p>Every {@code #big-pic img} becomes a {@link PicInfo} carrying the image {@code src}, the
 * text of the first {@code #photos h1} (empty if absent) and the texts of all {@code .fbl a}
 * links as tags. All pictures share the same tag list instance.
 *
 * @param baseUrl    not used by this implementation
 * @param currentUrl stored unchanged under {@code CURRENT_URL}
 * @param result     raw page bytes, decoded as UTF-8
 * @param resultMap  map that receives {@code CURRENT_URL} and {@code RESULT}
 * @return {@code resultMap}
 * @throws UnsupportedEncodingException if the charset is unavailable
 */
@Override
public Map<DetailActivity.parameter, Object> getDetailContent(String baseUrl, String currentUrl, byte[] result, Map<DetailActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    Document page = Jsoup.parse(new String(result, "utf-8"));

    String pageTitle = "";
    Elements headings = page.select("#photos h1");
    if (!headings.isEmpty()) {
        pageTitle = headings.get(0).text();
    }

    List<String> tagNames = new ArrayList<>();
    for (Element tagLink : page.select(".fbl a")) {
        tagNames.add(tagLink.text());
    }

    List<PicInfo> pictures = new ArrayList<>();
    for (Element image : page.select("#big-pic img")) {
        pictures.add(new PicInfo()
                .setTags(tagNames)
                .setTitle(pageTitle)
                .setPicUrl(image.attr("src")));
    }

    resultMap.put(DetailActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(DetailActivity.parameter.RESULT, pictures);
    return resultMap;
}
/**
 * Builds the album list from the {@code dd a img:not([border])} images of a GBK-encoded page.
 *
 * <p>For each image the title is its {@code alt}, the picture URL is its {@code src} with the
 * literal text {@code 0.jpg} replaced by {@code m.jpg}, and the album URL is
 * {@code baseUrl + <id> + ".html"}, where {@code <id>} is the first run of three or four
 * digits that follows a slash in the {@code src}. The album URL is left unset when no such
 * run exists.
 *
 * <p>The replacement is literal: the earlier {@code replaceAll} treated the dot as a regex
 * wildcard and would also have rewritten text such as {@code 0xjpg}.
 *
 * @param baseUrl    prefix for album URLs
 * @param currentUrl stored unchanged under {@code CURRENT_URL}
 * @param result     raw page bytes, decoded as GBK
 * @param resultMap  map that receives {@code CURRENT_URL} and {@code RESULT}
 * @return {@code resultMap}
 * @throws UnsupportedEncodingException if the GBK charset is unavailable
 */
@Override
public Map<ContentsActivity.parameter, Object> getContent(String baseUrl, String currentUrl, byte[] result, Map<ContentsActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    List<AlbumInfo> data = new ArrayList<>();
    Document document = Jsoup.parse(new String(result, "gbk"));
    // Compiled once instead of once per image.
    Pattern albumId = Pattern.compile("/\\d{3,4}");

    for (Element element : document.select("dd a img:not([border])")) {
        String src = element.attr("src");
        AlbumInfo temp = new AlbumInfo();
        temp.setTitle(element.attr("alt"));
        temp.setPicUrl(src.replace("0.jpg", "m.jpg"));

        Matcher matcher = albumId.matcher(src);
        if (matcher.find()) {
            // substring(1) drops the leading slash that is part of the match.
            temp.setAlbumUrl(baseUrl + matcher.group().substring(1) + ".html");
        }
        data.add(temp);
    }

    resultMap.put(ContentsActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(ContentsActivity.parameter.RESULT, data);
    return resultMap;
}
/**
 * Fills in the team's "points given to QB" rank and average from the second table of
 * {@code qbPointsAllowedURL}.
 *
 * <p>Rows are scanned from the third {@code tr} onward; the rank is the 1-based position of
 * the first row whose first cell contains the team name. The average is read from cell 18
 * when the third cell contains {@code "*"}, otherwise from cell 19. Nothing is set when no
 * row matches.
 *
 * @param team team to look up and update
 */
public void populatePointsGivenToQB(Team team) {
    Elements rows = qbPointsAllowedURL.select("table").get(1).select("tr");
    int rank = 0;
    for (int i = 2; i < rows.size(); i++) {
        Elements cells = rows.get(i).select("td");
        rank++;
        if (!cells.get(0).text().contains(team.getName())) {
            continue;
        }
        // A "*" in the third cell shifts the average one column to the left.
        int avgColumn = cells.get(2).text().contains("*") ? 18 : 19;
        team.setFpToQBRank(rank);
        team.setFpToQBAvg(Double.parseDouble(cells.get(avgColumn).text()));
        return;
    }
}
/**
 * Fills in the team's "points given to RB" rank and average from the second table of
 * {@code rbPointsAllowedURL}.
 *
 * <p>Rows are scanned from the third {@code tr} onward; the rank is the 1-based position of
 * the first row whose first cell contains the team name. The average is read from cell 18
 * when the third cell contains {@code "*"}, otherwise from cell 19. Nothing is set when no
 * row matches.
 *
 * @param team team to look up and update
 */
public void populatePointsGivenToRB(Team team) {
    Elements rows = rbPointsAllowedURL.select("table").get(1).select("tr");
    int rank = 0;
    for (int i = 2; i < rows.size(); i++) {
        Elements cells = rows.get(i).select("td");
        rank++;
        if (!cells.get(0).text().contains(team.getName())) {
            continue;
        }
        // A "*" in the third cell shifts the average one column to the left.
        int avgColumn = cells.get(2).text().contains("*") ? 18 : 19;
        team.setFpToRBRank(rank);
        team.setFpToRBAvg(Double.parseDouble(cells.get(avgColumn).text()));
        return;
    }
}
public static Map<String, String> getFormMap_Kingo(String html, int formIndex) { Map<String, String> retVal = new HashMap<String, String>(); try { Document doc = Jsoup.parse(html); Elements elements = doc.select("form"); Element formElement = elements.get(formIndex); retVal.put("formAction", formElement.attr("action")); Elements inputElements = doc.select("input"); // System.out.println(inputElements); for (Element element : inputElements) { if (element.nodeName().equals("select")) { Element element5 = inputElements.select("option").first(); retVal.put(element.attr("name"), element5.attr("value")); } else { if (element.attr("name").equals("") || element.attr("name") == null) { } else { retVal.put(element.attr("name"), element.attr("value")); } } } } catch (Exception e) { retVal = null; } return retVal; }
/**
 * Downloads a catalog page and stores its chapter list and cover in {@code map}.
 *
 * <p>Every {@code dd} under the element with id {@code list} that contains an anchor becomes
 * a {@code {catalog, href}} entry, with the href prefixed by {@code url}. The cover is the
 * {@code src} of the first image under the element with id {@code fmimg}. The map receives
 * {@code data}, {@code cover} and {@code lastChapter} (catalog text of the final entry).
 *
 * <p>Missing page parts no longer throw: an absent {@code #list} gives an empty catalog, an
 * absent cover image gives {@code ""}, and an empty catalog gives a {@code lastChapter} of
 * {@code ""}. On {@link IOException} the stack trace is printed and {@code map} is returned
 * unchanged.
 *
 * @param map result map to populate
 * @param url catalog page URL, also used as the prefix for chapter links
 * @return {@code map}
 */
private static Map sanjianggeCatalog(Map map, String url) {
    try {
        List<Map<String, Object>> data = new ArrayList<>();
        Document document = Jsoup
                .connect(url)
                .userAgent(FormatUtil.USER_AGENT_PC)
                .get();
        Element body = document.body();

        // getElementById returns null when the id is not present.
        Element listElement = body.getElementById("list");
        if (listElement != null) {
            for (Element catalogE : listElement.getElementsByTag("dd")) {
                Elements anchors = catalogE.getElementsByTag("a");
                if (anchors.size() > 0) {
                    Map<String, Object> entry = new HashMap<>();
                    entry.put("catalog", catalogE.text());
                    entry.put("href", url + anchors.first().attr("href"));
                    data.add(entry);
                }
            }
        }

        String cover = "";
        Element coverBox = body.getElementById("fmimg");
        if (coverBox != null) {
            Element coverImage = coverBox.getElementsByTag("img").first();
            if (coverImage != null) {
                cover = coverImage.attr("src");
            }
        }

        String lastChapter = data.isEmpty()
                ? ""
                : data.get(data.size() - 1).get("catalog").toString();

        map.put("data", data);
        map.put("cover", cover);
        map.put("lastChapter", lastChapter);
    } catch (IOException e) {
        e.printStackTrace();
    }
    return map;
}
public void loadPendingMateriales(final MaterialsCallback callback) { String json = "idmat=-1&codasi=-1&expresion=&direccion=&filtro=&pendientes=S"; UAWebService.HttpWebPostRequest(context, MATERIALES_UNSEEN, json, new UAWebService.WebCallBack() { @Override public void onNavigationComplete(boolean isSuccessful, String body) { if (isSuccessful) { Document doc = Jsoup.parse(body); //Get Post data Elements mensajes = doc.select(MATERIALES_LIST_BODY); parseBody(mensajes); callback.onResult(true, ""); } else { callback.onResult(false, body); } } }); }
@Override public List<String> getURLsFromPage(Document doc) { List<String> URLs = new ArrayList<>(); //Pictures Elements imgs = doc.select("div.img > img.img-front"); for (Element img : imgs) { String imageURL = img.attr("src"); imageURL = "https:" + imageURL; URLs.add(imageURL); } //Videos Elements vids = doc.select("div.video > video > source"); for (Element vid : vids) { String videoURL = vid.attr("src"); URLs.add("https:" + videoURL); } return URLs; }
/**
 * Builds the picture list for a gallery page decoded as UTF-8.
 *
 * <p>Every {@code ul.gallery li:has(img)} item becomes a {@link PicInfo} created from the
 * item's {@code data-src}, with the text of the first {@code #header h1} as title (empty if
 * absent) and the texts of all {@code ul.tagList a} links as tags. All pictures share the
 * same tag list instance.
 *
 * @param baseUrl    not used by this implementation
 * @param currentUrl stored unchanged under {@code CURRENT_URL}
 * @param result     raw page bytes, decoded as UTF-8
 * @param resultMap  map that receives {@code CURRENT_URL} and {@code RESULT}
 * @return {@code resultMap}
 * @throws UnsupportedEncodingException if the charset is unavailable
 */
@Override
public Map<DetailActivity.parameter, Object> getDetailContent(String baseUrl, String currentUrl, byte[] result, Map<DetailActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    Document page = Jsoup.parse(new String(result, "utf-8"));

    Elements headings = page.select("#header h1");
    String pageTitle = headings.isEmpty() ? "" : headings.get(0).text();

    List<String> tagNames = new ArrayList<>();
    for (Element tagLink : page.select("ul.tagList a")) {
        tagNames.add(tagLink.text());
    }

    List<PicInfo> pictures = new ArrayList<>();
    for (Element item : page.select("ul.gallery li:has(img)")) {
        pictures.add(new PicInfo(item.attr("data-src")).setTitle(pageTitle).setTags(tagNames));
    }

    resultMap.put(DetailActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(DetailActivity.parameter.RESULT, pictures);
    return resultMap;
}
/**
 * Fetches the ESPN WNBA scoreboard page and checks that the embedded
 * {@code window.espn.scoreboardData} JSON yields a non-empty team collection.
 *
 * <p>NOTE(review): the assertion only runs when a script data node starts with
 * {@code window.espn.scoreboardData} and matches the pattern as a whole. If the page layout
 * changes, the test passes without asserting anything. Consider failing in that case.
 */
@Test
public void leagueStatusCheck() throws Exception {
    Document doc = Jsoup.connect("http://www.espn.com/wnba/scoreboard/_/group/50")
            .timeout(60 * 1000)
            .maxBodySize(0)
            .get();
    Elements scriptElements = doc.getElementsByTag("script");
    Pattern pattern = Pattern.compile("window.espn.scoreboardData[\\s\t]*= (.*);.*window.espn.scoreboardSettings.*");
    // One parser instance is enough; a second, unused one used to be created per match.
    Gson gson = new Gson();

    for (Element element : scriptElements) {
        for (DataNode node : element.dataNodes()) {
            String script = node.getWholeData();
            if (!script.startsWith("window.espn.scoreboardData")) {
                continue;
            }
            Matcher matcher = pattern.matcher(script);
            if (matcher.matches()) {
                EspnJson espnJson = gson.fromJson(matcher.group(1), EspnJson.class);
                System.out.println(espnJson.getTeams());
                assertEquals(false, espnJson.getTeams().isEmpty());
            }
        }
    }
}
/**
 * Moves section styles onto their section elements.
 *
 * <p>For every element carrying {@code XK_SECTION_ATTR}, the first element found by
 * {@code getElementsByAttribute(XK_SECTION_STYLES_ATTR)} on it is looked up. When one exists,
 * its trimmed attribute value is added as a class on the section element and the styles
 * element is removed from the document.
 *
 * @param executionContext not read by this implementation
 * @param document         document to transform in place
 * @throws ProcessException wrapping any exception raised during processing
 */
@Override
public void process(final ExecutionContext executionContext, final Document document) throws ProcessException {
    try {
        Elements sections = document.getElementsByAttribute(XK_SECTION_ATTR);
        if (sections == null) {
            return;
        }
        for (Element section : sections) {
            Element stylesHolder = section.getElementsByAttribute(XK_SECTION_STYLES_ATTR).first();
            if (stylesHolder == null) {
                continue;
            }
            String styles = stylesHolder.attr(XK_SECTION_STYLES_ATTR);
            section.addClass(styles.trim());
            stylesHolder.remove();
        }
    } catch (Exception e) {
        throw new ProcessException(e);
    }
}
/**
 * Verifies that the role-config template exposes exactly the fields declared on
 * {@code GitHubRoleConfiguration}.
 *
 * <p>For each metadata key there must be exactly one element bound through {@code ng-model}
 * and exactly one error span whose {@code ng-class}, {@code ng-show} and text reference the
 * key's server error. Finally the number of {@code textarea}, {@code input} and
 * {@code select} elements must equal the number of metadata entries.
 */
@Test
public void allFieldsShouldBePresentInView() throws Exception {
    final Document view = Jsoup.parse(Util.readResource("/role-config.template.html"));
    final List<ProfileMetadata> metadataList = MetadataHelper.getMetadata(GitHubRoleConfiguration.class);

    for (ProfileMetadata field : metadataList) {
        final String serverError = "GOINPUTNAME[" + field.getKey() + "].$error.server";

        final Elements boundInputs = view.getElementsByAttributeValue("ng-model", field.getKey());
        assertThat(boundInputs, hasSize(1));

        final Elements errorSpans = view.getElementsByAttributeValue("ng-class", "{'is-visible': " + serverError + "}");
        assertThat(errorSpans, hasSize(1));
        assertThat(errorSpans.attr("ng-show"), is(serverError));
        assertThat(errorSpans.text(), is("{{" + serverError + "}}"));
    }

    final Elements formControls = view.select("textarea,input,select");
    assertThat("should contains only inputs that defined in GitHubRoleConfiguration.java", formControls, hasSize(metadataList.size()));
}
/**
 * Downloads {@code url} on the IO scheduler, extracts the article list and shows it.
 *
 * <p>Each {@code li} under an element with class {@code list_line} becomes an
 * {@link ArticleItem} built from the anchor text, the anchor href and the span text. The
 * loading indicator is shown before the request and hidden once the list is delivered.
 *
 * <p>An error consumer is now supplied: without one a failed download was routed to the
 * global RxJava error handler and the loading indicator stayed visible. The emitter also
 * signals completion after its single item.
 *
 * @param url page to download and parse
 */
@Override
public void onHandleParseHTML(final String url) {
    mView.showLoading(true);
    Observable.create(new ObservableOnSubscribe<ArrayList<ArticleItem>>() {
        @Override
        public void subscribe(ObservableEmitter<ArrayList<ArticleItem>> e) throws Exception {
            ArrayList<ArticleItem> list = new ArrayList<>();
            Document doc = Jsoup.connect(url).get();
            for (Element u : doc.getElementsByClass("list_line")) {
                for (Element l : u.getElementsByTag("li")) {
                    Elements anchors = l.getElementsByTag("a");
                    String text = anchors.text();
                    String href = anchors.attr("href");
                    String time = l.getElementsByTag("span").text();
                    list.add(new ArticleItem(text, href, time));
                }
            }
            e.onNext(list);
            e.onComplete();
        }
    })
            .subscribeOn(Schedulers.io())
            .observeOn(AndroidSchedulers.mainThread())
            .subscribe(new Consumer<ArrayList<ArticleItem>>() {
                @Override
                public void accept(@NonNull ArrayList<ArticleItem> articleItems) throws Exception {
                    mView.showList(articleItems);
                    mView.showLoading(false);
                }
            }, new Consumer<Throwable>() {
                @Override
                public void accept(@NonNull Throwable error) throws Exception {
                    // No error UI is visible from here; at least report it and stop the spinner.
                    error.printStackTrace();
                    mView.showLoading(false);
                }
            });
}
/**
 * Lists the images found under {@code div#picture}.
 *
 * @return one {@link ImageModel} per {@code img}, with {@code url} set to the image
 *         {@code src}; empty when there are none
 */
public List<ImageModel> getImageDetail() {
    Elements images = document.select("div#picture").select("img");
    List<ImageModel> models = new ArrayList<>();
    for (int i = 0; i < images.size(); i++) {
        ImageModel model = new ImageModel();
        model.url = images.get(i).attr("src");
        models.add(model);
    }
    return models;
}
/**
 * Builds the picture list for a detail page decoded as GB2312.
 *
 * <p>Only the first {@code div.picshowtop img} is used; its {@code src} is prefixed with
 * {@code baseUrl}. The list is empty when no such image exists.
 *
 * @param baseUrl    prefix for the image URL
 * @param currentUrl stored unchanged under {@code CURRENT_URL}
 * @param result     raw page bytes, decoded as GB2312
 * @param resultMap  map that receives {@code CURRENT_URL} and {@code RESULT}
 * @return {@code resultMap}
 * @throws UnsupportedEncodingException if the charset is unavailable
 */
@Override
public Map<DetailActivity.parameter, Object> getDetailContent(String baseUrl, String currentUrl, byte[] result, Map<DetailActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    String html = new String(result, "gb2312");
    Elements images = Jsoup.parse(html).select("div.picshowtop img");

    List<PicInfo> pictures = new ArrayList<>();
    if (!images.isEmpty()) {
        String src = images.get(0).attr("src");
        pictures.add(new PicInfo(baseUrl + src));
    }

    resultMap.put(DetailActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(DetailActivity.parameter.RESULT, pictures);
    return resultMap;
}
private static String GetDivContent(Element div) { StringBuilder sb = new StringBuilder(); //考虑div里标签内容的顺序,对div子树进行深度优先搜索 Stack<Element> sk = new Stack<Element>(); sk.push(div); while (!sk.empty()) { // Element e = sk.pop(); //对于div中的div过滤掉 if (e != div && e.tagName().equals("div")) continue; //考虑正文被包含在p标签中的情况,并且p标签里不能含有a标签 if (e.tagName().equals("p") && e.getElementsByTag("a").size() == 0) { String className = e.className(); if (className.length() != 0 && className.equals("pictext")) continue; sb.append(e.text()); sb.append("\n"); continue; } else if (e.tagName().equals("td")) { //考虑正文被包含在td标签中的情况 if (e.getElementsByTag("div").size() != 0) continue; sb.append(e.text()); sb.append("\n"); continue; } //将孩子节点加入栈中 Elements children = e.children(); for (int i = children.size() - 1; i >= 0; i--) { sk.push((Element) children.get(i)); } } return sb.toString(); }
/**
 * Maps each element to a text node holding its outer HTML.
 *
 * @param scopeEm  not read by this implementation
 * @param elements elements to convert; may be {@code null}
 * @param args     not read by this implementation
 * @return one node per element, or an empty list for {@code null}/empty input
 */
@Override
public List<SIPNode> call(XpathNode.ScopeEm scopeEm, Elements elements, List<String> args) {
    List<SIPNode> nodes = new LinkedList<SIPNode>();
    if (elements == null || elements.size() == 0) {
        return nodes;
    }
    for (int i = 0; i < elements.size(); i++) {
        nodes.add(SIPNode.t(elements.get(i).outerHtml()));
    }
    return nodes;
}
/**
 * Submits a task that downloads the book page linked by {@code element}, builds a
 * {@link NovelBean} from it and adds it to {@code list}.
 *
 * <p>{@code latch.countDown()} now runs in a {@code finally} block. Before, any unchecked
 * exception (for example from a malformed link passed to {@code Jsoup.connect}) skipped the
 * count-down, so a thread awaiting the latch could block forever. An {@link IOException} is
 * still only printed.
 *
 * <p>NOTE(review): {@code list} is written from executor threads; confirm it is a
 * thread-safe collection.
 *
 * @param element anchor whose absolute href points at the book page
 * @throws IOException declared for callers; the download itself runs asynchronously
 */
private void runInSameTime(final Element element) throws IOException {
    mExecutorService.submit(new Runnable() {
        @Override
        public void run() {
            // Shared prefix of the selectors that address the book's meta block.
            final String meta = "#okBookShow > div.ok-book-base-info > div.row > div.col-sm-8.ok-book-info > div.ok-book-meta";
            try {
                String url = element.attr("abs:href");
                Document document = Jsoup.connect(url)
                        .ignoreContentType(true)
                        .ignoreHttpErrors(true)
                        .userAgent(Url.MOBBILE_AGENT)
                        .get();

                String name = document.select(meta + " > h1").text();

                // Non-empty description paragraphs, each followed by a blank line.
                StringBuilder info = new StringBuilder();
                Elements paragraphs = document
                        .select(meta + " > div.ok-book-desc > div.ok-book-meta-content")
                        .select("p");
                for (Element paragraph : paragraphs) {
                    String text = paragraph.text();
                    if (!text.equals("")) {
                        info.append(text).append("\n\n");
                    }
                }

                String category = document.select(meta + " > div.ok-book-subjects").text();
                String author = document.select(meta + " > div.row > div > div").text();
                String pic = document
                        .select("#okBookShow > div.ok-book-base-info > div.row > div.col-sm-4 > div > img")
                        .attr("abs:src");

                // time, status and words are always passed as empty strings.
                String time = "";
                String status = "";
                String words = "";
                list.add(new NovelBean(name, time, info.toString(), category, status, author, words, pic, url));
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                latch.countDown();
            }
        }
    });
}
/**
 * Maps each element to a text node holding its inner HTML.
 *
 * @param scopeEm  not read by this implementation
 * @param elements elements to convert; may be {@code null}
 * @param args     not read by this implementation
 * @return one node per element, or an empty list for {@code null}/empty input
 */
@Override
public List<SIPNode> call(XpathNode.ScopeEm scopeEm, Elements elements, List<String> args) {
    List<SIPNode> nodes = new LinkedList<SIPNode>();
    if (elements == null || elements.size() == 0) {
        return nodes;
    }
    for (int i = 0; i < elements.size(); i++) {
        nodes.add(SIPNode.t(elements.get(i).html()));
    }
    return nodes;
}
/**
 * Stores one {@link Overview} per element into {@code overviews}, starting at index
 * {@code tmp} and advancing {@code tmp} after each store.
 *
 * <p>The values are the texts of {@code td.period}, {@code td.class}, {@code td.grade} and
 * {@code td.location}; all except the grade are trimmed.
 *
 * @param elements rows to convert
 */
private void setOverviews(Elements elements) {
    for (int i = 0; i < elements.size(); i++) {
        Element row = elements.get(i);
        String period = row.select("td.period").text().trim();
        String className = row.select("td.class").text().trim();
        String grade = row.select("td.grade").text();
        String location = row.select("td.location").text().trim();
        overviews[tmp] = new Overview(period, className, grade, location);
        tmp++;
    }
}
/**
 * Parses the proxy table ({@code div#list table tbody tr}) out of a page.
 *
 * <p>The first cell of each row is the ip and the second the port. Rows with fewer than two
 * cells or a non-numeric port are skipped, where previously a single such row aborted the
 * whole parse with an exception.
 *
 * @param content page markup
 * @return the proxies that could be parsed, in table order
 */
@Override
public List<Proxy> parseProxys(String content) {
    Document doc = Jsoup.parse(content);
    Elements rows = doc.select("div#list table tbody tr");
    List<Proxy> proxyList = new ArrayList<>();
    for (Element tr : rows) {
        Elements tds = tr.children();
        if (tds.size() < 2) {
            continue;
        }
        String ip = tds.get(0).text().trim();
        Integer port;
        try {
            port = Integer.parseInt(tds.get(1).text().trim());
        } catch (NumberFormatException ignored) {
            // Not a data row (header text, placeholder, ...); skip it.
            continue;
        }
        proxyList.add(new Proxy(ip, port));
    }
    return proxyList;
}
/**
 * Maps each element to a text node holding the first run of digits in its own text.
 *
 * <p>Elements whose own text contains no digit contribute nothing.
 *
 * @param scopeEm  not read by this implementation
 * @param elements elements to scan; may be {@code null}
 * @param args     not read by this implementation
 * @return the extracted numbers as text nodes, in element order
 */
@Override
public List<SIPNode> call(XpathNode.ScopeEm scopeEm, Elements elements, List<String> args) {
    List<SIPNode> nodes = new LinkedList<SIPNode>();
    if (elements == null) {
        return nodes;
    }
    Pattern digits = Pattern.compile("\\d+");
    for (int i = 0; i < elements.size(); i++) {
        Matcher found = digits.matcher(elements.get(i).ownText());
        if (!found.find()) {
            continue;
        }
        nodes.add(SIPNode.t(found.group()));
    }
    return nodes;
}
/**
 * Builds a {@link Repository} from one repository entry of a collections page.
 *
 * <p>The href of {@code div > div > a} is used as the full name; the owner is the part
 * between its first character and its last slash, the repository name the part after the
 * last slash. The description is the concatenated text nodes of the second-to-last
 * {@code div}. Star and fork counts are the second text node of the first and second anchor
 * in the last {@code div}, with spaces removed.
 *
 * <p>The language is the first text node of the second {@code span > span} in the last
 * {@code div}. The guard now requires at least two such spans and a text node; it used to
 * check only for one span before reading index 1. Otherwise the language stays empty.
 *
 * @param element root element of the repository entry
 * @return the populated repository
 * @throws Exception if the markup does not have the expected shape (missing link, missing
 *                   counters, non-numeric counts, ...)
 */
private Repository parseCollectionsRepositoryData(Element element) throws Exception {
    String fullName = element.select("div > div > a").attr("href");
    int lastSlash = fullName.lastIndexOf("/");
    String owner = fullName.substring(1, lastSlash);
    String repoName = fullName.substring(lastSlash + 1);
    String ownerAvatar = element.select("div > div > a > img").attr("src");

    Elements articleElements = element.getElementsByTag("div");
    Element descElement = articleElements.get(articleElements.size() - 2);
    StringBuilder desc = new StringBuilder("");
    for (TextNode textNode : descElement.textNodes()) {
        desc.append(textNode.getWholeText());
    }

    Element numElement = articleElements.last();
    Elements counterLinks = numElement.select("a");
    String starNumStr = counterLinks.get(0).textNodes().get(1).toString();
    String forkNumStr = counterLinks.get(1).textNodes().get(1).toString();

    String language = "";
    Elements languageElements = numElement.select("span > span");
    if (languageElements.size() > 1 && !languageElements.get(1).textNodes().isEmpty()) {
        language = languageElements.get(1).textNodes().get(0).toString();
    }

    User user = new User();
    user.setLogin(owner);
    user.setAvatarUrl(ownerAvatar);

    Repository repo = new Repository();
    repo.setFullName(fullName);
    repo.setName(repoName);
    repo.setOwner(user);
    repo.setDescription(desc.toString());
    repo.setStargazersCount(Integer.parseInt(starNumStr.replaceAll(" ", "")));
    repo.setForksCount(Integer.parseInt(forkNumStr.replaceAll(" ", "")));
    repo.setLanguage(language);
    return repo;
}