我们从Python开源项目中，提取了以下4个代码示例，用于说明如何使用bs4.element.Doctype()。
def text(self, target=None, ignore_pureascii_words=False):
    """
    Collect the visible text fragments of an HTML tree.

    Walks every descendant of ``target`` and keeps the stripped, non-empty
    string nodes. A string node is skipped when it is a comment or a doctype,
    when its direct parent is a ``script`` or ``style`` tag, or when the
    direct parent's inline ``style`` attribute contains ``none`` or
    ``font-size:0px``.

    :param target: the BeautifulSoup object to walk, default ``self.b``
    :param ignore_pureascii_words: if True, keep only fragments that contain
        at least one character above code point 127 (e.g. Chinese text; may
        be useful for an English version of a website)
    :return: list of str (UTF-8 encoded byte strings when ``PY2`` is true)
    """
    if target is None:
        target = self.b
    from bs4 import Comment
    from bs4.element import NavigableString, Doctype

    result = []
    for descendant in target.descendants:
        # Only plain string nodes carry text; tags are visited through
        # their own string descendants.
        if not isinstance(descendant, NavigableString):
            continue
        if isinstance(descendant, (Doctype, Comment)):
            continue

        parent = descendant.parent
        if parent.name in ("script", "style"):
            continue

        # Only the direct parent's inline style is inspected.
        inline_style = parent.get("style", "")
        if "none" in inline_style or "font-size:0px" in inline_style:
            continue

        data = descendant.strip()
        if not data:
            continue
        if ignore_pureascii_words and not any(ord(ch) > 127 for ch in data):
            continue

        # Encode explicitly as UTF-8: a bare encode() on Python 2 uses the
        # interpreter's default codec (normally ASCII) and raises on the
        # non-ASCII text this method is meant to extract.
        result.append(data.encode("utf-8") if PY2 else data)
    return result
def is_doctype(self):
    """Return True when ``self.context`` is a ``Doctype`` instance."""
    node = self.context
    return isinstance(node, Doctype)
def _test_doctype(self, doctype_fragment):
    """Parse a document that starts with the given doctype and verify it.

    Builds ``<!DOCTYPE fragment>`` followed by a ``<p>foo</p>`` paragraph,
    parses it with ``self.soup`` and asserts that the first node is a
    ``Doctype`` equal to the fragment, that the serialized soup begins with
    the declaration, and that the paragraph content is ``'foo'``.
    """
    declaration = '<!DOCTYPE %s>' % doctype_fragment
    parsed = self.soup(declaration + '<p>foo</p>')
    first_node = parsed.contents[0]

    # The leading node must be exactly a Doctype carrying the fragment text.
    self.assertEqual(first_node.__class__, Doctype)
    self.assertEqual(first_node, doctype_fragment)

    # The declaration must round-trip as the prefix of the rendered output.
    rendered = str(parsed)
    self.assertEqual(rendered[:len(declaration)], declaration)

    # The doctype must not have disturbed the rest of the parse tree:
    # the paragraph that follows it has to be parsed as usual.
    paragraph_children = parsed.p.contents
    self.assertEqual(paragraph_children[0], 'foo')