我们从Python开源项目中，提取了以下50个代码示例，用于说明如何使用sgmllib.SGMLParser()。
def feed(self, data):
    """Clean up *data*, feed it to the SGML parser and close the parser.

    Pre-processing applied before parsing:

    * ``<!`` is escaped to ``&lt;!`` unless it opens ``<!DOCTYPE``, a
      ``<!--`` comment or a ``<![`` section (matched case-insensitively).
    * Self-closing tags such as ``<br/>`` are rewritten by
      ``self._shorttag_replace``.
    * The character references ``&#39;`` and ``&#34;`` become literal
      ``'`` and ``"``.

    Side effect: when ``bytes`` exists and is not ``str``, the suffix
    ``'_INVALID_PYTHON_3'`` is appended to ``self.encoding``.
    """
    # The replacement has to be the entity "&lt;!"; substituting a literal
    # "<!" would leave the text unchanged.
    data = re.compile(r'<!((?!DOCTYPE|--|\[))', re.IGNORECASE).sub(r'&lt;!\1', data)
    # An earlier, looser pattern r'<(\S+?)\s*?/>' was abandoned because of
    # bug [ 1399464 ] "Bad regexp for _shorttag_replace".
    data = re.sub(r'<([^<>\s]+?)\s*/>', self._shorttag_replace, data)
    data = data.replace('&#39;', "'")
    data = data.replace('&#34;', '"')
    try:
        # `bytes` missing, or merely an alias of `str`, is treated as the
        # "encode unicode input" case handled in the except branch.
        bytes
        if bytes is str:
            raise NameError
        # NOTE(review): presumably poisons the encoding name on purpose so
        # later lookups fail on Python 3 -- confirm against callers.
        self.encoding = self.encoding + '_INVALID_PYTHON_3'
    except NameError:
        # Only unicode text is encoded, and only when an encoding is known.
        if self.encoding and isinstance(data, type(u'')):
            data = data.encode(self.encoding)
    sgmllib.SGMLParser.feed(self, data)
    sgmllib.SGMLParser.close(self)
def __init__(self, formatter, verbose=0):
    """Create an HTMLParser instance bound to *formatter*.

    *formatter* is the formatter instance associated with this parser and
    is stored as ``self.formatter``; *verbose* is passed straight through
    to ``sgmllib.SGMLParser.__init__``.
    """
    # Base initialiser first, then remember the formatter (original order).
    sgmllib.SGMLParser.__init__(self, verbose)
    self.formatter = formatter
def reset(self):
    """Reset the base SGML parser, then clear this parser's own state."""
    sgmllib.SGMLParser.reset(self)
    # Nothing has been captured yet.
    self.savedata = self.title = self.base = self.anchor = None
    # Integer flags/counters start at zero.
    self.isindex = self.nofill = 0
    # Fresh, independent lists on every reset.
    self.anchorlist = []
    self.list_stack = []

# ------ Methods used internally; some may be overridden

# --- Formatter interface, taking care of 'savedata' mode;
# shouldn't need to be overridden
def __init__(self):
    """Initialise the SGML parser base and start with empty collected state."""
    sgmllib.SGMLParser.__init__(self)
    # Accumulators filled while parsing; each gets its own list.
    self.entries = []
    self.dates = []
    # Both "inside ..." flags start cleared.
    self.inHtml = self.inDate = 0
    # Text buffer starts empty.
    self.data = ""
def __init__(self, url, verbose=VERBOSE, checker=None):
    """Set up link-collection state for *url*, then initialise the parser.

    *url*, *checker* and *verbose* are stored on the instance (the latter
    as ``self.myverbose``); ``base``, ``links`` and ``names`` start empty.
    """
    # All attributes are assigned before the base initialiser runs, as in
    # the original.
    self.url = url
    self.checker = checker
    self.myverbose = verbose  # now unused
    self.base = None
    self.names = []
    self.links = {}
    sgmllib.SGMLParser.__init__(self)
def __init__(self, verbose=0):
    """Initialise the object, handing *verbose* on to the superclass.

    ``self.hyperlinks`` starts out as an empty list.
    """
    sgmllib.SGMLParser.__init__(self, verbose)
    self.hyperlinks = []
def __init__(self, encoding, _type):
    """Store *encoding* and *_type*, then initialise the SGML parser base."""
    # Both attributes are set before the base initialiser is called.
    self.encoding, self._type = encoding, _type
    sgmllib.SGMLParser.__init__(self)
def reset(self):
    """Start with an empty ``self.pieces`` list and reset the base parser."""
    # The list is replaced before delegating, matching the original order.
    self.pieces = []
    sgmllib.SGMLParser.reset(self)
def feed(self, data):
    """Sanitise *data*, run it through the SGML parser, then close it.

    Before parsing, the markup is adjusted as follows:

    * ``<!`` becomes ``&lt;!`` unless followed by ``DOCTYPE``, ``--`` or
      ``[`` (case-insensitive).
    * Self-closing tags (``<tag/>``) are rewritten via
      ``self._shorttag_replace``.
    * ``&#39;`` and ``&#34;`` are replaced by ``'`` and ``"``.

    Side effect: when ``bytes`` exists and differs from ``str``,
    ``'_INVALID_PYTHON_3'`` is appended to ``self.encoding``.
    """
    # Replacement is the entity "&lt;!" -- a literal "<!" would be a no-op.
    data = re.compile(r'<!((?!DOCTYPE|--|\[))', re.IGNORECASE).sub(r'&lt;!\1', data)
    data = re.sub(r'<([^<>\s]+?)\s*/>', self._shorttag_replace, data)
    data = data.replace('&#39;', "'")
    data = data.replace('&#34;', '"')
    try:
        # A missing `bytes`, or `bytes` aliased to `str`, selects the
        # except branch below.
        bytes
        if bytes is str:
            raise NameError
        self.encoding = self.encoding + '_INVALID_PYTHON_3'
    except NameError:
        # NOTE(review): this branch only runs where `str` is the byte-string
        # type, so the check looks like an automated rename of `unicode` --
        # confirm the intended type before changing it.
        if self.encoding and isinstance(data, str):
            data = data.encode(self.encoding)
    sgmllib.SGMLParser.feed(self, data)
    sgmllib.SGMLParser.close(self)
def parse_declaration(self, i):
    """Parse a declaration starting at index *i*, tolerating malformed input.

    Delegates to ``sgmllib.SGMLParser.parse_declaration``. If that raises
    ``sgmllib.SGMLParseError``, the escaped text ``&lt;`` is passed to
    ``self.handle_data`` and ``i + 1`` is returned.

    Returns the base parser's result, or ``i + 1`` after a parse error.
    """
    try:
        return sgmllib.SGMLParser.parse_declaration(self, i)
    except sgmllib.SGMLParseError:
        # Escape the doctype declaration and continue parsing. The entity
        # form is required here: a raw "<" would not be escaped at all.
        self.handle_data('&lt;')
        return i + 1
def __init__(self, baseuri, baselang, encoding, entities):
    """Initialise every base class explicitly and remember *entities*.

    The bases are initialised in a fixed order: ``sgmllib.SGMLParser``,
    then ``_FeedParserMixin`` (with *baseuri*, *baselang*, *encoding*),
    then ``_BaseHTMLProcessor`` (with *encoding* and the content type
    ``'application/xhtml+xml'``).
    """
    sgmllib.SGMLParser.__init__(self)
    _FeedParserMixin.__init__(self, baseuri, baselang, encoding)
    _BaseHTMLProcessor.__init__(self, encoding, 'application/xhtml+xml')
    self.entities = entities
def feed(self, data):
    """Normalise *data*, parse it with the SGML parser and close the parser.

    Normalisation steps, in order:

    * escape ``<!`` to ``&lt;!`` unless it begins ``<!DOCTYPE``, ``<!--``
      or ``<![`` (case-insensitive);
    * rewrite self-closing tags (``<tag/>``) with
      ``self._shorttag_replace``;
    * turn ``&#39;`` and ``&#34;`` into literal ``'`` and ``"``.

    Side effect: when ``bytes`` exists and is not ``str``,
    ``u'_INVALID_PYTHON_3'`` is appended to ``self.encoding``.
    """
    # Use the entity "&lt;!" as replacement; a literal "<!" changes nothing.
    data = re.compile(r'<!((?!DOCTYPE|--|\[))', re.IGNORECASE).sub(r'&lt;!\1', data)
    data = re.sub(r'<([^<>\s]+?)\s*/>', self._shorttag_replace, data)
    data = data.replace('&#39;', "'")
    data = data.replace('&#34;', '"')
    try:
        # No `bytes`, or `bytes is str`: fall through to the except branch,
        # where the `unicode` builtin is expected to exist.
        bytes
        if bytes is str:
            raise NameError
        self.encoding = self.encoding + u'_INVALID_PYTHON_3'
    except NameError:
        # Encode unicode text only when an encoding is set.
        if self.encoding and isinstance(data, unicode):
            data = data.encode(self.encoding)
    sgmllib.SGMLParser.feed(self, data)
    sgmllib.SGMLParser.close(self)
def __init__(self):
    """Initialise the instance by delegating to ``sgmllib.SGMLParser``."""
    sgmllib.SGMLParser.__init__(self)
def __init__(self, encoding, _type):
    """Record *encoding* and *_type*, optionally trace, then init the parser.

    When the module-level ``_debug`` flag is truthy, a one-line trace naming
    the encoding is written to ``sys.stderr``.
    """
    self.encoding, self._type = encoding, _type
    if _debug:
        trace = 'entering BaseHTMLProcessor, encoding=%s\n' % self.encoding
        sys.stderr.write(trace)
    sgmllib.SGMLParser.__init__(self)
def parse_starttag(self, i):
    """Parse a start tag at index *i*, closing XHTML self-closing tags.

    Delegates to ``sgmllib.SGMLParser.parse_starttag``. When ``self._type``
    is ``'application/xhtml+xml'`` and the two characters of
    ``self.rawdata`` just before the returned index are ``/>``,
    ``self.unknown_endtag`` is called with ``self.lasttag``.

    Returns the index reported by the base parser, unchanged.
    """
    end = sgmllib.SGMLParser.parse_starttag(self, i)
    is_xhtml = self._type == 'application/xhtml+xml'
    # `end > 2` guards the slice so that two characters precede `end`.
    if is_xhtml and end > 2 and self.rawdata[end - 2:end] == '/>':
        self.unknown_endtag(self.lasttag)
    return end