Python sgmllib 模块,SGMLParser() 实例源码

我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用sgmllib.SGMLParser()

项目:download-manager    作者:thispc    | 项目源码 | 文件源码
def feed(self, data):
        data = re.compile(r'<!((?!DOCTYPE|--|\[))', re.IGNORECASE).sub(r'&lt;!\1', data)
        #data = re.sub(r'<(\S+?)\s*?/>', self._shorttag_replace, data) # bug [ 1399464 ] Bad regexp for _shorttag_replace
        data = re.sub(r'<([^<>\s]+?)\s*/>', self._shorttag_replace, data) 
        data = data.replace('&#39;', "'")
        data = data.replace('&#34;', '"')
        try:
            bytes
            if bytes is str:
                raise NameError
            self.encoding = self.encoding + '_INVALID_PYTHON_3'
        except NameError:
            if self.encoding and type(data) == type(u''):
                data = data.encode(self.encoding)
        sgmllib.SGMLParser.feed(self, data)
        sgmllib.SGMLParser.close(self)
项目:kinect-2-libras    作者:inessadl    | 项目源码 | 文件源码
def __init__(self, formatter, verbose=0):
        """Creates an instance of the HTMLParser class.

        The formatter parameter is the formatter instance associated with
        the parser.

        """
        sgmllib.SGMLParser.__init__(self, verbose)
        self.formatter = formatter
项目:kinect-2-libras    作者:inessadl    | 项目源码 | 文件源码
def reset(self):
        sgmllib.SGMLParser.reset(self)
        self.savedata = None
        self.isindex = 0
        self.title = None
        self.base = None
        self.anchor = None
        self.anchorlist = []
        self.nofill = 0
        self.list_stack = []

    # ------ Methods used internally; some may be overridden

    # --- Formatter interface, taking care of 'savedata' mode;
    # shouldn't need to be overridden
项目:code    作者:ActiveState    | 项目源码 | 文件源码
def __init__(self):
        sgmllib.SGMLParser.__init__(self)
        self.entries = []
        self.dates = [] 
        self.inHtml = 0
        self.inDate = 0
        self.data = ""
项目:code    作者:ActiveState    | 项目源码 | 文件源码
def __init__(self, url, verbose=VERBOSE, checker=None):
        self.myverbose = verbose # now unused
        self.checker = checker
        self.base = None
        self.links = {}
        self.names = []
        self.url = url
        sgmllib.SGMLParser.__init__(self)
项目:darkc0de-old-stuff    作者:tuwid    | 项目源码 | 文件源码
def __init__(self, verbose=0): 
        "Initialise an object, passing 'verbose' to the superclass." 

        sgmllib.SGMLParser.__init__(self, verbose) 
        self.hyperlinks = []
项目:hostapd-mana    作者:adde88    | 项目源码 | 文件源码
def __init__(self, formatter, verbose=0):
        """Creates an instance of the HTMLParser class.

        The formatter parameter is the formatter instance associated with
        the parser.

        """
        sgmllib.SGMLParser.__init__(self, verbose)
        self.formatter = formatter
项目:hostapd-mana    作者:adde88    | 项目源码 | 文件源码
def reset(self):
        sgmllib.SGMLParser.reset(self)
        self.savedata = None
        self.isindex = 0
        self.title = None
        self.base = None
        self.anchor = None
        self.anchorlist = []
        self.nofill = 0
        self.list_stack = []

    # ------ Methods used internally; some may be overridden

    # --- Formatter interface, taking care of 'savedata' mode;
    # shouldn't need to be overridden
项目:SublimeRSS    作者:JaredMHall    | 项目源码 | 文件源码
def __init__(self, encoding, _type):
        self.encoding = encoding
        self._type = _type
        sgmllib.SGMLParser.__init__(self)
项目:SublimeRSS    作者:JaredMHall    | 项目源码 | 文件源码
def reset(self):
        self.pieces = []
        sgmllib.SGMLParser.reset(self)
项目:SublimeRSS    作者:JaredMHall    | 项目源码 | 文件源码
def feed(self, data):
        data = re.compile(r'<!((?!DOCTYPE|--|\[))', re.IGNORECASE).sub(r'&lt;!\1', data)
        data = re.sub(r'<([^<>\s]+?)\s*/>', self._shorttag_replace, data)
        data = data.replace('&#39;', "'")
        data = data.replace('&#34;', '"')
        try:
            bytes
            if bytes is str:
                raise NameError
            self.encoding = self.encoding + '_INVALID_PYTHON_3'
        except NameError:
            if self.encoding and isinstance(data, str):
                data = data.encode(self.encoding)
        sgmllib.SGMLParser.feed(self, data)
        sgmllib.SGMLParser.close(self)
项目:SublimeRSS    作者:JaredMHall    | 项目源码 | 文件源码
def parse_declaration(self, i):
        try:
            return sgmllib.SGMLParser.parse_declaration(self, i)
        except sgmllib.SGMLParseError:
            # escape the doctype declaration and continue parsing
            self.handle_data('&lt;')
            return i+1
项目:SublimeRSS    作者:JaredMHall    | 项目源码 | 文件源码
def __init__(self, baseuri, baselang, encoding, entities):
        sgmllib.SGMLParser.__init__(self)
        _FeedParserMixin.__init__(self, baseuri, baselang, encoding)
        _BaseHTMLProcessor.__init__(self, encoding, 'application/xhtml+xml')
        self.entities=entities
项目:touch-pay-client    作者:HackPucBemobi    | 项目源码 | 文件源码
def __init__(self, encoding, _type):
        self.encoding = encoding
        self._type = _type
        sgmllib.SGMLParser.__init__(self)
项目:touch-pay-client    作者:HackPucBemobi    | 项目源码 | 文件源码
def reset(self):
        self.pieces = []
        sgmllib.SGMLParser.reset(self)
项目:touch-pay-client    作者:HackPucBemobi    | 项目源码 | 文件源码
def feed(self, data):
        data = re.compile(r'<!((?!DOCTYPE|--|\[))', re.IGNORECASE).sub(r'&lt;!\1', data)
        data = re.sub(r'<([^<>\s]+?)\s*/>', self._shorttag_replace, data)
        data = data.replace('&#39;', "'")
        data = data.replace('&#34;', '"')
        try:
            bytes
            if bytes is str:
                raise NameError
            self.encoding = self.encoding + u'_INVALID_PYTHON_3'
        except NameError:
            if self.encoding and isinstance(data, unicode):
                data = data.encode(self.encoding)
        sgmllib.SGMLParser.feed(self, data)
        sgmllib.SGMLParser.close(self)
项目:touch-pay-client    作者:HackPucBemobi    | 项目源码 | 文件源码
def parse_declaration(self, i):
        try:
            return sgmllib.SGMLParser.parse_declaration(self, i)
        except sgmllib.SGMLParseError:
            # escape the doctype declaration and continue parsing
            self.handle_data('&lt;')
            return i+1
项目:touch-pay-client    作者:HackPucBemobi    | 项目源码 | 文件源码
def __init__(self, baseuri, baselang, encoding, entities):
        sgmllib.SGMLParser.__init__(self)
        _FeedParserMixin.__init__(self, baseuri, baselang, encoding)
        _BaseHTMLProcessor.__init__(self, encoding, 'application/xhtml+xml')
        self.entities=entities
项目:dati-ckan-docker    作者:italia    | 项目源码 | 文件源码
def __init__(self):
        sgmllib.SGMLParser.__init__(self)
项目:true_review_web2py    作者:lucadealfaro    | 项目源码 | 文件源码
def __init__(self, encoding, _type):
        self.encoding = encoding
        self._type = _type
        sgmllib.SGMLParser.__init__(self)
项目:true_review_web2py    作者:lucadealfaro    | 项目源码 | 文件源码
def reset(self):
        self.pieces = []
        sgmllib.SGMLParser.reset(self)
项目:true_review_web2py    作者:lucadealfaro    | 项目源码 | 文件源码
def feed(self, data):
        data = re.compile(r'<!((?!DOCTYPE|--|\[))', re.IGNORECASE).sub(r'&lt;!\1', data)
        data = re.sub(r'<([^<>\s]+?)\s*/>', self._shorttag_replace, data)
        data = data.replace('&#39;', "'")
        data = data.replace('&#34;', '"')
        try:
            bytes
            if bytes is str:
                raise NameError
            self.encoding = self.encoding + u'_INVALID_PYTHON_3'
        except NameError:
            if self.encoding and isinstance(data, unicode):
                data = data.encode(self.encoding)
        sgmllib.SGMLParser.feed(self, data)
        sgmllib.SGMLParser.close(self)
项目:true_review_web2py    作者:lucadealfaro    | 项目源码 | 文件源码
def parse_declaration(self, i):
        try:
            return sgmllib.SGMLParser.parse_declaration(self, i)
        except sgmllib.SGMLParseError:
            # escape the doctype declaration and continue parsing
            self.handle_data('&lt;')
            return i+1
项目:true_review_web2py    作者:lucadealfaro    | 项目源码 | 文件源码
def __init__(self, baseuri, baselang, encoding, entities):
        sgmllib.SGMLParser.__init__(self)
        _FeedParserMixin.__init__(self, baseuri, baselang, encoding)
        _BaseHTMLProcessor.__init__(self, encoding, 'application/xhtml+xml')
        self.entities=entities
项目:spc    作者:whbrewer    | 项目源码 | 文件源码
def __init__(self, encoding, _type):
        self.encoding = encoding
        self._type = _type
        sgmllib.SGMLParser.__init__(self)
项目:spc    作者:whbrewer    | 项目源码 | 文件源码
def reset(self):
        self.pieces = []
        sgmllib.SGMLParser.reset(self)
项目:spc    作者:whbrewer    | 项目源码 | 文件源码
def feed(self, data):
        data = re.compile(r'<!((?!DOCTYPE|--|\[))', re.IGNORECASE).sub(r'&lt;!\1', data)
        data = re.sub(r'<([^<>\s]+?)\s*/>', self._shorttag_replace, data)
        data = data.replace('&#39;', "'")
        data = data.replace('&#34;', '"')
        try:
            bytes
            if bytes is str:
                raise NameError
            self.encoding = self.encoding + u'_INVALID_PYTHON_3'
        except NameError:
            if self.encoding and isinstance(data, unicode):
                data = data.encode(self.encoding)
        sgmllib.SGMLParser.feed(self, data)
        sgmllib.SGMLParser.close(self)
项目:spc    作者:whbrewer    | 项目源码 | 文件源码
def parse_declaration(self, i):
        try:
            return sgmllib.SGMLParser.parse_declaration(self, i)
        except sgmllib.SGMLParseError:
            # escape the doctype declaration and continue parsing
            self.handle_data('&lt;')
            return i+1
项目:spc    作者:whbrewer    | 项目源码 | 文件源码
def __init__(self, baseuri, baselang, encoding, entities):
        sgmllib.SGMLParser.__init__(self)
        _FeedParserMixin.__init__(self, baseuri, baselang, encoding)
        _BaseHTMLProcessor.__init__(self, encoding, 'application/xhtml+xml')
        self.entities=entities
项目:respeaker_virtualenv    作者:respeaker    | 项目源码 | 文件源码
def __init__(self, encoding, _type):
        self.encoding = encoding
        self._type = _type
        sgmllib.SGMLParser.__init__(self)
项目:respeaker_virtualenv    作者:respeaker    | 项目源码 | 文件源码
def reset(self):
        self.pieces = []
        sgmllib.SGMLParser.reset(self)
项目:respeaker_virtualenv    作者:respeaker    | 项目源码 | 文件源码
def feed(self, data):
        data = re.compile(r'<!((?!DOCTYPE|--|\[))', re.IGNORECASE).sub(r'&lt;!\1', data)
        data = re.sub(r'<([^<>\s]+?)\s*/>', self._shorttag_replace, data)
        data = data.replace('&#39;', "'")
        data = data.replace('&#34;', '"')
        try:
            bytes
            if bytes is str:
                raise NameError
            self.encoding = self.encoding + u'_INVALID_PYTHON_3'
        except NameError:
            if self.encoding and isinstance(data, unicode):
                data = data.encode(self.encoding)
        sgmllib.SGMLParser.feed(self, data)
        sgmllib.SGMLParser.close(self)
项目:respeaker_virtualenv    作者:respeaker    | 项目源码 | 文件源码
def parse_declaration(self, i):
        try:
            return sgmllib.SGMLParser.parse_declaration(self, i)
        except sgmllib.SGMLParseError:
            # escape the doctype declaration and continue parsing
            self.handle_data('&lt;')
            return i+1
项目:respeaker_virtualenv    作者:respeaker    | 项目源码 | 文件源码
def __init__(self, baseuri, baselang, encoding, entities):
        sgmllib.SGMLParser.__init__(self)
        _FeedParserMixin.__init__(self, baseuri, baselang, encoding)
        _BaseHTMLProcessor.__init__(self, encoding, 'application/xhtml+xml')
        self.entities=entities
项目:Problematica-public    作者:TechMaz    | 项目源码 | 文件源码
def __init__(self, encoding, _type):
        self.encoding = encoding
        self._type = _type
        sgmllib.SGMLParser.__init__(self)
项目:Problematica-public    作者:TechMaz    | 项目源码 | 文件源码
def reset(self):
        self.pieces = []
        sgmllib.SGMLParser.reset(self)
项目:Problematica-public    作者:TechMaz    | 项目源码 | 文件源码
def feed(self, data):
        data = re.compile(r'<!((?!DOCTYPE|--|\[))', re.IGNORECASE).sub(r'&lt;!\1', data)
        data = re.sub(r'<([^<>\s]+?)\s*/>', self._shorttag_replace, data)
        data = data.replace('&#39;', "'")
        data = data.replace('&#34;', '"')
        try:
            bytes
            if bytes is str:
                raise NameError
            self.encoding = self.encoding + u'_INVALID_PYTHON_3'
        except NameError:
            if self.encoding and isinstance(data, unicode):
                data = data.encode(self.encoding)
        sgmllib.SGMLParser.feed(self, data)
        sgmllib.SGMLParser.close(self)
项目:Problematica-public    作者:TechMaz    | 项目源码 | 文件源码
def parse_declaration(self, i):
        try:
            return sgmllib.SGMLParser.parse_declaration(self, i)
        except sgmllib.SGMLParseError:
            # escape the doctype declaration and continue parsing
            self.handle_data('&lt;')
            return i+1
项目:Problematica-public    作者:TechMaz    | 项目源码 | 文件源码
def __init__(self, baseuri, baselang, encoding, entities):
        sgmllib.SGMLParser.__init__(self)
        _FeedParserMixin.__init__(self, baseuri, baselang, encoding)
        _BaseHTMLProcessor.__init__(self, encoding, 'application/xhtml+xml')
        self.entities=entities
项目:oil    作者:oilshell    | 项目源码 | 文件源码
def __init__(self, formatter, verbose=0):
        """Creates an instance of the HTMLParser class.

        The formatter parameter is the formatter instance associated with
        the parser.

        """
        sgmllib.SGMLParser.__init__(self, verbose)
        self.formatter = formatter
项目:oil    作者:oilshell    | 项目源码 | 文件源码
def reset(self):
        sgmllib.SGMLParser.reset(self)
        self.savedata = None
        self.isindex = 0
        self.title = None
        self.base = None
        self.anchor = None
        self.anchorlist = []
        self.nofill = 0
        self.list_stack = []

    # ------ Methods used internally; some may be overridden

    # --- Formatter interface, taking care of 'savedata' mode;
    # shouldn't need to be overridden
项目:oil    作者:oilshell    | 项目源码 | 文件源码
def __init__(self, url, verbose=VERBOSE, checker=None):
        self.myverbose = verbose # now unused
        self.checker = checker
        self.base = None
        self.links = {}
        self.names = []
        self.url = url
        sgmllib.SGMLParser.__init__(self)
项目:python2-tracer    作者:extremecoders-re    | 项目源码 | 文件源码
def __init__(self, formatter, verbose=0):
        """Creates an instance of the HTMLParser class.

        The formatter parameter is the formatter instance associated with
        the parser.

        """
        sgmllib.SGMLParser.__init__(self, verbose)
        self.formatter = formatter
项目:python2-tracer    作者:extremecoders-re    | 项目源码 | 文件源码
def reset(self):
        sgmllib.SGMLParser.reset(self)
        self.savedata = None
        self.isindex = 0
        self.title = None
        self.base = None
        self.anchor = None
        self.anchorlist = []
        self.nofill = 0
        self.list_stack = []

    # ------ Methods used internally; some may be overridden

    # --- Formatter interface, taking care of 'savedata' mode;
    # shouldn't need to be overridden
项目:python2-tracer    作者:extremecoders-re    | 项目源码 | 文件源码
def __init__(self, url, verbose=VERBOSE, checker=None):
        self.myverbose = verbose # now unused
        self.checker = checker
        self.base = None
        self.links = {}
        self.names = []
        self.url = url
        sgmllib.SGMLParser.__init__(self)
项目:download-manager    作者:thispc    | 项目源码 | 文件源码
def __init__(self, encoding, _type):
        self.encoding = encoding
        self._type = _type
        if _debug: sys.stderr.write('entering BaseHTMLProcessor, encoding=%s\n' % self.encoding)
        sgmllib.SGMLParser.__init__(self)
项目:download-manager    作者:thispc    | 项目源码 | 文件源码
def reset(self):
        self.pieces = []
        sgmllib.SGMLParser.reset(self)
项目:download-manager    作者:thispc    | 项目源码 | 文件源码
def parse_starttag(self,i):
        j=sgmllib.SGMLParser.parse_starttag(self, i)
        if self._type == 'application/xhtml+xml':
            if j>2 and self.rawdata[j-2:j]=='/>':
                self.unknown_endtag(self.lasttag)
        return j
项目:download-manager    作者:thispc    | 项目源码 | 文件源码
def __init__(self, baseuri, baselang, encoding, entities):
        sgmllib.SGMLParser.__init__(self)
        _FeedParserMixin.__init__(self, baseuri, baselang, encoding)
        _BaseHTMLProcessor.__init__(self, encoding, 'application/xhtml+xml')
        self.entities=entities
项目:tingbot-apps    作者:WhistleMaster    | 项目源码 | 文件源码
def __init__(self, encoding, _type):
        self.encoding = encoding
        self._type = _type
        sgmllib.SGMLParser.__init__(self)