The following code examples, extracted from Python open-source projects, illustrate how to use pygments.lex().
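Before the project examples, a minimal usage sketch (not taken from any of the projects below): pygments.lex(code, lexer) takes a source string and a lexer instance and yields (token_type, value) pairs.

import pygments
from pygments.lexers import PythonLexer

# Iterate over the (token_type, value) pairs produced by the lexer.
for ttype, value in pygments.lex('print("hello")', PythonLexer()):
    print(ttype, repr(value))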
def highlight(self, block):
    """Method called on each block to highlight its content"""
    tokens = pygments.lex(block, self.python_lexer)
    if self.format_rst:
        from pygments.token import Token
        toks = []
        for token in tokens:
            if token[0] == Token.String.Doc and len(token[1]) > 6:
                toks += pygments.lex(token[1][:3], self.python_lexer)
                # parse doc string content by rst lexer
                toks += pygments.lex(token[1][3:-3], self.rst_lexer)
                toks += pygments.lex(token[1][-3:], self.python_lexer)
            elif token[0] == Token.Comment.Single:
                toks.append((Token.Comment.Single, token[1][0]))
                # parse comment content by rst lexer
                # remove the extra newline added by rst lexer
                toks += list(pygments.lex(token[1][1:], self.rst_lexer))[:-1]
            else:
                toks.append(token)
        tokens = toks
    return pygments.format(tokens, self.formatter)
def fix_preprocessor_defs(tokens, lexer):
    res = []
    for t in tokens:
        token_split = t[1].split()
        if not is_token_subtype(t[0], Token.Literal.String) and len(token_split) > 1:
            if t[0] == Token.Comment.PreprocFile:
                if t[1].startswith('"'):
                    end = t[1].find('"', t[1].find('"') + 1) + 1
                elif t[1].startswith('<'):
                    end = t[1].find('>') + 1
                else:
                    end = t[1].find(' ') + 1
                res.append((t[0], t[1][:end]))
            else:
                token_lexed = list(lex(' '.join(token_split), lexer))
                res += token_lexed
        else:
            res += (t,)
    return res
def __iter__(self):
    """parse code string and yield "classified" tokens"""
    try:
        tokens = self.lex()
    except IOError:
        log.info("Pygments lexer not found, using fallback")
        # TODO: write message to INFO
        yield ('', self.code)
        return

    for ttype, value in self.join(tokens):
        yield (_get_ttype_class(ttype), value)


# code_block_directive
# --------------------
# ::
def __iter__(self):
    """Parse self.code and yield "classified" tokens.
    """
    if self.lexer is None:
        yield ([], self.code)
        return
    tokens = pygments.lex(self.code, self.lexer)
    for tokentype, value in self.merge(tokens):
        if self.tokennames == 'long':  # long CSS class args
            classes = str(tokentype).lower().split('.')
        else:  # short CSS class args
            classes = [_get_ttype_class(tokentype)]
        classes = [cls for cls in classes if cls not in unstyled_tokens]
        yield (classes, value)
def tokenize_code(code, lexer, language, literal_option):
    tokens = lex(code, lexer)
    tokensList = list(tokens)

    # Strip comments and alter strings
    lexedWoComments = tokensExceptTokenType(tokensList, Token.Comment,
                                            retainedTypes=[Token.Comment.Preproc, Token.Comment.PreprocFile])
    lexedWoComments = tokensExceptTokenType(lexedWoComments, Token.Literal.String.Doc)
    lexedWoComments = fixTypes(lexedWoComments, language)  # Alter the pygments lexer types to be more comparable between our languages
    lexedWoComments = convertNamespaceTokens(lexedWoComments, language)
    lexedWoComments = fix_preprocessor_defs(lexedWoComments, lexer)
    lexedWoComments = tokensExceptTokenType(lexedWoComments, Token.Comment,
                                            retainedTypes=[Token.Comment.Preproc, Token.Comment.PreprocFile])

    if literal_option == 0:
        lexedWoComments = modifyStrings(lexedWoComments, underscoreString)
    elif literal_option == 1:
        lexedWoComments = modifyStrings(lexedWoComments, singleStringToken)
    elif literal_option == 2:
        lexedWoComments = modifyStrings(lexedWoComments, spaceString)
    elif literal_option == 3:
        lexedWoComments = modifyStrings(lexedWoComments, singleStringToken)
        lexedWoComments = collapseStrings(lexedWoComments)
        lexedWoComments = modifyNumbers(lexedWoComments, singleNumberToken)

    return get_tokenization(lexedWoComments, lexer)


# source_file: path of source file to be tokenized
# language: programming language of source file, e.g. "c"
# literal_option:
#   0 -> replace all spaces in strings with _
#   1 -> replace all strings with a <str> tag
#   2 -> add spaces to the ends of the strings
#   3 -> collapse strings to <str> and collapses numbers to a type as well.
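The literal_option values described in the comments above rely on project-specific helpers (modifyStrings, singleStringToken, collapseStrings, and so on) that are not shown here. As a rough, self-contained illustration of option 1, the following hypothetical sketch replaces each string literal in a lexed token stream with a single <str> tag; the helper name is an assumption, not part of the project code.

from pygments import lex
from pygments.lexers import CLexer
from pygments.token import Token, is_token_subtype

def collapse_string_literals(tokens, tag='<str>'):
    # Hypothetical stand-in for the project's string helpers: Pygments splits a
    # literal into opening quote, contents, and closing quote, so replace each
    # run of consecutive string tokens with one placeholder token.
    in_string = False
    for ttype, value in tokens:
        if is_token_subtype(ttype, Token.Literal.String):
            if not in_string:
                yield (Token.Literal.String, tag)
                in_string = True
        else:
            in_string = False
            yield (ttype, value)

for ttype, value in collapse_string_literals(lex('printf("hello world");', CLexer())):
    print(ttype, repr(value))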
def lex(self):
    # Get lexer for language (use text as fallback)
    try:
        if self.language and str(self.language).lower() != 'none':
            lexer = get_lexer_by_name(self.language.lower(),
                                      **self.custom_args)
        else:
            lexer = get_lexer_by_name('text', **self.custom_args)
    except ValueError:
        log.info("no pygments lexer for %s, using 'text'" % self.language)
        # what happens if pygments isn't present ?
        lexer = get_lexer_by_name('text')
    return pygments.lex(self.code, lexer)
def format_testcase_diff(diff):
    """Format a testcase output diff.

    PARAMETERS
        diff: the diff content

    RETURNS
        a list of pygments' Tokens
    """

    def new_line_token():
        """Generate a new line token."""
        return Token.Whitespace, '\n'

    def indent_token():
        """Generate an indentation space token."""
        return Token.Whitespace, ' ' * 4

    tokens = []
    new_line = True

    # Because of logging prefixes, skip the first line to avoid
    # misalignment.
    tokens.append(new_line_token())

    for ttype, value in pygments.lex(diff, DiffLexer()):
        for subval in value.split('\n'):
            if new_line:
                tokens.append(indent_token())
            new_line = not subval
            if subval:
                tokens.append((ttype, subval))
            else:
                tokens.append(new_line_token())
    return tokens
def tokens(self, event=None):
    """Highlight tokens as rendered by Pygments.

    Seems to only work after textarea is updated, though calling
    update_idletasks has no effect. The problem can be solved by recalling
    the function if there is no bbox, (as with update_linenumbers), or
    figure out what is not updated when running this function (bbox was
    the case in update_linenumbers).
    """
    # http://stackoverflow.com/a/30199105
    from pygments import lex, highlight
    from pygments.lexers import PythonLexer
    from pygments.formatters import HtmlFormatter

    # don't use because multiline strings can start at beginning and end in visible view
    #tv = self.mainframe.texthelper.top_visible(self.textarea)

    # use since highlight works if multiline str not properly closed
    bv = self.mainframe.texthelper.bottom_visible(self.textarea)
    data = self.textarea.get("1.0", bv)  # "end-1c"

    if data == self.prevdata:
        return

    self.clear_tokens()
    #print( highlight(data, PythonLexer(), HtmlFormatter()))

    prev_content = ''
    i = 0
    for token, content in lex(data, PythonLexer()):
        lencontent = len(content)

        # this happens sometimes in lubuntu
        if not content:
            #print('no content in HighLight.tokens() loop')
            continue

        #str(token) == 'Token.Literal.String.Doc' \
        if self.mainframe.texthelper.visible(self.textarea, '1.0 + %dc' % i) \
                or self.mainframe.texthelper.visible(self.textarea, '1.0 + %dc' % (i + lencontent)):
            self.textarea.mark_set("range_start", "1.0 + %dc" % i)
            self.textarea.mark_set("range_end", "range_start + %dc" % lencontent)
            self.textarea.tag_add(str(token), "range_start", "range_end")

        i += lencontent

    self.prevdata = data
def get_tokenization(lexedWoComments, lexer):
    tokenized_string = ''
    token_types = []
    curr_line_empty = True
    for t in lexedWoComments:
        token_type = str(t[0])
        token = t[1]
        token_stripped = token.strip()

        # Pygments will sometimes lex many tokens as one
        # This can occur with preprocessor directives and definitions in C
        # In this case, we need to lex that whole line
        num_tokens = len(token.split())
        if num_tokens > 1:
            # Need to manually lex each space separated token on occasions
            # when pygments doesn't lex properly
            line_split = token.split()
            line_lexed = []
            for temp_token in line_split:
                token_lexed = list(lex(temp_token, lexer))
                for lexed in token_lexed:
                    if lexed[1] != "\n":
                        line_lexed.append(lexed)
            line_lexed.append((Token.Text, '\n'))
            line_code, line_types = get_tokenization(line_lexed, lexer)
            tokenized_string += line_code
            token_types += line_types
            curr_line_empty = True
            continue

        if '\n' in token:
            if curr_line_empty:
                if (t[0] != Token.Text or t[0] != Token.Comment.Preproc) and token_stripped != '':
                    tokenized_string += token_stripped + "\n"
                    token_types.append(token_type)
            else:
                tokenized_string += token_stripped + "\n"
                # Edge case for stray "\" in code
                if token_stripped == "\\":
                    token_types.append(token_type)
            curr_line_empty = True
        elif t[0] != Token.Text and len(token_stripped) > 0:
            curr_line_empty = False
            tokenized_string += token + ' '
            token_types.append(token_type)

    assert len(tokenized_string.split()) == len(token_types), \
        "{0} != {1}".format(len(tokenized_string.split()), len(token_types))
    return tokenized_string, token_types
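Both fix_preprocessor_defs and get_tokenization above work around the same Pygments behaviour: a preprocessor directive or definition can come back as one multi-word token, which then has to be re-lexed word by word. A minimal, hypothetical sketch of that re-lexing step is shown below; the helper name and the sample input are assumptions, not code from the projects above.

from pygments import lex
from pygments.lexers import CLexer
from pygments.token import Token, is_token_subtype

def relex_multiword_tokens(tokens, lexer):
    # Re-lex any non-string token whose value still spans several words,
    # dropping the trailing newline that lex() appends to each fragment
    # (the same filter used in get_tokenization above).
    for ttype, value in tokens:
        if not is_token_subtype(ttype, Token.Literal.String) and len(value.split()) > 1:
            for sub_ttype, sub_value in lex(' '.join(value.split()), lexer):
                if sub_value != '\n':
                    yield (sub_ttype, sub_value)
        else:
            yield (ttype, value)

# Example: the body of a #define often arrives as a single Comment.Preproc token.
for ttype, value in relex_multiword_tokens(lex('#define MAX 10\n', CLexer()), CLexer()):
    print(ttype, repr(value))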