The following 48 code examples, extracted from open-source Python projects, illustrate how to use token.INDENT.
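Before the project examples, here is a minimal, self-contained sketch (Python 3, not taken from any of the projects below; the function name show_indent_levels is just for illustration) showing where token.INDENT and token.DEDENT appear in a tokenize stream:

import io
import token
import tokenize

def show_indent_levels(source):
    # Track nesting depth: INDENT increases it, DEDENT decreases it.
    depth = 0
    for tok in tokenize.generate_tokens(io.StringIO(source).readline):
        if tok.type == token.INDENT:
            depth += 1
            print("INDENT  -> depth", depth, "at line", tok.start[0])
        elif tok.type == token.DEDENT:
            depth -= 1
            print("DEDENT  -> depth", depth, "at line", tok.start[0])

show_indent_levels("def f():\n    if True:\n        pass\n")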
def find_strings(filename):
    """Return a dict of possible docstring positions.

    The dict maps line numbers to strings.  There is an entry for
    line that contains only a string or a part of a triple-quoted
    string.
    """
    d = {}
    # If the first token is a string, then it's the module docstring.
    # Add this special case so that the test in the loop passes.
    prev_ttype = token.INDENT
    f = open(filename)
    for ttype, tstr, start, end, line in tokenize.generate_tokens(f.readline):
        if ttype == token.STRING:
            if prev_ttype == token.INDENT:
                sline, scol = start
                eline, ecol = end
                for i in range(sline, eline + 1):
                    d[i] = 1
        prev_ttype = ttype
    f.close()
    return d
def _find_strings(filename, encoding=None):
    """Return a dict of possible docstring positions.

    The dict maps line numbers to strings.  There is an entry for
    line that contains only a string or a part of a triple-quoted
    string.
    """
    d = {}
    # If the first token is a string, then it's the module docstring.
    # Add this special case so that the test in the loop passes.
    prev_ttype = token.INDENT
    with open(filename, encoding=encoding) as f:
        tok = tokenize.generate_tokens(f.readline)
        for ttype, tstr, start, end, line in tok:
            if ttype == token.STRING:
                if prev_ttype == token.INDENT:
                    sline, scol = start
                    eline, ecol = end
                    for i in range(sline, eline + 1):
                        d[i] = 1
            prev_ttype = ttype
    return d
def __openseen(self, ttype, tstring, lineno):
    if ttype == tokenize.OP and tstring == ')':
        # We've seen the last of the translatable strings.  Record the
        # line number of the first line of the strings and update the list
        # of messages seen.  Reset state for the next batch.  If there
        # were no strings inside _(), then just ignore this entry.
        if self.__data:
            self.__addentry(EMPTYSTRING.join(self.__data))
        self.__state = self.__waiting
    elif ttype == tokenize.STRING:
        self.__data.append(safe_eval(tstring))
    elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                       token.NEWLINE, tokenize.NL]:
        # warn if we see anything else than STRING or whitespace
        print >> sys.stderr, _(
            '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
            ) % {
            'token': tstring,
            'file': self.__curfile,
            'lineno': self.__lineno
            }
        self.__state = self.__waiting
def _find_logical(source_lines):
    # Make a variable which is the index of all the starts of lines.
    logical_start = []
    logical_end = []
    last_newline = True
    parens = 0
    for t in generate_tokens(''.join(source_lines)):
        if t[0] in [tokenize.COMMENT, tokenize.DEDENT,
                    tokenize.INDENT, tokenize.NL,
                    tokenize.ENDMARKER]:
            continue
        if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]:
            last_newline = True
            logical_end.append((t[3][0] - 1, t[2][1]))
            continue
        if last_newline and not parens:
            logical_start.append((t[2][0] - 1, t[2][1]))
            last_newline = False
        if t[0] == tokenize.OP:
            if t[1] in '([{':
                parens += 1
            elif t[1] in '}])':
                parens -= 1
    return (logical_start, logical_end)
def __call__(self, toktype, toktext, start_pos, end_pos, line):
    """ Token handler, with syntax highlighting."""
    (srow, scol) = start_pos
    (erow, ecol) = end_pos
    colors = self.colors
    owrite = self.out.write

    # line separator, so this works across platforms
    linesep = os.linesep

    # calculate new positions
    oldpos = self.pos
    newpos = self.lines[srow] + scol
    self.pos = newpos + len(toktext)

    # send the original whitespace, if needed
    if newpos > oldpos:
        owrite(self.raw[oldpos:newpos])

    # skip indenting tokens
    if toktype in [token.INDENT, token.DEDENT]:
        self.pos = newpos
        return

    # map token type to a color group
    if token.LPAR <= toktype <= token.OP:
        toktype = token.OP
    elif toktype == token.NAME and keyword.iskeyword(toktext):
        toktype = _KEYWORD
    color = colors.get(toktype, colors[_TEXT])

    #print '<%s>' % toktext,    # dbg

    # Triple quoted strings must be handled carefully so that backtracking
    # in pagers works correctly. We need color terminators on _each_ line.
    if linesep in toktext:
        toktext = toktext.replace(linesep, '%s%s%s' %
                                  (colors.normal, linesep, color))

    # send text
    owrite('%s%s%s' % (color, toktext, colors.normal))
def __suitedocstring(self, ttype, tstring, lineno):
    # ignore any intervening noise
    if ttype == tokenize.STRING:
        self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
        self.__state = self.__waiting
    elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
                       tokenize.COMMENT):
        # there was no class docstring
        self.__state = self.__waiting
def _get_indentword(source):
    """Return indentation type."""
    indent_word = '    '  # Default in case source has no indentation
    try:
        for t in generate_tokens(source):
            if t[0] == token.INDENT:
                indent_word = t[1]
                break
    except (SyntaxError, tokenize.TokenError):
        pass
    return indent_word
def _parse_tokens(tokens):
    """Parse the tokens.

    This converts the tokens into a form where we can manipulate them
    more easily.

    """
    index = 0
    parsed_tokens = []
    num_tokens = len(tokens)
    while index < num_tokens:
        tok = Token(*tokens[index])

        assert tok.token_type != token.INDENT
        if tok.token_type == tokenize.NEWLINE:
            # There's only one newline and it's at the end.
            break

        if tok.token_string in '([{':
            (container, index) = _parse_container(tokens, index)
            if not container:
                return None
            parsed_tokens.append(container)
        else:
            parsed_tokens.append(Atom(tok))

        index += 1

    return parsed_tokens
def multiline_string_lines(source, include_docstrings=False):
    """Return line numbers that are within multiline strings.

    The line numbers are indexed at 1.

    Docstrings are ignored.

    """
    line_numbers = set()
    previous_token_type = ''
    try:
        for t in generate_tokens(source):
            token_type = t[0]
            start_row = t[2][0]
            end_row = t[3][0]

            if token_type == tokenize.STRING and start_row != end_row:
                if (
                    include_docstrings or
                    previous_token_type != tokenize.INDENT
                ):
                    # We increment by one since we want the contents of the
                    # string.
                    line_numbers |= set(range(1 + start_row, 1 + end_row))

            previous_token_type = token_type
    except (SyntaxError, tokenize.TokenError):
        pass

    return line_numbers
def check_amount_of_commenting(readable):
    """
    Check lines of file to see whether it has a lot of commenting in it,
    which we take to mean the presence of code explanation.
    """
    # TODO: I have hacked this code together and it could be silly and buggy.
    # It does seem to get things roughly correct, which is good enough for the moment
    prev_toktype = token.INDENT
    first_line = None
    last_lineno = -1
    last_col = 0
    comment_lines = 0
    tokgen = tokenize.generate_tokens(readable)
    for toktype, ttext, (slineno, scol), (elineno, ecol), ltext in tokgen:
        if 1:  # Change to if 1 to see the tokens fly by.
            print("%10s %-14s %-20r %r" % (
                tokenize.tok_name.get(toktype, toktype),
                "%d.%d-%d.%d" % (slineno, scol, elineno, ecol),
                ttext, ltext
                ))
        if slineno > last_lineno:
            last_col = 0
        if toktype == token.STRING and prev_toktype == token.INDENT:
            # Docstring
            comment_lines += 1
        elif toktype == tokenize.COMMENT:
            # Comment
            comment_lines += 1
        prev_toktype = toktype
        last_col = ecol
        last_lineno = elineno
    return comment_lines, last_lineno  # roughly num of comment lines divided by num of lines
def source_token_lines(source):
    """Generate a series of lines, one for each line in `source`.

    Each line is a list of pairs, each pair is a token::

        [('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('), ... ]

    Each pair has a token class, and the token text.

    If you concatenate all the token texts, and then join them with newlines,
    you should have your original `source` back, with two differences:
    trailing whitespace is not preserved, and a final line with no newline
    is indistinguishable from a final line with a newline.

    """
    ws_tokens = set([token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL])
    line = []
    col = 0
    source = source.expandtabs(8).replace('\r\n', '\n')
    tokgen = generate_tokens(source)
    for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):
        mark_start = True
        for part in re.split('(\n)', ttext):
            if part == '\n':
                yield line
                line = []
                col = 0
                mark_end = False
            elif part == '':
                mark_end = False
            elif ttype in ws_tokens:
                mark_end = False
            else:
                if mark_start and scol > col:
                    line.append(("ws", " " * (scol - col)))
                    mark_start = False
                tok_class = tokenize.tok_name.get(ttype, 'xx').lower()[:3]
                if ttype == token.NAME and keyword.iskeyword(ttext):
                    tok_class = "key"
                line.append((tok_class, part))
                mark_end = True
            scol = 0
        if mark_end:
            col = ecol

    if line:
        yield line
def _reindent_stats(tokens):
    """Return list of (lineno, indentlevel) pairs.

    One for each stmt and comment line. indentlevel is -1 for comment
    lines, as a signal that tokenize doesn't know what to do about them;
    indeed, they're our headache!

    """
    find_stmt = 1  # Next token begins a fresh stmt?
    level = 0  # Current indent level.
    stats = []

    for t in tokens:
        token_type = t[0]
        sline = t[2][0]
        line = t[4]

        if token_type == tokenize.NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            find_stmt = 1
        elif token_type == tokenize.INDENT:
            find_stmt = 1
            level += 1
        elif token_type == tokenize.DEDENT:
            find_stmt = 1
            level -= 1
        elif token_type == tokenize.COMMENT:
            if find_stmt:
                stats.append((sline, -1))
                # But we're still looking for a new stmt, so leave
                # find_stmt alone.
        elif token_type == tokenize.NL:
            pass
        elif find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            find_stmt = 0
            if line:   # Not endmarker.
                stats.append((sline, level))

    return stats
def source_token_lines(source):
    """Generate a series of lines, one for each line in `source`.

    Each line is a list of pairs, each pair is a token::

        [('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('), ... ]

    Each pair has a token class, and the token text.

    If you concatenate all the token texts, and then join them with newlines,
    you should have your original `source` back, with two differences:
    trailing whitespace is not preserved, and a final line with no newline
    is indistinguishable from a final line with a newline.

    """
    ws_tokens = [token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL]
    line = []
    col = 0
    source = source.expandtabs(8).replace('\r\n', '\n')
    tokgen = tokenize.generate_tokens(StringIO(source).readline)
    for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):
        mark_start = True
        for part in re.split('(\n)', ttext):
            if part == '\n':
                yield line
                line = []
                col = 0
                mark_end = False
            elif part == '':
                mark_end = False
            elif ttype in ws_tokens:
                mark_end = False
            else:
                if mark_start and scol > col:
                    line.append(("ws", " " * (scol - col)))
                    mark_start = False
                tok_class = tokenize.tok_name.get(ttype, 'xx').lower()[:3]
                if ttype == token.NAME and keyword.iskeyword(ttext):
                    tok_class = "key"
                line.append((tok_class, part))
                mark_end = True
            scol = 0
        if mark_end:
            col = ecol

    if line:
        yield line
def _simulate_compile_singlemode(self, s):
    # Calculate line offsets
    lines = [0, 0]
    pos = 0
    while 1:
        pos = s.find('\n', pos) + 1
        if not pos:
            break
        lines.append(pos)
    lines.append(len(s))

    oldpos = 0
    parenlevel = 0
    deflevel = 0
    output = []
    stmt = []

    text = StringIO(s)
    tok_gen = tokenize.generate_tokens(text.readline)
    for toktype, tok, (srow, scol), (erow, ecol), line in tok_gen:
        newpos = lines[srow] + scol
        stmt.append(s[oldpos:newpos])
        if tok != '':
            stmt.append(tok)
        oldpos = newpos + len(tok)

        # Update the paren level.
        if tok in '([{':
            parenlevel += 1
        if tok in '}])':
            parenlevel -= 1

        if tok in ('def', 'class') and deflevel == 0:
            deflevel = 1
        if deflevel and toktype == token.INDENT:
            deflevel += 1
        if deflevel and toktype == token.DEDENT:
            deflevel -= 1

        # Are we starting a statement?
        if ((toktype in (token.NEWLINE, tokenize.NL, tokenize.COMMENT,
                         token.INDENT, token.ENDMARKER) or
             tok == ':') and parenlevel == 0):
            if deflevel == 0 and self._is_expr(stmt[1:-2]):
                output += stmt[0]
                output.append('__print__((')
                output += stmt[1:-2]
                output.append('))')
                output += stmt[-2:]
            else:
                output += stmt
            stmt = []
    return ''.join(output)
def source_token_lines(source):
    """Generate a series of lines, one for each line in `source`.

    Each line is a list of pairs, each pair is a token::

        [('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('), ... ]

    Each pair has a token class, and the token text.

    If you concatenate all the token texts, and then join them with newlines,
    you should have your original `source` back, with two differences:
    trailing whitespace is not preserved, and a final line with no newline
    is indistinguishable from a final line with a newline.

    """
    ws_tokens = set([token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL])
    line = []
    col = 0

    # The \f is because of http://bugs.python.org/issue19035
    source = source.expandtabs(8).replace('\r\n', '\n').replace('\f', ' ')
    tokgen = generate_tokens(source)
    for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):
        mark_start = True
        for part in re.split('(\n)', ttext):
            if part == '\n':
                yield line
                line = []
                col = 0
                mark_end = False
            elif part == '':
                mark_end = False
            elif ttype in ws_tokens:
                mark_end = False
            else:
                if mark_start and scol > col:
                    line.append(("ws", u" " * (scol - col)))
                    mark_start = False
                tok_class = tokenize.tok_name.get(ttype, 'xx').lower()[:3]
                if ttype == token.NAME and keyword.iskeyword(ttext):
                    tok_class = "key"
                line.append((tok_class, part))
                mark_end = True
            scol = 0
        if mark_end:
            col = ecol

    if line:
        yield line