我们从Python开源项目中,提取了以下14个代码示例,用于说明如何使用re.Scanner()。
def test_scanner(self): def s_ident(scanner, token): return token def s_operator(scanner, token): return "op%s" % token def s_float(scanner, token): return float(token) def s_int(scanner, token): return int(token) scanner = Scanner([ (r"[a-zA-Z_]\w*", s_ident), (r"\d+\.\d*", s_float), (r"\d+", s_int), (r"=|\+|-|\*|/", s_operator), (r"\s+", None), ]) self.assertNotEqual(scanner.scanner.scanner("").pattern, None) self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"), (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5, 'op+', 'bar'], ''))
def scanner(cls):
    """Return the tokenizing ``re.Scanner`` for ``cls``, building it on first use.

    The scanner is stored on ``cls._scanner`` and reused by later calls.
    Every match is turned into ``cls.Token(token_type, text)`` where
    ``token_type`` is one of the ``cls`` attributes ``LINE_COMMENT``,
    ``BLOCK_COMMENT``, ``STRING``, ``SEMICOLON``, ``WHITESPACE`` or
    ``OTHER``.

    Recognised lexemes, in priority order: ``--`` / ``//`` line comments,
    ``/* ... */`` block comments (including the empty ``/**/``), double-,
    single- and ``$$``-quoted strings with backslash escapes, semicolons,
    whitespace runs, and finally any single character.

    :param cls: Object providing ``Token`` and the token-type attributes
        listed above (presumably the enclosing class; the decorator is not
        visible in this excerpt).
    :return: The cached ``re.Scanner`` instance.
    """
    cached = getattr(cls, '_scanner', None)
    if cached:
        return cached
    # NOTE(review): getattr() also finds a scanner cached on a parent class,
    # so subclasses share the parent's scanner -- confirm this is intended.

    def h(tpe):
        # Bind the token type now; re.Scanner later calls the returned
        # function with (scanner, matched_text).
        return lambda sc, tk: cls.Token(tpe, tk)

    cls._scanner = re.Scanner([
        # Non-capturing group: re.Scanner selects the action through
        # Match.lastindex, so capturing groups inside lexicon patterns
        # are fragile and best avoided.
        (r"(?:--|//).*?$", h(cls.LINE_COMMENT)),
        # ``.*?`` rather than ``.+?`` so that an empty comment ``/**/`` is
        # recognised instead of running on to a later ``*/``.
        (r"\/\*.*?\*\/", h(cls.BLOCK_COMMENT)),
        (r'"(?:[^"\\]|\\.)*"', h(cls.STRING)),
        (r"'(?:[^'\\]|\\.)*'", h(cls.STRING)),
        (r"\$\$(?:[^\$\\]|\\.)*\$\$", h(cls.STRING)),
        (r";", h(cls.SEMICOLON)),
        (r"\s+", h(cls.WHITESPACE)),
        (r".", h(cls.OTHER)),  # catch-all keeps the scan from stopping early
    ], re.MULTILINE | re.DOTALL)
    return cls._scanner
def test_scanner(self): def s_ident(scanner, token): return token def s_operator(scanner, token): return "op%s" % token def s_float(scanner, token): return float(token) def s_int(scanner, token): return int(token) scanner = Scanner([ (r"[a-zA-Z_]\w*", s_ident), (r"\d+\.\d*", s_float), (r"\d+", s_int), (r"=|\+|-|\*|/", s_operator), (r"\s+", None), ]) self.assertTrue(scanner.scanner.scanner("").pattern) self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"), (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5, 'op+', 'bar'], ''))
def _scan_int(self, string, const):
    """Scan ``string`` as an integer token and record the result on ``self``.

    Two scanner rules are derived from the ``'space'`` and ``'tab'`` entries
    of ``CHAR_MAP``: a sign rule (one such character at the start of the
    string) and a value rule (any one character followed by a run of those
    characters). The sign rule is only used when ``const`` equals
    ``'SIGNED_INT'``.

    Sets ``self.type`` to ``'INT'`` and ``self.value`` to the concatenation
    of the matched token texts. Text the scanner could not match is
    discarded.

    :param string: Text to scan.
    :param const: Token kind; ``'SIGNED_INT'`` enables the sign rule.
    """
    # TODO: Add better invalid integer handling
    # Check for integer sign, possibly treat unsigned integer as POSITIVE
    space, tab = CHAR_MAP['space'], CHAR_MAP['tab']

    sign_rule = (
        r"^[{}{}]".format(space, tab),
        lambda scanner, token: ("INT_SIGN", token),
    )
    value_rule = (
        r".[{}{}]*".format(space, tab),
        lambda scanner, token: ("INT_VAL", token),
    )
    # The sign rule must come first so it wins at the start of the string.
    rules = [sign_rule, value_rule] if const == 'SIGNED_INT' else [value_rule]

    matches, _unmatched = Scanner(rules).scan(string)

    self.type = 'INT'
    try:
        # Each match is a (label, text) pair; only the text is kept.
        self.value = ''.join(match[1] for match in matches)
    except IndexError:
        print("Hit IndexError, string trying to check is: {}".
              format(dbg(string)))
def parse(input_string):
    """Parse a string written in NEON syntax.

    The input is first split into tokens with ``tokenize`` and the token
    sequence is then handed to a fresh ``Indent`` node's ``parse`` method.

    :param input_string: String to parse.
    :type input_string: string
    :return: Result of parsing the tokens.
    :rtype: :class:`OrderedDict`
    """
    token_stream = tokenize(input_string)
    root = Indent()
    return root.parse(token_stream)


#: The Scanner is instantiated with a list of re's and associated
#: functions. It is used to scan a string, returning a list of parts
#: which match the given re's.
#:
#: See: http://stackoverflow.com/a/17214398/2874089
def _scan_command(self, line, pos, const): patterns = [(r"^{}".format(i[0]), i[1]) for i in const] scanner = Scanner(patterns) found, remainder = scanner.scan(line[pos:]) self.type = found[0] self.value = [i[0] for i in const if i[1] == self.type][0]
def __init__(self, lexicon): self.line_no = -1 self.scanner = re.Scanner(lexicon)
def safe_eval(eval_str, **kw):
    """Evaluate ``eval_str`` with ``eval`` in a restricted namespace.

    The string is first run through a ``re.Scanner`` purely as a sanity
    check: a bare word that spells "true" with the wrong capitalisation
    (``true``, ``TRUE``, ...) is rejected with ``ValueError``. The string is
    then evaluated with ``__builtins__`` disabled and only a whitelist of
    ``math`` functions/constants plus ``True`` and ``False`` available.

    WARNING: ``eval`` with ``__builtins__`` set to ``None`` is not a real
    sandbox. Do not call this on untrusted input; prefer
    ``ast.literal_eval`` when only literals are needed.

    :param eval_str: Expression source to evaluate.
    :param kw: Accepted for backward compatibility; currently unused.
    :return: The value of the evaluated expression.
    :raises ValueError: If a mis-capitalised ``true`` word is found.
    """
    import math  # local import: the file's import block is not visible here

    # Scanner callbacks. Their return values are only labels; the scan
    # result itself is discarded below.
    def start_structure(scanner, token):
        return "start structure", token

    def key(scanner, token):
        return "key", token

    def value(scanner, token):
        # Only the exact spelling "True" is a valid boolean literal.
        if token.lower() == 'true' and token != 'True':
            # Was ``raise '<string>'``, which is not a valid exception.
            raise ValueError('value Error "%s"' % token)

    def str_value(scanner, token):
        return "string value", token

    def end_structure(scanner, token):
        return "end start structure", token

    scanner = re.Scanner([
        (r"[{\[(]", start_structure),
        (r"[\w]+\s*:", key),
        (r"['\"][^'\"]+['\"]", str_value),
        (r"[\w]+", value),
        (r"\s*,\s*", None),
        (r"[})\]]", end_structure),
    ])
    # Run only for the validation side effect in ``value``; scanning stops
    # silently at the first character no rule matches.
    scanner.scan(eval_str)

    # Whitelist of names exposed to the expression ('degrees' was
    # previously misspelled as 'de grees').
    safe_list = ['math', 'acos', 'asin', 'atan', 'atan2', 'ceil', 'cos',
                 'cosh', 'degrees', 'e', 'exp', 'fabs', 'floor', 'fmod',
                 'frexp', 'hypot', 'ldexp', 'log', 'log10', 'modf', 'pi',
                 'pow', 'radians', 'sin', 'sinh', 'sqrt', 'tan', 'tanh']
    # Resolve the names from the math module. The previous lookup through
    # locals() never found them, so every entry was None. Names that are
    # not math attributes (e.g. 'math' itself) stay None, as before.
    safe_dict = {name: getattr(math, name, None) for name in safe_list}

    # __builtins__ is disabled below, so add the boolean constants back
    # explicitly.
    safe_dict['True'] = True
    safe_dict['False'] = False
    return eval(eval_str, {'__builtins__': None}, safe_dict)