The following 10 code examples, extracted from open-source Python projects, illustrate how pyparsing.Regex() is used.
Example 1
def lexical_analysis(self, src):
    delimited = re.sub(r'\s+', ' ', ' '.join(src.strip().split('\n'))).split(';')
    result = []
    for stmt in delimited:
        if stmt == '':
            return result
        string = pp.Regex('[a-zA-Z0-9=_]+')
        nums = pp.Regex('[0-9]+')
        ws = pp.OneOrMore(pp.White()).suppress()
        lp = pp.Regex('[(]').suppress()
        rp = pp.Regex('[)]').suppress()
        c = pp.Regex('[,]').suppress()
        q = pp.Regex("[']").suppress()

        table_name = string.setResultsName('table_name')
        create_table = (pp.Keyword('CREATE', caseless=True) + ws +
                        pp.Keyword('TABLE', caseless=True) + ws +
                        pp.Optional(pp.Keyword('IF', caseless=True) + ws +
                                    pp.Keyword('NOT', caseless=True) + ws +
                                    pp.Keyword('EXISTS', caseless=True))).suppress() + table_name + lp

        column_name = string.setResultsName('column_name')
        data_type = string.setResultsName('data_type')
        length = lp + nums.setResultsName('length') + rp
        nullable = (pp.Optional(pp.Keyword('NOT', caseless=True) + ws) +
                    pp.Keyword('NULL', caseless=True)).setResultsName('nullable')
        default_value = (pp.Keyword('DEFAULT', caseless=True).suppress() + ws +
                         string.setResultsName('default_value'))
        auto_increment = pp.Keyword('AUTO_INCREMENT', caseless=True).setResultsName('auto_increment')
        column = (pp.Optional(ws) + column_name + ws + data_type +
                  pp.Optional(pp.MatchFirst([length, ws + nullable, ws + default_value, ws + auto_increment])) +
                  pp.Optional(pp.MatchFirst([ws + nullable, ws + default_value, ws + auto_increment])) +
                  pp.Optional(pp.MatchFirst([ws + default_value, ws + auto_increment])) +
                  pp.Optional(ws + auto_increment) +
                  pp.Optional(ws) + c)

        primary_key = (pp.Keyword('PRIMARY KEY', caseless=True).suppress() + lp +
                       pp.OneOrMore(q + string.setResultsName('primary_key') + q + pp.Optional(c)) +
                       rp + pp.Optional(c))
        key = (pp.Keyword('KEY', caseless=True).suppress() + lp + q +
               string.setResultsName('key') + q + pp.Optional(c) + rp + pp.Optional(c))

        parser = (create_table +
                  pp.OneOrMore(pp.Group(column)) +
                  pp.Optional(primary_key) +
                  pp.Optional(key) +
                  rp +
                  pp.OneOrMore(ws + string).suppress())
        result.append(parser.parseString(stmt, parseAll=True))
    return result
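As a minimal, self-contained check of the Regex-based tokens above (the '(255)' input is a made-up column length, not from the project):

import pyparsing as pp

lp = pp.Regex('[(]').suppress()
rp = pp.Regex('[)]').suppress()
nums = pp.Regex('[0-9]+').setResultsName('length')

# '(255)' stands in for a column length such as VARCHAR(255)
print((lp + nums + rp).parseString('(255)')['length'])  # prints: 255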
Example 2
def _build_parser():
    date_literal = pp.Regex(r'(?P<year>\d{4})/(?P<month>\d{2})/(?P<day>\d{2})') \
        .setParseAction(lambda s, l, t: schema.Date(t.year, t.month, t.day))
    dollars_literal = pp.Regex(r'\$\d+(\.\d{2})') \
        .setParseAction(lambda s, l, t: schema.Dollars(t[0]))
    string_literal = (pp.QuotedString('"', escChar='\\') | pp.QuotedString("'", escChar='\\')) \
        .setParseAction(lambda s, l, t: schema.String(t[0]))
    literal = date_literal | dollars_literal | string_literal

    ident = pp.Word(pp.alphas)

    match_op = pp.oneOf(operator_map.keys())
    match = ident + match_op + literal

    assign_op = pp.Literal('=')
    assign = ident + assign_op + literal

    part = (match | assign).setParseAction(lambda s, l, t: [t])
    rule = pp.delimitedList(part) + pp.LineEnd()
    return rule
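A standalone sketch of the date literal above, with the project's schema.Date swapped for a plain tuple so the snippet runs on its own:

import pyparsing as pp

date_literal = pp.Regex(r'(?P<year>\d{4})/(?P<month>\d{2})/(?P<day>\d{2})')
# Named groups in a pyparsing Regex become results names, so t.year etc. are available
date_literal.setParseAction(lambda s, l, t: [(int(t.year), int(t.month), int(t.day))])
print(date_literal.parseString('2021/03/15')[0])  # prints: (2021, 3, 15)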
Example 3
def _parse_atat_lattice(lattice_in):
    """Parse an ATAT-style `lat.in` string.

    The parsed string will be in three groups: (Coordinate system) (lattice) (atoms)
    where the atom group is split up into subgroups, each describing the position and atom name.
    """
    float_number = Regex(r'[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?').setParseAction(lambda t: [float(t[0])])
    vector = Group(float_number + float_number + float_number)
    angles = vector
    vector_line = vector + Suppress(LineEnd())
    coord_sys = Group((vector_line + vector_line + vector_line) |
                      (vector + angles + Suppress(LineEnd())))
    lattice = Group(vector + vector + vector)
    atom = Group(vector + Group(OneOrMore(Word(alphas + '_'))))
    atat_lattice_grammer = coord_sys + lattice + Group(OneOrMore(atom))
    # parse the input string into the (coord system, lattice, atoms) groups
    return atat_lattice_grammer.parseString(lattice_in)
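A quick, self-contained check of the scientific-notation float pattern above, on a made-up coordinate line:

from pyparsing import Regex

float_number = Regex(r'[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?').setParseAction(lambda t: [float(t[0])])
# Three floats, as in one lattice vector line
print((float_number * 3).parseString('0.0 1.5e-3 -2').asList())  # prints: [0.0, 0.0015, -2.0]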
Example 4
def _create_simple_statements():
    global binary, ident, rvalue, simple_statement, semi, comp, number, slot_id, callrpc_stmt, generic_statement, streamer_stmt, stream, selector

    if simple_statement is not None:
        return

    meta_stmt = Group(Literal('meta').suppress() + ident + Literal('=').suppress() + rvalue + semi).setResultsName('meta_statement')
    require_stmt = Group(Literal('require').suppress() + ident + comp + rvalue + semi).setResultsName('require_statement')
    set_stmt = Group(Literal('set').suppress() - (ident | number) - Literal("to").suppress() - (rvalue | binary) -
                     Optional(Literal('as').suppress() + config_type) + semi).setResultsName('set_statement')
    callrpc_stmt = Group(Literal("call").suppress() + (ident | number) + Literal("on").suppress() + slot_id +
                         Optional(Literal("=>").suppress() + stream('explicit_stream')) + semi).setResultsName('call_statement')
    streamer_stmt = Group(Optional(Literal("manual")('manual')) +
                          Optional(oneOf(u'encrypted signed')('security')) +
                          Optional(Literal(u'realtime')('realtime')) +
                          Literal('streamer').suppress() -
                          Literal('on').suppress() -
                          selector('selector') -
                          Optional(Literal('to').suppress() - slot_id('explicit_tile')) -
                          Optional(Literal('with').suppress() - Literal('streamer').suppress() - number('with_other')) -
                          semi).setResultsName('streamer_statement')
    copy_stmt = Group(Literal("copy").suppress() -
                      Optional(oneOf("all count average")('modifier')) -
                      Optional(stream('explicit_input') | number('constant_input')) -
                      Literal("=>") -
                      stream("output") -
                      semi).setResultsName('copy_statement')
    trigger_stmt = Group(Literal("trigger") - Literal("streamer") - number('index') - semi).setResultsName('trigger_statement')

    simple_statement = meta_stmt | require_stmt | set_stmt | callrpc_stmt | streamer_stmt | trigger_stmt | copy_stmt

    # In generic statements, keep track of the location where the match started for error handling
    locator = Empty().setParseAction(lambda s, l, t: l)('location')
    generic_statement = Group(locator +
                              Group(ZeroOrMore(Regex(u"[^{};]+")) + Literal(u';'))('match')).setResultsName('unparsed_statement')
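The locator idiom at the end is useful on its own; here is a minimal sketch with illustrative names (stmt is not from the project):

from pyparsing import Empty, Group, Literal, Regex

# Empty() consumes nothing; its parse action records where the match began
locator = Empty().setParseAction(lambda s, l, t: l)('location')
stmt = Group(locator + Regex(r'[^;]+') + Literal(';'))
print(stmt.parseString('call rpc;')[0]['location'])  # prints: 0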
Example 5
def lexical_analysis(self, src):
    string = pp.Regex('[a-zA-Z0-9_{}"=+\-*/\.:;&%@$#<>? ?-??-??-???-???????-?]+')
    blank = pp.LineStart() + pp.LineEnd()

    start = '['
    end = ']' + pp.LineEnd()

    graph_tag = pp.LineStart() + '@'
    graph = graph_tag + start + string + end

    view_tag = pp.LineStart() + '#'
    view = view_tag + start + string + end

    server_process_tag = pp.LineStart() + '$'
    server_process = server_process_tag + start + string + end

    client_process_tag = pp.LineStart() + '%'
    client_process = client_process_tag + start + string + end

    view_transition_identifier = pp.LineStart() + '-->'
    view_transition = view_transition_identifier + string

    process_transition_identifier = pp.LineStart() + '==>'
    process_transition = process_transition_identifier + string

    state_machine = pp.OneOrMore(graph | view | server_process | client_process |
                                 view_transition | process_transition | string | blank)
    return state_machine.parseString(src)
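A minimal ASCII-only sketch of the tag pattern above (the original character class also admits a range of non-ASCII characters):

import pyparsing as pp

string = pp.Regex('[a-zA-Z0-9_ ]+')
graph = pp.LineStart() + '@' + '[' + string + ']' + pp.LineEnd()
print(graph.parseString('@[main menu]').asList())  # prints: ['@', '[', 'main menu', ']']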
Example 6
def make_grammar():
    """Creates the grammar to be used by a spec matcher."""
    # This is apparently how pyparsing recommends to be used,
    # as http://pyparsing.wikispaces.com/share/view/644825 states that
    # it is not thread-safe to use a parser across threads.
    unary_ops = (
        # Order matters here (so that '=' doesn't match before '==')
        Literal("==") | Literal("=") |
        Literal("!=") | Literal("<in>") |
        Literal(">=") | Literal("<=") |
        Literal(">") | Literal("<") |
        Literal("s==") | Literal("s!=") |
        # Order matters here (so that 's<' doesn't match before 's<=')
        Literal("s<=") | Literal("s<") |
        # Order matters here (so that 's>' doesn't match before 's>=')
        Literal("s>=") | Literal("s>"))

    or_ = Literal("<or>")

    # An atom is anything not a keyword followed by anything but whitespace
    atom = ~(unary_ops | or_) + Regex(r"\S+")

    unary = unary_ops + atom

    disjunction = OneOrMore(or_ + atom)
    # Even-numbered tokens will be '<or>', so we drop them
    disjunction.setParseAction(lambda _s, _l, t: ["<or>"] + t[1::2])

    expr = disjunction | unary | atom
    return expr
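A quick exercise of the returned grammar, assuming make_grammar and the pyparsing names above are in scope:

grammar = make_grammar()
print(grammar.parseString('>= 4').asList())           # unary: prints ['>=', '4']
print(grammar.parseString('<or> a <or> b').asList())  # disjunction: prints ['<or>', 'a', 'b']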
Example 7
def getnewnick(sentence):
    """Parse detail to get new nick."""
    nick = pp.Regex(r'[a-zA-Z0-9\-_|^]+').setResultsName('nick')
    parser = pp.Literal('is now known as') + nick + pp.restOfLine
    try:
        result = parser.parseString(sentence)
        return result.nick
    except pp.ParseException:
        pass
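A hypothetical log line to exercise the function (the nick is made up):

print(getnewnick('is now known as guest|42'))  # prints: guest|42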
Example 8
def __init__(self):
    """
    Create a parser that parses arithmetic expressions. They can contain
    variable identifiers or raw numbers. The meaning of the identifiers is
    left to the caller.
    """
    number = p.Regex(r'\d+(\.\d*)?([eE]\d+)?')
    identifier = p.Word(p.alphas)
    terminal = identifier | number
    self._expr = p.infixNotation(terminal, [
        (p.oneOf('* /'), 2, p.opAssoc.LEFT),
        (p.oneOf('+ -'), 2, p.opAssoc.LEFT)
    ]) + p.stringEnd()
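The same grammar duplicated outside the class so it can be checked standalone (a sketch, not the project's public API):

import pyparsing as p

number = p.Regex(r'\d+(\.\d*)?([eE]\d+)?')
identifier = p.Word(p.alphas)
expr = p.infixNotation(identifier | number, [
    (p.oneOf('* /'), 2, p.opAssoc.LEFT),
    (p.oneOf('+ -'), 2, p.opAssoc.LEFT),
]) + p.stringEnd()
# infixNotation nests groups by precedence: '*' binds tighter than '+'
print(expr.parseString('a + 2 * b').asList())  # prints: [['a', '+', ['2', '*', 'b']]]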
Example 9
def _cast_transformer(self):
    """Removes obvious casts."""
    return pyparsing.Combine(
        pyparsing.Regex(r"\([^()]*\)").suppress() +
        (pyparsing.Word(pyparsing.alphanums + "_") |
         pyparsing.Literal("(")),
        adjacent=False)
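The same combinator built at module level and applied with transformString to a made-up input, to show the cast being stripped:

import pyparsing

transformer = pyparsing.Combine(
    pyparsing.Regex(r"\([^()]*\)").suppress() +
    (pyparsing.Word(pyparsing.alphanums + "_") |
     pyparsing.Literal("(")),
    adjacent=False)
# The parenthesized cast is suppressed; only the trailing identifier survives
print(transformer.transformString('(struct foo *)bar + 1'))  # prints: bar + 1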
Example 10
def _create_primitives():
    global binary, ident, rvalue, number, quoted_string, semi, time_interval, slot_id, comp, config_type, stream, comment, stream_trigger, selector

    if ident is not None:
        return

    semi = Literal(u';').suppress()
    ident = Word(alphas + u"_", alphas + nums + u"_")
    number = Regex(u'((0x[a-fA-F0-9]+)|[+-]?[0-9]+)').setParseAction(lambda s, l, t: [int(t[0], 0)])
    binary = Regex(u'hex:([a-fA-F0-9][a-fA-F0-9])+').setParseAction(lambda s, l, t: [unhexlify(t[0][4:])])
    quoted_string = dblQuotedString

    comment = Literal('#') + restOfLine
    rvalue = number | quoted_string

    # Convert all time intervals into an integer number of seconds
    time_unit_multipliers = {
        u'second': 1,
        u'seconds': 1,
        u'minute': 60,
        u'minutes': 60,
        u'hour': 60 * 60,
        u'hours': 60 * 60,
        u'day': 60 * 60 * 24,
        u'days': 60 * 60 * 24,
        u'week': 60 * 60 * 24 * 7,    # time_unit below accepts week/weeks, so they need multipliers
        u'weeks': 60 * 60 * 24 * 7,
        u'month': 60 * 60 * 24 * 30,
        u'months': 60 * 60 * 24 * 30,
        u'year': 60 * 60 * 24 * 365,
        u'years': 60 * 60 * 24 * 365,
    }

    config_type = oneOf('uint8_t uint16_t uint32_t int8_t int16_t int32_t uint8_t[] uint16_t[] uint32_t[] int8_t[] int16_t[] int32_t[] string binary')
    comp = oneOf('> < >= <= == ~=')

    time_unit = oneOf(u"second seconds minute minutes hour hours day days week weeks month months year years")
    time_interval = (number + time_unit).setParseAction(lambda s, l, t: [t[0] * time_unit_multipliers[t[1]]])

    slot_id = Literal(u"controller") | (Literal(u'slot') + number)
    slot_id.setParseAction(lambda s, l, t: [SlotIdentifier.FromString(u' '.join([str(x) for x in t]))])

    stream_modifier = Literal("system") | Literal("user") | Literal("combined")

    stream = Optional(Literal("system")) + oneOf("buffered unbuffered input output counter constant") + number + Optional(Literal("node"))
    stream.setParseAction(lambda s, l, t: [DataStream.FromString(u' '.join([str(x) for x in t]))])

    all_selector = Optional(Literal("all")) + Optional(stream_modifier) + oneOf("buffered unbuffered inputs outputs counters constants") + Optional(Literal("nodes"))
    all_selector.setParseAction(lambda s, l, t: [DataStreamSelector.FromString(u' '.join([str(x) for x in t]))])
    one_selector = Optional(Literal("system")) + oneOf("buffered unbuffered input output counter constant") + number + Optional(Literal("node"))
    one_selector.setParseAction(lambda s, l, t: [DataStreamSelector.FromString(u' '.join([str(x) for x in t]))])

    selector = one_selector | all_selector

    trigger_comp = oneOf('> < >= <= ==')
    stream_trigger = Group((Literal(u'count') | Literal(u'value')) + Literal(u'(').suppress() - stream - Literal(u')').suppress() - trigger_comp - number).setResultsName('stream_trigger')
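A standalone sketch of the time-interval conversion above, trimmed to a few units (illustrative names, not the project's globals):

from pyparsing import Regex, oneOf

number = Regex(r'((0x[a-fA-F0-9]+)|[+-]?[0-9]+)').setParseAction(lambda s, l, t: [int(t[0], 0)])
multipliers = {'second': 1, 'seconds': 1, 'minute': 60, 'minutes': 60, 'hour': 3600, 'hours': 3600}
time_unit = oneOf(' '.join(multipliers))
# The parse action folds the number and its unit into a single integer of seconds
time_interval = (number + time_unit).setParseAction(lambda s, l, t: [t[0] * multipliers[t[1]]])
print(time_interval.parseString('10 minutes')[0])  # prints: 600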