我们从Python开源项目中,提取了以下30个代码示例,用于说明如何使用pip._vendor.pyparsing.Word()。
def get(self, key, defaultValue=None): """ Returns named result matching the given key, or if there is no such name, then returns the given C{defaultValue} or C{None} if no C{defaultValue} is specified. Similar to C{dict.get()}. Example:: integer = Word(nums) date_str = integer("year") + '/' + integer("month") + '/' + integer("day") result = date_str.parseString("1999/12/31") print(result.get("year")) # -> '1999' print(result.get("hour", "not specified")) # -> 'not specified' print(result.get("hour")) # -> None """ if key in self: return self[key] else: return defaultValue
def insert( self, index, insStr ): """ Inserts new element at location index in the list of parsed tokens. Similar to C{list.insert()}. Example:: print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] # use a parse action to insert the parse location in the front of the parsed results def insert_locn(locn, tokens): tokens.insert(0, locn) print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321'] """ self.__toklist.insert(index, insStr) # fixup indices in token dictionary for name,occurrences in self.__tokdict.items(): for k, (value, position) in enumerate(occurrences): occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
def extend( self, itemseq ): """ Add sequence of elements to end of ParseResults list of elements. Example:: patt = OneOrMore(Word(alphas)) # use a parse action to append the reverse of the matched strings, to make a palindrome def make_palindrome(tokens): tokens.extend(reversed([t[::-1] for t in tokens])) return ''.join(tokens) print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl' """ if isinstance(itemseq, ParseResults): self += itemseq else: self.__toklist.extend(itemseq)
def pprint(self, *args, **kwargs): """ Pretty-printer for parsed results as a list, using the C{pprint} module. Accepts additional positional or keyword args as defined for the C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint}) Example:: ident = Word(alphas, alphanums) num = Word(nums) func = Forward() term = ident | num | Group('(' + func + ')') func <<= ident + Group(Optional(delimitedList(term))) result = func.parseString("fna a,b,(fnb c,d,200),100") result.pprint(width=40) prints:: ['fna', ['a', 'b', ['(', 'fnb', ['c', 'd', '200'], ')'], '100']] """ pprint.pprint(self.asList(), *args, **kwargs) # add support for pickle protocol
def copy( self ): """ Make a copy of this C{ParserElement}. Useful for defining different parse actions for the same parsing pattern, using copies of the original parse element. Example:: integer = Word(nums).setParseAction(lambda toks: int(toks[0])) integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K") integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M") print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M")) prints:: [5120, 100, 655360, 268435456] Equivalent form of C{expr.copy()} is just C{expr()}:: integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M") """ cpy = copy.copy( self ) cpy.parseAction = self.parseAction[:] cpy.ignoreExprs = self.ignoreExprs[:] if self.copyDefaultWhiteChars: cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS return cpy
def __add__(self, other ): """ Implementation of + operator - returns C{L{And}}. Adding strings to a ParserElement converts them to L{Literal}s by default. Example:: greet = Word(alphas) + "," + Word(alphas) + "!" hello = "Hello, World!" print (hello, "->", greet.parseString(hello)) Prints:: Hello, World! -> ['Hello', ',', 'World', '!'] """ if isinstance( other, basestring ): other = ParserElement._literalStringClass( other ) if not isinstance( other, ParserElement ): warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2) return None return And( [ self, other ] )
def __call__(self, name=None): """ Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}. If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be passed as C{True}. If C{name} is omitted, same as calling C{L{copy}}. Example:: # these are equivalent userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") """ if name is not None: return self.setResultsName(name) else: return self.copy()
def ignore( self, other ): """ Define expression to be ignored (e.g., comments) while doing pattern matching; may be called repeatedly, to define multiple comment or other ignorable patterns. Example:: patt = OneOrMore(Word(alphas)) patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj'] patt.ignore(cStyleComment) patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd'] """ if isinstance(other, basestring): other = Suppress(other) if isinstance( other, Suppress ): if other not in self.ignoreExprs: self.ignoreExprs.append(other) else: self.ignoreExprs.append( Suppress( other.copy() ) ) return self
def __str__( self ): try: return super(Word,self).__str__() except Exception: pass if self.strRepr is None: def charsAsStr(s): if len(s)>4: return s[:4]+"..." else: return s if ( self.initCharsOrig != self.bodyCharsOrig ): self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) ) else: self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig) return self.strRepr
def delimitedList( expr, delim=",", combine=False ): """ Helper to define a delimited list of expressions - the delimiter defaults to ','. By default, the list elements and delimiters can have intervening whitespace, and comments, but this can be overridden by passing C{combine=True} in the constructor. If C{combine} is set to C{True}, the matching tokens are returned as a single token string, with the delimiters included; otherwise, the matching tokens are returned as a list of tokens, with the delimiters suppressed. Example:: delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc'] delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE'] """ dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..." if combine: return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName) else: return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
def locatedExpr(expr): """ Helper to decorate a returned token with its starting and ending locations in the input string. This helper adds the following results names: - locn_start = location where matched expression begins - locn_end = location where matched expression ends - value = the actual parsed results Be careful if the input text contains C{<TAB>} characters, you may want to call C{L{ParserElement.parseWithTabs}} Example:: wd = Word(alphas) for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"): print(match) prints:: [[0, 'ljsdf', 5]] [[8, 'lksdjjf', 15]] [[18, 'lkkjj', 23]] """ locator = Empty().setParseAction(lambda s,l,t: l) return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end")) # convenience constants for positional expressions
def srange(s): r""" Helper to easily define string ranges for use in Word construction. Borrows syntax from regexp '[]' string range definitions:: srange("[0-9]") -> "0123456789" srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz" srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" The input string must be enclosed in []'s, and the returned string is the expanded character set joined into a single string. The values enclosed in the []'s may be: - a single character - an escaped character with a leading backslash (such as C{\-} or C{\]}) - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character) (C{\0x##} is also supported for backwards compatibility) - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character) - a range of any of the above, separated by a dash (C{'a-z'}, etc.) - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.) """ _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1)) try: return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body) except Exception: return ""
def append( self, item ): """ Add single element to end of ParseResults list of elements. Example:: print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] # use a parse action to compute the sum of the parsed integers, and add it to the end def append_sum(tokens): tokens.append(sum(map(int, tokens))) print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444] """ self.__toklist.append(item)
def asDict( self ): """ Returns the named parse results as a nested dictionary. Example:: integer = Word(nums) date_str = integer("year") + '/' + integer("month") + '/' + integer("day") result = date_str.parseString('12/31/1999') print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]}) result_dict = result.asDict() print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'} # even though a ParseResults supports dict-like access, sometime you just need to have a dict import json print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"} """ if PY_3: item_fn = self.items else: item_fn = self.iteritems def toItem(obj): if isinstance(obj, ParseResults): if obj.haskeys(): return obj.asDict() else: return [toItem(v) for v in obj] else: return obj return dict((k,toItem(v)) for k,v in item_fn())
def getName(self): r""" Returns the results name for this token expression. Useful when several different expressions might match at a particular location. Example:: integer = Word(nums) ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d") house_number_expr = Suppress('#') + Word(nums, alphanums) user_data = (Group(house_number_expr)("house_number") | Group(ssn_expr)("ssn") | Group(integer)("age")) user_info = OneOrMore(user_data) result = user_info.parseString("22 111-22-3333 #221B") for item in result: print(item.getName(), ':', item[0]) prints:: age : 22 ssn : 111-22-3333 house_number : 221B """ if self.__name: return self.__name elif self.__parent: par = self.__parent() if par: return par.__lookup(self) else: return None elif (len(self) == 1 and len(self.__tokdict) == 1 and next(iter(self.__tokdict.values()))[0][1] in (0,-1)): return next(iter(self.__tokdict.keys())) else: return None
def setDefaultWhitespaceChars( chars ): r""" Overrides the default whitespace chars Example:: # default whitespace chars are space, <TAB> and newline OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl'] # change to just treat newline as significant ParserElement.setDefaultWhitespaceChars(" \t") OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def'] """ ParserElement.DEFAULT_WHITE_CHARS = chars
def setName( self, name ): """ Define name for this expression, makes debugging and exception messages clearer. Example:: Word(nums).parseString("ABC") # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1) Word(nums).setName("integer").parseString("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1) """ self.name = name self.errmsg = "Expected " + self.name if hasattr(self,"exception"): self.exception.msg = self.errmsg return self
def setParseAction( self, *fns, **kwargs ): """ Define one or more actions to perform when successfully matching parse element definition. Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)}, C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where: - s = the original string being parsed (see note below) - loc = the location of the matching substring - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object If the functions in fns modify the tokens, they can return them as the return value from fn, and the modified list of tokens will replace the original. Otherwise, fn does not need to return any value. Optional keyword arguments: - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing Note: the default parsing behavior is to expand tabs in the input string before starting the parsing process. See L{I{parseString}<parseString>} for more information on parsing strings containing C{<TAB>}s, and suggested methods to maintain a consistent view of the parsed string, the parse location, and line and column positions within the parsed string. Example:: integer = Word(nums) date_str = integer + '/' + integer + '/' + integer date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31'] # use parse action to convert to ints at parse time integer = Word(nums).setParseAction(lambda toks: int(toks[0])) date_str = integer + '/' + integer + '/' + integer # note that integer fields are now ints, not strings date_str.parseString("1999/12/31") # -> [1999, '/', 12, '/', 31] """ self.parseAction = list(map(_trim_arity, list(fns))) self.callDuringTry = kwargs.get("callDuringTry", False) return self
def addCondition(self, *fns, **kwargs): """Add a boolean predicate function to expression's list of parse actions. See L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction}, functions passed to C{addCondition} need to return boolean success/fail of the condition. Optional keyword arguments: - message = define a custom message to be used in the raised exception - fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException Example:: integer = Word(nums).setParseAction(lambda toks: int(toks[0])) year_int = integer.copy() year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later") date_str = year_int + '/' + integer + '/' + integer result = date_str.parseString("1999/12/31") # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1) """ msg = kwargs.get("message", "failed user-defined condition") exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException for fn in fns: def pa(s,l,t): if not bool(_trim_arity(fn)(s,l,t)): raise exc_type(s,l,msg) self.parseAction.append(pa) self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False) return self
def searchString( self, instring, maxMatches=_MAX_INT ): """ Another extension to C{L{scanString}}, simplifying the access to the tokens found to match the given parse expression. May be called with optional C{maxMatches} argument, to clip searching after 'n' matches are found. Example:: # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters cap_word = Word(alphas.upper(), alphas.lower()) print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")) # the sum() builtin can be used to merge results into a single ParseResults object print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))) prints:: [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']] ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity'] """ try: return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ]) except ParseBaseException as exc: if ParserElement.verbose_stacktrace: raise else: # catch and re-raise exception from here, clears out pyparsing internal stack trace raise exc
def setDebug( self, flag=True ): """ Enable display of debugging messages while doing pattern matching. Set C{flag} to True to enable, False to disable. Example:: wd = Word(alphas).setName("alphaword") integer = Word(nums).setName("numword") term = wd | integer # turn on debugging for wd wd.setDebug() OneOrMore(term).parseString("abc 123 xyz 890") prints:: Match alphaword at loc 0(1,1) Matched alphaword -> ['abc'] Match alphaword at loc 3(1,4) Exception raised:Expected alphaword (at char 4), (line:1, col:5) Match alphaword at loc 7(1,8) Matched alphaword -> ['xyz'] Match alphaword at loc 11(1,12) Exception raised:Expected alphaword (at char 12), (line:1, col:13) Match alphaword at loc 15(1,16) Exception raised:Expected alphaword (at char 15), (line:1, col:16) The output shown is that produced by the default debug actions - custom debug actions can be specified using L{setDebugActions}. Prior to attempting to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"} is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"} message is shown. Also note the use of L{setName} to assign a human-readable name to the expression, which makes debugging and exception messages easier to understand - for instance, the default name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}. """ if flag: self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction ) else: self.debug = False return self
def countedArray( expr, intExpr=None ): """ Helper to define a counted list of expressions. This helper defines a pattern of the form:: integer expr expr expr... where the leading integer tells how many expr expressions follow. The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed. If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value. Example:: countedArray(Word(alphas)).parseString('2 ab cd ef') # -> ['ab', 'cd'] # in this parser, the leading integer value is given in binary, # '10' indicating that 2 values are in the array binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2)) countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef') # -> ['ab', 'cd'] """ arrayExpr = Forward() def countFieldParseAction(s,l,t): n = t[0] arrayExpr << (n and Group(And([expr]*n)) or Group(empty)) return [] if intExpr is None: intExpr = Word(nums).setParseAction(lambda t:int(t[0])) else: intExpr = intExpr.copy() intExpr.setName("arrayLen") intExpr.addParseAction(countFieldParseAction, callDuringTry=True) return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')
def matchPreviousExpr(expr): """ Helper to define an expression that is indirectly defined from the tokens matched in a previous expression, that is, it looks for a 'repeat' of a previous expression. For example:: first = Word(nums) second = matchPreviousExpr(first) matchExpr = first + ":" + second will match C{"1:1"}, but not C{"1:2"}. Because this matches by expressions, will I{not} match the leading C{"1:1"} in C{"1:10"}; the expressions are evaluated first, and then compared, so C{"1"} is compared with C{"10"}. Do I{not} use with packrat parsing enabled. """ rep = Forward() e2 = expr.copy() rep <<= e2 def copyTokenToRepeater(s,l,t): matchTokens = _flatten(t.asList()) def mustMatchTheseTokens(s,l,t): theseTokens = _flatten(t.asList()) if theseTokens != matchTokens: raise ParseException("",0,"") rep.setParseAction( mustMatchTheseTokens, callDuringTry=True ) expr.addParseAction(copyTokenToRepeater, callDuringTry=True) rep.setName('(prev) ' + _ustr(expr)) return rep
def replaceWith(replStr): """ Helper method for common parse actions that simply return a literal value. Especially useful when used with C{L{transformString<ParserElement.transformString>}()}. Example:: num = Word(nums).setParseAction(lambda toks: int(toks[0])) na = oneOf("N/A NA").setParseAction(replaceWith(math.nan)) term = na | num OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234] """ return lambda s,l,t: [replStr]
def pop( self, *args, **kwargs): """ Removes and returns item at specified index (default=C{last}). Supports both C{list} and C{dict} semantics for C{pop()}. If passed no argument or an integer argument, it will use C{list} semantics and pop tokens from the list of parsed tokens. If passed a non-integer argument (most likely a string), it will use C{dict} semantics and pop the corresponding value from any defined results names. A second default return value argument is supported, just as in C{dict.pop()}. Example:: def remove_first(tokens): tokens.pop(0) print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321'] label = Word(alphas) patt = label("LABEL") + OneOrMore(Word(nums)) print(patt.parseString("AAB 123 321").dump()) # Use pop() in a parse action to remove named result (note that corresponding value is not # removed from list form of results) def remove_LABEL(tokens): tokens.pop("LABEL") return tokens patt.addParseAction(remove_LABEL) print(patt.parseString("AAB 123 321").dump()) prints:: ['AAB', '123', '321'] - LABEL: AAB ['AAB', '123', '321'] """ if not args: args = [-1] for k,v in kwargs.items(): if k == 'default': args = (args[0], v) else: raise TypeError("pop() got an unexpected keyword argument '%s'" % k) if (isinstance(args[0], int) or len(args) == 1 or args[0] in self): index = args[0] ret = self[index] del self[index] return ret else: defaultvalue = args[1] return defaultvalue
def dump(self, indent='', depth=0, full=True): """ Diagnostic method for listing out the contents of a C{ParseResults}. Accepts an optional C{indent} argument so that this string can be embedded in a nested display of other data. Example:: integer = Word(nums) date_str = integer("year") + '/' + integer("month") + '/' + integer("day") result = date_str.parseString('12/31/1999') print(result.dump()) prints:: ['12', '/', '31', '/', '1999'] - day: 1999 - month: 31 - year: 12 """ out = [] NL = '\n' out.append( indent+_ustr(self.asList()) ) if full: if self.haskeys(): items = sorted((str(k), v) for k,v in self.items()) for k,v in items: if out: out.append(NL) out.append( "%s%s- %s: " % (indent,(' '*depth), k) ) if isinstance(v,ParseResults): if v: out.append( v.dump(indent,depth+1) ) else: out.append(_ustr(v)) else: out.append(repr(v)) elif any(isinstance(vv,ParseResults) for vv in self): v = self for i,vv in enumerate(v): if isinstance(vv,ParseResults): out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) )) else: out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv))) return "".join(out)
def parseString( self, instring, parseAll=False ): """ Execute the parse expression with the given string. This is the main interface to the client code, once the complete expression has been built. If you want the grammar to require that the entire input string be successfully parsed, then set C{parseAll} to True (equivalent to ending the grammar with C{L{StringEnd()}}). Note: C{parseString} implicitly calls C{expandtabs()} on the input string, in order to report proper column numbers in parse actions. If the input string contains tabs and the grammar uses parse actions that use the C{loc} argument to index into the string being parsed, you can ensure you have a consistent view of the input string by: - calling C{parseWithTabs} on your grammar before calling C{parseString} (see L{I{parseWithTabs}<parseWithTabs>}) - define your parse action using the full C{(s,loc,toks)} signature, and reference the input string using the parse action's C{s} argument - explictly expand the tabs in your input string before calling C{parseString} Example:: Word('a').parseString('aaaaabaaa') # -> ['aaaaa'] Word('a').parseString('aaaaabaaa', parseAll=True) # -> Exception: Expected end of text """ ParserElement.resetCache() if not self.streamlined: self.streamline() #~ self.saveAsList = True for e in self.ignoreExprs: e.streamline() if not self.keepTabs: instring = instring.expandtabs() try: loc, tokens = self._parse( instring, 0 ) if parseAll: loc = self.preParse( instring, loc ) se = Empty() + StringEnd() se._parse( instring, loc ) except ParseBaseException as exc: if ParserElement.verbose_stacktrace: raise else: # catch and re-raise exception from here, clears out pyparsing internal stack trace raise exc else: return tokens
def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ): super(Word,self).__init__() if excludeChars: initChars = ''.join(c for c in initChars if c not in excludeChars) if bodyChars: bodyChars = ''.join(c for c in bodyChars if c not in excludeChars) self.initCharsOrig = initChars self.initChars = set(initChars) if bodyChars : self.bodyCharsOrig = bodyChars self.bodyChars = set(bodyChars) else: self.bodyCharsOrig = initChars self.bodyChars = set(initChars) self.maxSpecified = max > 0 if min < 1: raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted") self.minLen = min if max > 0: self.maxLen = max else: self.maxLen = _MAX_INT if exact > 0: self.maxLen = exact self.minLen = exact self.name = _ustr(self) self.errmsg = "Expected " + self.name self.mayIndexError = False self.asKeyword = asKeyword if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0): if self.bodyCharsOrig == self.initCharsOrig: self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) elif len(self.initCharsOrig) == 1: self.reString = "%s[%s]*" % \ (re.escape(self.initCharsOrig), _escapeRegexRangeChars(self.bodyCharsOrig),) else: self.reString = "[%s][%s]*" % \ (_escapeRegexRangeChars(self.initCharsOrig), _escapeRegexRangeChars(self.bodyCharsOrig),) if self.asKeyword: self.reString = r"\b"+self.reString+r"\b" try: self.re = re.compile( self.reString ) except Exception: self.re = None
def traceParseAction(f): """ Decorator for debugging parse actions. When the parse action is called, this decorator will print C{">> entering I{method-name}(line:I{current_source_line}, I{parse_location}, I{matched_tokens})".} When the parse action completes, the decorator will print C{"<<"} followed by the returned value, or any exception that the parse action raised. Example:: wd = Word(alphas) @traceParseAction def remove_duplicate_chars(tokens): return ''.join(sorted(set(''.join(tokens))) wds = OneOrMore(wd).setParseAction(remove_duplicate_chars) print(wds.parseString("slkdjs sld sldd sdlf sdljf")) prints:: >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {})) <<leaving remove_duplicate_chars (ret: 'dfjkls') ['dfjkls'] """ f = _trim_arity(f) def z(*paArgs): thisFunc = f.__name__ s,l,t = paArgs[-3:] if len(paArgs)>3: thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc,line(l,s),l,t) ) try: ret = f(*paArgs) except Exception as exc: sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) ) raise sys.stderr.write( "<<leaving %s (ret: %r)\n" % (thisFunc,ret) ) return ret try: z.__name__ = f.__name__ except AttributeError: pass return z # # global helpers #
def tokenMap(func, *args): """ Helper to define a parse action by mapping a function to all elements of a ParseResults list.If any additional args are passed, they are forwarded to the given function as additional arguments after the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the parsed data to an integer using base 16. Example (compare the last to example in L{ParserElement.transformString}:: hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16)) hex_ints.runTests(''' 00 11 22 aa FF 0a 0d 1a ''') upperword = Word(alphas).setParseAction(tokenMap(str.upper)) OneOrMore(upperword).runTests(''' my kingdom for a horse ''') wd = Word(alphas).setParseAction(tokenMap(str.title)) OneOrMore(wd).setParseAction(' '.join).runTests(''' now is the winter of our discontent made glorious summer by this sun of york ''') prints:: 00 11 22 aa FF 0a 0d 1a [0, 17, 34, 170, 255, 10, 13, 26] my kingdom for a horse ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE'] now is the winter of our discontent made glorious summer by this sun of york ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York'] """ def pa(s,l,t): return [func(tokn, *args) for tokn in t] try: func_name = getattr(func, '__name__', getattr(func, '__class__').__name__) except Exception: func_name = str(func) pa.__name__ = func_name return pa