我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用pyparsing.Word()。
def get(self, key, defaultValue=None): """ Returns named result matching the given key, or if there is no such name, then returns the given C{defaultValue} or C{None} if no C{defaultValue} is specified. Similar to C{dict.get()}. Example:: integer = Word(nums) date_str = integer("year") + '/' + integer("month") + '/' + integer("day") result = date_str.parseString("1999/12/31") print(result.get("year")) # -> '1999' print(result.get("hour", "not specified")) # -> 'not specified' print(result.get("hour")) # -> None """ if key in self: return self[key] else: return defaultValue
def insert( self, index, insStr ): """ Inserts new element at location index in the list of parsed tokens. Similar to C{list.insert()}. Example:: print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] # use a parse action to insert the parse location in the front of the parsed results def insert_locn(locn, tokens): tokens.insert(0, locn) print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321'] """ self.__toklist.insert(index, insStr) # fixup indices in token dictionary for name,occurrences in self.__tokdict.items(): for k, (value, position) in enumerate(occurrences): occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
def extend( self, itemseq ): """ Add sequence of elements to end of ParseResults list of elements. Example:: patt = OneOrMore(Word(alphas)) # use a parse action to append the reverse of the matched strings, to make a palindrome def make_palindrome(tokens): tokens.extend(reversed([t[::-1] for t in tokens])) return ''.join(tokens) print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl' """ if isinstance(itemseq, ParseResults): self += itemseq else: self.__toklist.extend(itemseq)
def pprint(self, *args, **kwargs): """ Pretty-printer for parsed results as a list, using the C{pprint} module. Accepts additional positional or keyword args as defined for the C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint}) Example:: ident = Word(alphas, alphanums) num = Word(nums) func = Forward() term = ident | num | Group('(' + func + ')') func <<= ident + Group(Optional(delimitedList(term))) result = func.parseString("fna a,b,(fnb c,d,200),100") result.pprint(width=40) prints:: ['fna', ['a', 'b', ['(', 'fnb', ['c', 'd', '200'], ')'], '100']] """ pprint.pprint(self.asList(), *args, **kwargs) # add support for pickle protocol
def copy( self ): """ Make a copy of this C{ParserElement}. Useful for defining different parse actions for the same parsing pattern, using copies of the original parse element. Example:: integer = Word(nums).setParseAction(lambda toks: int(toks[0])) integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K") integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M") print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M")) prints:: [5120, 100, 655360, 268435456] Equivalent form of C{expr.copy()} is just C{expr()}:: integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M") """ cpy = copy.copy( self ) cpy.parseAction = self.parseAction[:] cpy.ignoreExprs = self.ignoreExprs[:] if self.copyDefaultWhiteChars: cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS return cpy
def searchString( self, instring, maxMatches=_MAX_INT ): """ Another extension to C{L{scanString}}, simplifying the access to the tokens found to match the given parse expression. May be called with optional C{maxMatches} argument, to clip searching after 'n' matches are found. Example:: # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters cap_word = Word(alphas.upper(), alphas.lower()) print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")) prints:: ['More', 'Iron', 'Lead', 'Gold', 'I'] """ try: return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ]) except ParseBaseException as exc: if ParserElement.verbose_stacktrace: raise else: # catch and re-raise exception from here, clears out pyparsing internal stack trace raise exc
def __add__(self, other ): """ Implementation of + operator - returns C{L{And}}. Adding strings to a ParserElement converts them to L{Literal}s by default. Example:: greet = Word(alphas) + "," + Word(alphas) + "!" hello = "Hello, World!" print (hello, "->", greet.parseString(hello)) Prints:: Hello, World! -> ['Hello', ',', 'World', '!'] """ if isinstance( other, basestring ): other = ParserElement._literalStringClass( other ) if not isinstance( other, ParserElement ): warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2) return None return And( [ self, other ] )
def __call__(self, name=None): """ Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}. If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be passed as C{True}. If C{name} is omitted, same as calling C{L{copy}}. Example:: # these are equivalent userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") """ if name is not None: return self.setResultsName(name) else: return self.copy()
def ignore( self, other ): """ Define expression to be ignored (e.g., comments) while doing pattern matching; may be called repeatedly, to define multiple comment or other ignorable patterns. Example:: patt = OneOrMore(Word(alphas)) patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj'] patt.ignore(cStyleComment) patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd'] """ if isinstance(other, basestring): other = Suppress(other) if isinstance( other, Suppress ): if other not in self.ignoreExprs: self.ignoreExprs.append(other) else: self.ignoreExprs.append( Suppress( other.copy() ) ) return self
def __str__( self ): try: return super(Word,self).__str__() except Exception: pass if self.strRepr is None: def charsAsStr(s): if len(s)>4: return s[:4]+"..." else: return s if ( self.initCharsOrig != self.bodyCharsOrig ): self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) ) else: self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig) return self.strRepr
def delimitedList( expr, delim=",", combine=False ): """ Helper to define a delimited list of expressions - the delimiter defaults to ','. By default, the list elements and delimiters can have intervening whitespace, and comments, but this can be overridden by passing C{combine=True} in the constructor. If C{combine} is set to C{True}, the matching tokens are returned as a single token string, with the delimiters included; otherwise, the matching tokens are returned as a list of tokens, with the delimiters suppressed. Example:: delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc'] delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE'] """ dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..." if combine: return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName) else: return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
def locatedExpr(expr): """ Helper to decorate a returned token with its starting and ending locations in the input string. This helper adds the following results names: - locn_start = location where matched expression begins - locn_end = location where matched expression ends - value = the actual parsed results Be careful if the input text contains C{<TAB>} characters, you may want to call C{L{ParserElement.parseWithTabs}} Example:: wd = Word(alphas) for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"): print(match) prints:: [[0, 'ljsdf', 5]] [[8, 'lksdjjf', 15]] [[18, 'lkkjj', 23]] """ locator = Empty().setParseAction(lambda s,l,t: l) return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end")) # convenience constants for positional expressions
def srange(s): r""" Helper to easily define string ranges for use in Word construction. Borrows syntax from regexp '[]' string range definitions:: srange("[0-9]") -> "0123456789" srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz" srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" The input string must be enclosed in []'s, and the returned string is the expanded character set joined into a single string. The values enclosed in the []'s may be: - a single character - an escaped character with a leading backslash (such as C{\-} or C{\]}) - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character) (C{\0x##} is also supported for backwards compatibility) - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character) - a range of any of the above, separated by a dash (C{'a-z'}, etc.) - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.) """ _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1)) try: return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body) except Exception: return ""