The following 50 code examples, extracted from open-source Python projects, illustrate how to use re.VERBOSE.
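Before the extracted examples, here is a minimal, self-contained sketch of what re.VERBOSE itself does (this snippet is illustrative and not taken from any of the projects quoted below; the pattern and names are made up for the demonstration). With re.VERBOSE, unescaped whitespace in the pattern is ignored and "#" starts a comment, so a pattern can be spread over several documented lines.

import re

# Illustrative example, not from the extracted projects.
# Whitespace in the pattern is ignored (unless escaped or inside a
# character class) and "#" introduces a comment.
date_pattern = re.compile(r"""
    (?P<year>\d{4})   # four-digit year
    -
    (?P<month>\d{2})  # two-digit month
    -
    (?P<day>\d{2})    # two-digit day
    """, re.VERBOSE)

match = date_pattern.match("2024-05-17")
if match:
    print(match.group("year"), match.group("month"), match.group("day"))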
def str_flags_to_int(str_flags):
    flags = 0
    if "i" in str_flags:
        flags |= re.IGNORECASE
    if "l" in str_flags:
        flags |= re.LOCALE
    if "m" in str_flags:
        flags |= re.MULTILINE
    if "s" in str_flags:
        flags |= re.DOTALL
    if "u" in str_flags:
        flags |= re.UNICODE
    if "x" in str_flags:
        flags |= re.VERBOSE

    return flags
def _encode_regex(name, value, dummy0, dummy1):
    """Encode a python regex or bson.regex.Regex."""
    flags = value.flags
    # Python 2 common case
    if flags == 0:
        return b"\x0B" + name + _make_c_string_check(value.pattern) + b"\x00"
    # Python 3 common case
    elif flags == re.UNICODE:
        return b"\x0B" + name + _make_c_string_check(value.pattern) + b"u\x00"
    else:
        sflags = b""
        if flags & re.IGNORECASE:
            sflags += b"i"
        if flags & re.LOCALE:
            sflags += b"l"
        if flags & re.MULTILINE:
            sflags += b"m"
        if flags & re.DOTALL:
            sflags += b"s"
        if flags & re.UNICODE:
            sflags += b"u"
        if flags & re.VERBOSE:
            sflags += b"x"
        sflags += b"\x00"
        return b"\x0B" + name + _make_c_string_check(value.pattern) + sflags
def execute(cls, ids, data):
    import pydot
    pool = Pool()
    Model = pool.get('ir.model')
    ActionReport = pool.get('ir.action.report')

    if not data['filter']:
        filter = None
    else:
        filter = re.compile(data['filter'], re.VERBOSE)

    action_report_ids = ActionReport.search([
        ('report_name', '=', cls.__name__)
        ])
    if not action_report_ids:
        raise Exception('Error', 'Report (%s) not find!' % cls.__name__)
    action_report = ActionReport(action_report_ids[0])

    models = Model.browse(ids)

    graph = pydot.Dot(fontsize="8")
    graph.set('center', '1')
    graph.set('ratio', 'auto')
    cls.fill_graph(models, graph, level=data['level'], filter=filter)
    data = graph.create(prog='dot', format='png')

    return ('png', fields.Binary.cast(data), False, action_report.name)
def remove_stack_traces(out):
    # this regexp taken from Python 2.5's doctest
    traceback_re = re.compile(r"""
        # Grab the traceback header.  Different versions of Python have
        # said different things on the first traceback line.
        ^(?P<hdr> Traceback\ \(
            (?: most\ recent\ call\ last
            |   innermost\ last
            ) \) :
        )
        \s* $                   # toss trailing whitespace on the header.
        (?P<stack> .*?)         # don't blink: absorb stuff until...
        ^(?=\w)                 # a line *starts* with alphanum.
        .*?(?P<exception> \w+ ) # exception name
        (?P<msg> [:\n] .*)      # the rest
        """, re.VERBOSE | re.MULTILINE | re.DOTALL)
    blocks = []
    for block in blankline_separated_blocks(out):
        blocks.append(traceback_re.sub(r"\g<hdr>\n...\n\g<exception>\g<msg>", block))
    return "".join(blocks)
def set_memlimit(limit):
    global max_memuse
    global real_max_memuse
    sizes = {
        'k': 1024,
        'm': _1M,
        'g': _1G,
        't': 1024*_1G,
    }
    m = re.match(r'(\d+(\.\d+)?) (K|M|G|T)b?$', limit,
                 re.IGNORECASE | re.VERBOSE)
    if m is None:
        raise ValueError('Invalid memory limit %r' % (limit,))
    memlimit = int(float(m.group(1)) * sizes[m.group(3).lower()])
    real_max_memuse = memlimit
    if memlimit > MAX_Py_ssize_t:
        memlimit = MAX_Py_ssize_t
    if memlimit < _2G - 1:
        raise ValueError('Memory limit %r too low to be useful' % (limit,))
    max_memuse = memlimit
def parse_title(title):
    """ Returns parsed contents of a post's title """
    ro = re.compile(r"""
        (?P<artist>.+[^- ]+)    # The artist
        \s*-+\s*                # Skip some spaces and dashes
        (?P<title>.*)           # The title
        \s*\[                   # Skip some spaces and opening bracket
        (?P<genre>.*)           # The genre
        \]\s*\(                 # Skip closing bracket, spaces and opening parenthesis
        (?P<year>\d+)           # The year
        \)                      # Skip closing parenthesis
        """, re.VERBOSE | re.IGNORECASE)
    mo = ro.search(title)
    if mo is None:
        return
    return {'artist': mo.group('artist'),
            'title': mo.group('title'),
            'genre': mo.group('genre'),
            'year': mo.group('year')}
def wikilink(value):
    """
    Produce wiki style links to other pages within the database, for use in
    comments fields: {{ a_note|wikilink|truncatewords_html:5 }}
    Note that it's better to use truncatewords_html with this filter, rather
    than plain truncatewords
    """
    WIKILINK_RE = re.compile(r"""
        (?P<lead>\s|^)      # possible leading whitespace
        (?P<wikilink>/      # an initial /
            (\w+/)+         # multiples of any number of identifier chars + /
        )
        """, re.VERBOSE)

    def wikilink_sub_callback(match_obj):
        link = match_obj.group("wikilink")
        lead = match_obj.group("lead")
        return '%s<a href="%s">%s</a>' % (lead, escape(link), escape(link))

    return mark_safe(WIKILINK_RE.sub(wikilink_sub_callback, value))
def _real_extract(self, url):
    mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
    text_id = mobj.group('textid')

    page = self._download_json(
        'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info')
    info = page['info']

    formats = [
        {
            'format_id': f['type'],
            'filesize': int(f['filesize']),
            'url': f['url']
        } for f in info['rfiles']
    ]
    self._sort_formats(formats)

    return {
        'id': info['vid'],
        'title': info['Subject'],
        'duration': int(info['duration']) / 1000.0,
        'formats': formats,
        'thumbnail': info.get('bimg') or info.get('img'),
    }
def _real_extract(self, url):
    mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
    video_id = mobj.group('id')
    info_url = 'http://video.weibo.com/?s=v&a=play_list&format=json&mix_video_id=t_%s' % video_id
    info = self._download_json(info_url, video_id)

    videos_urls = map(lambda v: v['play_page_url'], info['result']['data'])
    # Prefer sina video since they have thumbnails
    videos_urls = sorted(videos_urls, key=lambda u: 'video.sina.com' in u)
    player_url = videos_urls[-1]
    m_sina = re.match(r'https?://video\.sina\.com\.cn/v/b/(\d+)-\d+\.html',
                      player_url)
    if m_sina is not None:
        self.to_screen('Sina video detected')
        sina_id = m_sina.group(1)
        player_url = 'http://you.video.sina.com.cn/swf/quotePlayer.swf?vid=%s' % sina_id
    return self.url_result(player_url)
def test_detect_verbose(self):
    """Test verbose."""

    pattern = bre.compile_search(
        r'''
        This is a # \Qcomment\E
        This is not a \# \Qcomment\E
        This is not a [#\ ] \Qcomment\E
        This is not a [\#] \Qcomment\E
        This\ is\ a # \Qcomment\E
        ''',
        re.VERBOSE
    )

    self.assertEqual(
        pattern.pattern,
        r'''
        This is a # \\Qcomment\\E
        This is not a \# comment
        This is not a [#\ ] comment
        This is not a [\#] comment
        This\ is\ a # \\Qcomment\\E
        '''
    )
def _apply_search_backrefs(pattern, flags=0):
    """Apply the search backrefs to the search pattern."""

    if isinstance(pattern, (compat.string_type, compat.binary_type)):
        re_verbose = bool(VERBOSE & flags)
        re_unicode = None
        if compat.PY3 and bool(ASCII & flags):
            re_unicode = False
        elif bool(UNICODE & flags):
            re_unicode = True
        pattern = SearchTemplate(pattern, re_verbose, re_unicode).apply()
    elif isinstance(pattern, RE_TYPE):
        if flags:
            raise ValueError("Cannot process flags argument with a compiled pattern!")
    else:
        raise TypeError("Not a string or compiled pattern!")

    return pattern
def stat_regexp_generator(data):
    """Generate a regular expression that will match swift-recon stats.

    Lines printed by swift-recon look like::

        [data] low: 0, high: 0, avg: 0.0, total: 0, Failed: 0.0%, no_result: 0, reported: 0

    Where data above is the value of the ``data`` parameter passed to the
    function.
    """
    expression = """\s+low:\s+(?P<low>\d+),            # parse out the low result
                    \s+high:\s+(?P<high>\d+),          # parse out the high result
                    \s+avg:\s+(?P<avg>\d+.\d+),        # you get the idea now
                    \s+total:\s+(?P<total>\d+),
                    \s+Failed:\s+(?P<failed>\d+.\d+%),
                    \s+no_result:\s+(?P<no_result>\d+),
                    \s+reported:\s+(?P<reported>\d+)"""
    return re.compile('\[' + data + '\]' + expression, re.VERBOSE)
def rnc_markup_tokenizer(s):
    """
    [rn][mod1][num][\s-]
    """
    rn_re = re.compile(u"""(?P<p1>[b??#]?[ivIV]+)
                           (?P<p2>[^\d\s-]*)
                           (?P<p3>[^\s-]*)
                           (?P<sep>(\s*-\s*|\s*))""", re.VERBOSE|re.UNICODE)
    i = 0
    retval = []
    while i < len(s):
        m = rn_re.match(s[i:])
        if not m:
            retval.append((u'ERR:%s' % s[i:], '', '', ''))
            break
        retval.append((m.group('p1'), m.group('p2'), m.group('p3'), m.group('sep')))
        i += m.end()
    return retval
def get_single_author_pattern():
    """Generates a simple, one-hit-only, author name pattern, matching just one author
       name in either of the 'S I' or 'I S' formats. The author patterns are the same
       ones used inside the main 'author group' pattern generator. This function is used
       not for reference extraction, but for author extraction. Numeration is appended
       to author patterns by default.
       @return (string): Just the author name pattern designed to identify single author
       names in both SI and IS formats. (NO 'et al', editors, 'and'... matching)
       @return: (string) the union of 'initial surname' and 'surname initial' authors"""
    return "(?:" + get_initial_surname_author_pattern(incl_numeration=True) + \
           "|" + get_surname_initial_author_pattern(incl_numeration=True) + ")"


# Targets single author names
# re_single_author_pattern = re.compile(get_single_author_pattern(), re.VERBOSE)

# pylint: enable=C0103
def __init__(cls, name, bases, dct):
    super(_TemplateMetaclass, cls).__init__(name, bases, dct)
    if 'pattern' in dct:
        pattern = cls.pattern
    else:
        pattern = _TemplateMetaclass.pattern % {
            'delim' : _re.escape(cls.delimiter),
            'id'    : cls.idpattern,
            }
    cls.pattern = _re.compile(pattern, _re.IGNORECASE | _re.VERBOSE)
def __compile_tokenize_pattern(self):
    """
    Compiles the regular expression used by self.tokenize() and stores
    a reference to it in self.tokenize_pattern. The full regular expression
    used here is a concatenation of several patterns (as written above
    self.__init__() and conditionally using either the word pattern that
    matches hyphen-broken words, or the pattern that only captures "whole"
    words.
    """
    # Capture hyphen-broken words as single tokens by default.
    word_pattern_str = self._pattern_str_word_with_hyphen_breaks

    # If we're not supposed to remove hyphen breaks, use the alternate word
    # pattern, which doesn't look for "hyphen breaks".
    if not self.remove_hyphen_breaks:
        word_pattern_str = self._pattern_str_word

    # Concatenate the separate pattern strings into the final pattern string.
    # The order here indicates group match priority (i.e. match "words"
    # first, etc.)
    # Join the regex pattern strings with the "or" character ("|").
    final_tokenize_pattern_str = r"|".join([
        word_pattern_str,
        self._pattern_str_entity,
        self._pattern_str_remnant,
        self._pattern_str_whitespace,
        self._pattern_str_newline
    ])

    # Compile the final pattern. Those strings have whitespace, so make
    # sure re.VERBOSE is one of the flags used!
    self.tokenize_pattern = re.compile(final_tokenize_pattern_str, re.I | re.VERBOSE)
def __init__(self, fileLoader, baseDir, varBase, sourceName):
    self.__pattern = re.compile(r"""
        \$<(?:
            (?P<escaped>\$)             |
            (?P<named>[<'][^'>]+)['>]>  |
            (?P<braced>[<'][^'>]+)['>]> |
            (?P<invalid>)
        )
        """, re.VERBOSE)
    self.__baseDir = baseDir
    self.__varBase = re.sub(r'[^a-zA-Z0-9_]', '_', varBase, flags=re.DOTALL)
    self.__fileLoader = fileLoader
    self.__sourceName = sourceName
def _env_var_constructor(loader, node):
    var = re.compile(r"\$\{([^}:\s]+):?([^}]+)?\}", re.VERBOSE)
    value = loader.construct_scalar(node)
    return var.sub(_replace_env_var, value)
def setup_yaml_parser():
    var = re.compile(r".*\$\{.*\}.*", re.VERBOSE)
    yaml.add_constructor('!env_var', _env_var_constructor)
    yaml.add_implicit_resolver('!env_var', var)
def test_ip_v4_pattern(self):
    ip_v4_pattern = self.patterns.IP_V4
    for ip_v4, result in IP_V4_DATA.items():
        if result:
            self.assertIsNotNone(
                re.match(ip_v4_pattern, ip_v4, re.VERBOSE | re.IGNORECASE | re.DOTALL))
        else:
            self.assertIsNone(
                re.match(ip_v4_pattern, ip_v4, re.VERBOSE | re.IGNORECASE | re.DOTALL))
def test_ip_v6_pattern(self):
    ip_v6_pattern = self.patterns.IP_V6
    for ip_v6, result in IP_V6_DATA.items():
        if result:
            self.assertIsNotNone(
                re.match(ip_v6_pattern, ip_v6, re.VERBOSE | re.IGNORECASE | re.DOTALL))
        else:
            self.assertIsNone(
                re.match(ip_v6_pattern, ip_v6, re.VERBOSE | re.IGNORECASE | re.DOTALL))
def check_name(self,name):
    pattern = re.compile(r"^[ a-zA-Z']+$",re.VERBOSE)
    if re.match(pattern,name):
        return True
    else:
        return False

# Check for valid Unix username
def check_username(self,username):
    pattern = re.compile(r"^\w{5,255}$",re.VERBOSE)
    if re.match(pattern,username):
        return True
    else:
        return False

# Check for valid Unix UID
def check_uid(self,uid):
    pattern = re.compile(r"^\d{1,10}$",re.VERBOSE)
    if re.match(pattern,uid):
        return True
    else:
        return False

# Check for valid IP address
def check_ip(self,ip):
    pattern = re.compile(r"\b(([01]?\d?\d|2[0-4]\d|25[0-5])\.){3}([01]?\d?\d|2[0-4]\d|25[0-5])\b",re.VERBOSE)
    if re.match(pattern,ip) and ip != "0.0.0.0":
        return True
    else:
        return False

# Check for valid system hostname
def check_hostname(self,hostname):
    pattern = re.compile(r"^[a-zA-Z0-9\-\.]{1,100}$",re.VERBOSE)
    if re.match(pattern,hostname):
        return True
    else:
        return False

# Display Menu
def _form_master_re(relist,reflags,ldict,toknames):
    if not relist: return []
    regex = "|".join(relist)
    try:
        lexre = re.compile(regex,re.VERBOSE | reflags)

        # Build the index to function map for the matching engine
        lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1)
        lexindexnames = lexindexfunc[:]

        for f,i in lexre.groupindex.items():
            handle = ldict.get(f,None)
            if type(handle) in (types.FunctionType, types.MethodType):
                lexindexfunc[i] = (handle,toknames[f])
                lexindexnames[i] = f
            elif handle is not None:
                lexindexnames[i] = f
                if f.find("ignore_") > 0:
                    lexindexfunc[i] = (None,None)
                else:
                    lexindexfunc[i] = (None, toknames[f])

        return [(lexre,lexindexfunc)],[regex],[lexindexnames]
    except Exception:
        m = int(len(relist)/2)
        if m == 0: m = 1
        llist, lre, lnames = _form_master_re(relist[:m],reflags,ldict,toknames)
        rlist, rre, rnames = _form_master_re(relist[m:],reflags,ldict,toknames)
        return llist+rlist, lre+rre, lnames+rnames

# -----------------------------------------------------------------------------
# def _statetoken(s,names)
#
# Given a declaration name s of the form "t_" and a dictionary whose keys are
# state names, this function returns a tuple (states,tokenname) where states
# is a tuple of state names and tokenname is the name of the token.  For example,
# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM')
# -----------------------------------------------------------------------------
def translate(string, lang_direction):
    """Takes a string that is to be translated and returns the translated string,
    doesn't translate the %(format)s parts, they must remain the same text as the
    msgid"""
    # simple format chars like %s can be 'translated' ok, they just pass through unaffected
    named_format_regex = re.compile(r"%\([^\)]+?\)[sd]", re.VERBOSE)
    matches = named_format_regex.findall(string)
    new = None

    if len(matches) == 0:
        # There are no format specifiers in this string, so just do a straight translation
        # this fails if we've missed a format specifier
        assert "%(" not in string, string
        new = translate_subpart(string, lang_direction)
    else:
        # we need to do complicate translation of the bits inside
        full_trans = translate_subpart(string, lang_direction)
        for match in matches:
            # then, for each format specifier, replace back in the string
            translated_match = translate_subpart(match, lang_direction)
            # during the translation some extra punctuation/spaces might have been added
            # remove them
            translated_match_match = named_format_regex.search(translated_match)
            assert translated_match_match
            translated_match = translated_match_match.group(0)
            # put back the format specifier, the case of the format specifier might have changed
            replace = re.compile(re.escape(translated_match), re.IGNORECASE)
            full_trans = replace.sub(match, full_trans)
        new = full_trans

    return new
def flags(key):
    flag = 0
    if 'a' in key:
        flag += re.ASCII
    if 'i' in key:
        flag += re.IGNORECASE
    if 'l' in key:
        flag += re.LOCALE
    if 'm' in key:
        flag += re.MULTILINE
    if 's' in key:
        flag += re.DOTALL
    if 'x' in key:
        flag += re.VERBOSE
    return flag
def extract_dependencies(file_path):
    """
    Parse the file contents and return the list of dependencies.
    """
    with open(file_path) as fh:
        file_contents = fh.read()
    match = re.search(r"""^\s+dependencies [^\[]+ \[ ([^\]]*) \]""",
                      file_contents, flags=re.VERBOSE | re.MULTILINE)
    if not match:
        return []
    deps = match.group(1).strip()
    if not deps:
        return []
    match_iter = re.finditer(r"""\( '([^']+)' ,\s* '([^_][^']+)' \)""",
                             deps, flags=re.VERBOSE)
    return [(match.group(1), match.group(2)) for match in match_iter]
def __init__(self, states, first):
    self.regexes = {}
    self.toks = {}

    for state, rules in states.items():
        parts = []
        for tok in rules:
            groupid = "t%d" % tok.id
            self.toks[groupid] = tok
            parts.append("(?P<%s>%s)" % (groupid, tok.regex))
        self.regexes[state] = re.compile("|".join(parts), re.MULTILINE | re.VERBOSE)

    self.state = first
def is_valid_ipv4(ip):
    """Validates IPv4 addresses.
    """
    pattern = re.compile(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$",
                         re.VERBOSE | re.IGNORECASE)
    if pattern.match(ip) is None:
        return False
    for x in ip.split("."):
        val = int(x)
        if val < 0 or val > 255:
            return False
    return True
def simplify_warnings(out):
    warn_re = re.compile(r"""
        # Cut the file and line no, up to the warning name
        ^.*:\d+:\s
        (?P<category>\w+): \s+   # warning category
        (?P<detail>.+) $ \n?     # warning message
        ^ .* $                   # stack frame
        """, re.VERBOSE | re.MULTILINE)
    return warn_re.sub(r"\g<category>: \g<detail>", out)
def rcompile(pattern, flags=0, verbose=False):
    """A wrapper for re.compile that checks whether "pattern" is a regex object
    or a string to be compiled, and automatically adds the re.UNICODE flag.
    """
    if not isinstance(pattern, string_type):
        # If it's not a string, assume it's already a compiled pattern
        return pattern
    if verbose:
        flags |= re.VERBOSE
    return re.compile(pattern, re.UNICODE | flags)
def readtab(self,tabfile,fdict):
    if isinstance(tabfile,types.ModuleType):
        lextab = tabfile
    else:
        if sys.version_info[0] < 3:
            exec("import %s as lextab" % tabfile)
        else:
            env = { }
            exec("import %s as lextab" % tabfile, env,env)
            lextab = env['lextab']

    if getattr(lextab,"_tabversion","0.0") != __version__:
        raise ImportError("Inconsistent PLY version")

    self.lextokens      = lextab._lextokens
    self.lexreflags     = lextab._lexreflags
    self.lexliterals    = lextab._lexliterals
    self.lexstateinfo   = lextab._lexstateinfo
    self.lexstateignore = lextab._lexstateignore
    self.lexstatere     = { }
    self.lexstateretext = { }
    for key,lre in lextab._lexstatere.items():
        titem = []
        txtitem = []
        for i in range(len(lre)):
            titem.append((re.compile(lre[i][0],lextab._lexreflags | re.VERBOSE),
                          _names_to_funcs(lre[i][1],fdict)))
            txtitem.append(lre[i][0])
        self.lexstatere[key] = titem
        self.lexstateretext[key] = txtitem
    self.lexstateerrorf = { }
    for key,ef in lextab._lexstateerrorf.items():
        self.lexstateerrorf[key] = fdict[ef]
    self.begin('INITIAL')

# ------------------------------------------------------------
# input() - Push a new string into the lexer
# ------------------------------------------------------------
def _real_extract(self, url):
    m = re.match(self._VALID_URL, url, re.VERBOSE)
    if m.group('type').startswith('embed'):
        desktop_url = m.group('proto') + 'www' + m.group('urlmain')
        return self.url_result(desktop_url, 'TED')
    name = m.group('name')
    if m.group('type_talk'):
        return self._talk_info(url, name)
    elif m.group('type_watch'):
        return self._watch_info(url, name)
    else:
        return self._playlist_videos_info(url, name)
def _real_extract(self, url):
    mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
    if mobj is None:
        raise ExtractorError('Invalid URL: %s' % url)

    track_id = mobj.group('track_id')
    token = None

    if track_id is not None:
        info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
        full_title = track_id
        token = mobj.group('secret_token')
        if token:
            info_json_url += "&secret_token=" + token
    elif mobj.group('player'):
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        real_url = query['url'][0]
        # If the token is in the query of the original url we have to
        # manually add it
        if 'secret_token' in query:
            real_url += '?secret_token=' + query['secret_token'][0]
        return self.url_result(real_url)
    else:
        # extract uploader (which is in the url)
        uploader = mobj.group('uploader')
        # extract simple title (uploader + slug of song title)
        slug_title = mobj.group('title')
        token = mobj.group('token')
        full_title = resolve_title = '%s/%s' % (uploader, slug_title)
        if token:
            resolve_title += '/%s' % token

        self.report_resolve(full_title)

        url = 'http://soundcloud.com/%s' % resolve_title
        info_json_url = self._resolv_url(url)

    info = self._download_json(info_json_url, full_title, 'Downloading info JSON')

    return self._extract_info_dict(info, full_title, secret_token=token)
def cds_from_gbk(gb_file):
    gb_record = SeqIO.read(open(gb_file,"rU"), "genbank")
    #if strain_id is not None:
    #    gb_record.id = strain_id

    output = pd.DataFrame()
    sign = lambda x: '+' if x > 0 else '-'
    for feature in gb_record.features:
        if feature.type == "CDS":
            tmp = {}
            tmp = {'BGC': gb_record.id,
                   'locus_tag': feature.qualifiers['locus_tag'][0],
                   'start': feature.location.start.position,
                   'stop': feature.location.end.position,
                   'strand': sign(feature.location.strand)
                  }
            if 'note' in feature.qualifiers:
                for note in feature.qualifiers['note']:
                    product = re.search(
                        r"""smCOG: \s (?P<product>.*?) \s+
                            \(Score: \s* (?P<score>.*); \s*
                            E-value: \s (?P<e_value>.*?)\);""",
                        note, re.VERBOSE)
                    if product is not None:
                        product = product.groupdict()
                        product['score'] = float(product['score'])
                        product['e_value'] = float(product['e_value'])
                        for p in product:
                            tmp[p] = product[p]
            output = output.append(pd.Series(tmp), ignore_index=True)
    return output
def find_id(self, contents=None):
    contents = self._load_url() if not contents else contents
    if not contents:
        return False

    pattern = r'(entity_id["\' ]{1,3}:["\' ]{1,3})([\d]+)'
    regex = re.compile(pattern, flags=re.VERBOSE)
    match = regex.search(contents)

    try:
        return match.group(2)
    except (IndexError, AttributeError):
        return False
def _parse(self):
    """Get axes and shape from file names."""
    if not self.pattern:
        raise self.ParseError("invalid pattern")
    pattern = re.compile(self.pattern, re.IGNORECASE | re.VERBOSE)
    matches = pattern.findall(self.files[0])
    if not matches:
        raise self.ParseError("pattern doesn't match file names")
    matches = matches[-1]
    if len(matches) % 2:
        raise self.ParseError("pattern doesn't match axis name and index")
    axes = ''.join(m for m in matches[::2] if m)
    if not axes:
        raise self.ParseError("pattern doesn't match file names")

    indices = []
    for fname in self.files:
        matches = pattern.findall(fname)[-1]
        if axes != ''.join(m for m in matches[::2] if m):
            raise ValueError("axes don't match within the image sequence")
        indices.append([int(m) for m in matches[1::2] if m])
    shape = tuple(numpy.max(indices, axis=0))
    start_index = tuple(numpy.min(indices, axis=0))
    shape = tuple(i-j+1 for i, j in zip(shape, start_index))
    if product(shape) != len(self.files):
        warnings.warn("files are missing. Missing data are zeroed")

    self.axes = axes.upper()
    self.shape = shape
    self._indices = indices
    self._start_index = start_index
def build(self):
    '''Initialize the tokenizer.'''
    self.lexer = lex.lex(object=self,
                         reflags=(re.DOTALL | re.MULTILINE | re.VERBOSE))
    self.lexer.x = 1