The following code examples, extracted from open-source Python projects, illustrate how to use re.escape().
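Before the extracted examples, here is a minimal standalone sketch (not taken from any of the projects below; the filename string is made up for illustration) of what re.escape() does: it backslash-escapes regex metacharacters so that an arbitrary string can be embedded in a pattern and matched literally.

import re

# A literal string that happens to contain regex metacharacters.
filename = "report(v1.2).txt"

# re.escape() backslash-escapes every character that is special in a regular
# expression, so the string can be embedded in a larger pattern and matched
# literally rather than being interpreted as regex syntax.
pattern = re.compile(re.escape(filename) + r"$")

print(re.escape(filename))                                        # report\(v1\.2\)\.txt
print(pattern.search("backup of report(v1.2).txt") is not None)   # True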
def _make_boundary(text=None):
    # Craft a random boundary.  If text is given, ensure that the chosen
    # boundary doesn't appear in the text.
    token = random.randrange(sys.maxint)
    boundary = ('=' * 15) + (_fmt % token) + '=='
    if text is None:
        return boundary
    b = boundary
    counter = 0
    while True:
        cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
        if not cre.search(text):
            break
        b = boundary + '.' + str(counter)
        counter += 1
    return b
def pickline(file, key, casefold = 1):
    try:
        f = open(file, 'r')
    except IOError:
        return None
    pat = re.escape(key) + ':'
    prog = re.compile(pat, casefold and re.IGNORECASE)
    while 1:
        line = f.readline()
        if not line:
            break
        if prog.match(line):
            text = line[len(key)+1:]
            while 1:
                line = f.readline()
                if not line or not line[0].isspace():
                    break
                text = text + line
            return text.strip()
    return None
def _findLib_gcc(name):
    expr = r'[^\(\)\s]*lib%s\.[^\(\)\s]*' % re.escape(name)
    fdout, ccout = tempfile.mkstemp()
    os.close(fdout)
    cmd = 'if type gcc >/dev/null 2>&1; then CC=gcc; elif type cc >/dev/null 2>&1; then CC=cc;else exit 10; fi;' \
          '$CC -Wl,-t -o ' + ccout + ' 2>&1 -l' + name
    try:
        f = os.popen(cmd)
        try:
            trace = f.read()
        finally:
            rv = f.close()
    finally:
        try:
            os.unlink(ccout)
        except OSError, e:
            if e.errno != errno.ENOENT:
                raise
    if rv == 10:
        raise OSError, 'gcc or cc command not found'
    res = re.search(expr, trace)
    if not res:
        return None
    return res.group(0)
def replace_citations_strings_with_ids(text, string_to_id):
    """
    Convert citations to their IDs for pandoc.

    `text` is markdown source text

    `string_to_id` is a dictionary like:
    @10.7287/peerj.preprints.3100v1 → 11cb5HXoY
    """
    for old, new in string_to_id.items():
        text = re.sub(
            pattern=re.escape(old) + r'(?![\w:.#$%&\-+?<>~/]*[a-zA-Z0-9/])',
            repl='@' + new,
            string=text,
        )
    return text
def setup(self, config):
    """
    Determine max size to unpack and which directories to ignore.

    :param config: Configuration object.
    :type config: ``dict``
    """
    self.max_size = config.get(helper.MAX_FILE_SIZE, 128) * 1024 * 1024
    self.config = config
    ignore = {}
    path = os.path.join(
        config[helper.CODE_ROOT], 'utils', 'diskimage_ignore.txt')
    with open(path) as inp:
        for line in inp:
            if len(line.strip()) == 0 or line.startswith('#'):
                continue
            ignore[re.escape(line.strip().lower())] = True
    self.ignore = re.compile('|'.join(list(ignore.keys())), re.I)
def prepare(rep, onlyAtEnds=False, multiOccur=True):
    '''Compiles and returns a regex matching the input list of strings to replace

    Note: returns two values wrapped as one; can feed tuple directly into apply
    '''
    if type(rep) == list:
        rep = {key: '' for key in rep}
    rep = dict((re.escape(k), v) for k, v in rep.items())
    if onlyAtEnds:
        if multiOccur:
            expr = str.format("^[{0}]+|[{0}]+$", ''.join(rep.keys()))
        else:
            expr = str.format("{0}|{1}",
                              "|".join(['^%s' % key for key in rep.keys()]),
                              "|".join(['%s$' % key for key in rep.keys()])
                              )
    else:
        expr = "|".join(rep.keys())
    pattern = re.compile(expr)
    return (pattern, rep)
def visit_textclause(self, textclause, **kw):
    def do_bindparam(m):
        name = m.group(1)
        if name in textclause._bindparams:
            return self.process(textclause._bindparams[name], **kw)
        else:
            return self.bindparam_string(name, **kw)

    if not self.stack:
        self.isplaintext = True

    # un-escape any \:params
    return BIND_PARAMS_ESC.sub(
        lambda m: m.group(1),
        BIND_PARAMS.sub(
            do_bindparam,
            self.post_process_text(textclause.text))
    )
def list(self):
    """Lists all sessions in the store.

    .. versionadded:: 0.6
    """
    before, after = self.filename_template.split('%s', 1)
    filename_re = re.compile(r'%s(.{5,})%s$' % (re.escape(before),
                                                re.escape(after)))
    result = []
    for filename in os.listdir(self.path):
        #: this is a session that is still being saved.
        if filename.endswith(_fs_transaction_suffix):
            continue
        match = filename_re.match(filename)
        if match is not None:
            result.append(match.group(1))
    return result
def compile_rules(environment):
    """Compiles all the rules from the environment into a list of rules."""
    e = re.escape
    rules = [
        (len(environment.comment_start_string), 'comment',
         e(environment.comment_start_string)),
        (len(environment.block_start_string), 'block',
         e(environment.block_start_string)),
        (len(environment.variable_start_string), 'variable',
         e(environment.variable_start_string))
    ]

    if environment.line_statement_prefix is not None:
        rules.append((len(environment.line_statement_prefix), 'linestatement',
                      r'^[ \t\v]*' + e(environment.line_statement_prefix)))
    if environment.line_comment_prefix is not None:
        rules.append((len(environment.line_comment_prefix), 'linecomment',
                      r'(?:^|(?<=\S))[^\S\r\n]*' +
                      e(environment.line_comment_prefix)))

    return [x[1:] for x in sorted(rules, reverse=True)]
def import_string(import_name, silent=False):
    """Imports an object based on a string.  This is useful if you want to
    use import paths as endpoints or something similar.  An import path can
    be specified either in dotted notation (``xml.sax.saxutils.escape``)
    or with a colon as object delimiter (``xml.sax.saxutils:escape``).

    If the `silent` is True the return value will be `None` if the import
    fails.

    :return: imported object
    """
    try:
        if ':' in import_name:
            module, obj = import_name.split(':', 1)
        elif '.' in import_name:
            items = import_name.split('.')
            module = '.'.join(items[:-1])
            obj = items[-1]
        else:
            return __import__(import_name)
        return getattr(__import__(module, None, None, [obj]), obj)
    except (ImportError, AttributeError):
        if not silent:
            raise
def clean_downloaded_metadata(self, mi):
    docase = (
        mi.language == 'zhn'
    )
    if mi.title and docase:
        # Remove series information from title
        m = re.search(r'\S+\s+(\(.+?\s+Book\s+\d+\))$', mi.title)
        if m is not None:
            mi.title = mi.title.replace(m.group(1), '').strip()
        mi.title = fixcase(mi.title)
    mi.authors = fixauthors(mi.authors)
    if mi.tags and docase:
        mi.tags = list(map(fixcase, mi.tags))
    mi.isbn = check_isbn(mi.isbn)
    if mi.series and docase:
        mi.series = fixcase(mi.series)
    if mi.title and mi.series:
        for pat in (r':\s*Book\s+\d+\s+of\s+%s$', r'\(%s\)$', r':\s*%s\s+Book\s+\d+$'):
            pat = pat % re.escape(mi.series)
            q = re.sub(pat, '', mi.title, flags=re.I).strip()
            if q and q != mi.title:
                mi.title = q
                break
def _glob_to_re(self, pattern):
    """Translate a shell-like glob pattern to a regular expression.

    Return a string containing the regex.  Differs from
    'fnmatch.translate()' in that '*' does not match "special characters"
    (which are platform-specific).
    """
    pattern_re = fnmatch.translate(pattern)

    # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
    # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
    # and by extension they shouldn't match such "special characters" under
    # any OS.  So change all non-escaped dots in the RE to match any
    # character except the special characters (currently: just os.sep).
    sep = os.sep
    if os.sep == '\\':
        # we're using a regex to manipulate a regex, so we need
        # to escape the backslash twice
        sep = r'\\\\'
    escaped = r'\1[^%s]' % sep
    pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', escaped, pattern_re)
    return pattern_re
def get(self):
    keyword = self.get_query_argument('keyword', '')
    if not keyword:
        self.custom_error('???????')
    pattern = u'.*{}.*'.format(re.escape(keyword))
    limit = 20
    page = int(self.get_query_argument('page', default=1))
    page = 1 if page <= 0 else page
    # ????title????
    # ?????? elstiasearch ????
    cursor = self.db.topic.find({
        'title': {'$regex': pattern, '$options': 'i'}
    })
    total = yield cursor.count()
    cursor.sort([('time', -1)]).limit(limit).skip((page - 1) * limit)
    topics = yield cursor.to_list(length=limit)
    self.render('search/template/search.html',
                topics=topics, total=total,
                page=page, keyword=keyword)
def render(self):
    # make local variables for the loop below (faster)
    local_dict = self.key_dict
    local_template = self.template
    local_varlist = self.template.varlist
    local_odel = self.odel
    local_cdel = self.cdel
    local_htmlent = self.html_entities
    if local_htmlent:
        from xml.sax.saxutils import escape  # from Python std lib
    for key in local_dict:
        if key in local_varlist:
            value = local_dict[key]
            replace_string = local_odel + key + local_cdel
            if local_htmlent:
                value = escape(value)  # xml.sax.saxutils function
            local_template = local_template.replace(replace_string, value)
    return local_template

##TODO : multiple file render method?
def set_status(self, status_code, reason=None):
    """Sets the status code for our response.

    :arg int status_code: Response status code. If ``reason`` is ``None``,
        it must be present in `httplib.responses <http.client.responses>`.
    :arg string reason: Human-readable reason phrase describing the status
        code. If ``None``, it will be filled in from
        `httplib.responses <http.client.responses>`.
    """
    self._status_code = status_code
    if reason is not None:
        self._reason = escape.native_str(reason)
    else:
        try:
            self._reason = httputil.responses[status_code]
        except KeyError:
            raise ValueError("unknown status code %d", status_code)
def __init__(self, portal_names, app_matchers):
    if not isinstance(app_matchers, list) and not isinstance(app_matchers, tuple):
        assert isinstance(app_matchers, AppMatcher)
        app_matchers = (app_matchers,)
    for matcher in app_matchers:
        assert isinstance(matcher, AppMatcher)
    self.__app_matchers = app_matchers

    self.__portal_matchers = []
    if not isinstance(portal_names, list) and not isinstance(portal_names, tuple):
        portal_names = (portal_names,)
    for name in portal_names:
        if isinstance(name, str):
            name = re.compile(re.escape(name))
        assert hasattr(name, 'match') and callable(name.match)
        self.__portal_matchers.append(name)
def scrapeContacts(self):
    if (not self.srv):
        return

    self.getUIDs()
    if (not self.uids):
        return None

    contacts = []
    for uid in self.uids:
        resp, data = self.srv.fetch(uid, "(RFC822)")
        for response_part in data:
            if isinstance(response_part, tuple):
                msg = email.message_from_string(response_part[1])
                fromaddr = msg['from']
                if (fromaddr):
                    sender = msg['from'].split()[-1]
                    address = re.sub(r'[<>]', '', sender)
                    # Ignore any occurences of own email address and add to list
                    if not re.search(r'' + re.escape(self.user), address) and not address in contacts:
                        contacts.append(address)
                        print "IDENTIFED new contact [%s]" % (address)

    return contacts
def scrapeContacts(self):
    if (not self.srv):
        return

    self.getMessages()

    contacts = []
    for (server_msg, body, octets) in self.msg_list:
        mail = email.message_from_string('\n'.join(body))
        for part in mail.walk():
            fromaddr = part['from']
            if (fromaddr):
                sender = part['from'].split()[-1]
                address = re.sub(r'[<>]', '', sender)
                # Ignore any occurences of own email address and add to list
                if not re.search(r'' + re.escape(self.user), address) and not address in contacts:
                    contacts.append(address)
                    print "IDENTIFED new contact [%s]" % (address)

    return contacts
def tiny_tokenize(text, stem=False, stop_words=[]):
    words = []
    for token in wordpunct_tokenize(re.sub('[%s]' % re.escape(string.punctuation), ' ', \
            text.decode(encoding='UTF-8', errors='ignore'))):
        if not token.isdigit() and not token in stop_words:
            if stem:
                try:
                    w = EnglishStemmer().stem(token)
                except Exception as e:
                    w = token
            else:
                w = token
            words.append(w)

    return words

    # return [EnglishStemmer().stem(token) if stem else token for token in wordpunct_tokenize(
    #     re.sub('[%s]' % re.escape(string.punctuation), ' ', text.decode(encoding='UTF-8', errors='ignore'))) if
    #     not token.isdigit() and not token in stop_words]
def strip_tags(text, strip_punctuation=False):
    # Return only the words from content, stripping punctuation and HTML.
    soup = BeautifulSoup(text)
    if strip_punctuation:
        punctuation = re.compile('[{}]+'.format(re.escape(p)))
        words_only = punctuation.sub('', soup.get_text())
        return words_only
    words_only = soup.get_text()
    return words_only
def _search_for_query(self, query):
    if query in self._search_pattern_cache:
        return self._search_pattern_cache[query]

    # Build pattern: include all characters
    pattern = []
    for c in query:
        # pattern.append('[^{0}]*{0}'.format(re.escape(c)))
        pattern.append('.*?{0}'.format(re.escape(c)))

    pattern = ''.join(pattern)
    search = re.compile(pattern, re.IGNORECASE).search

    self._search_pattern_cache[query] = search
    return search
def _escapeRegexRangeChars(s):
    #~  escape these chars: ^-]
    for c in r"\^-]":
        s = s.replace(c, _bslash+c)
    s = s.replace("\n", r"\n")
    s = s.replace("\t", r"\t")
    return _ustr(s)
def prune_file_list(self):
    build = self.get_finalized_command('build')
    base_dir = self.distribution.get_fullname()

    self.filelist.prune(build.build_base)
    self.filelist.prune(base_dir)
    sep = re.escape(os.sep)
    self.filelist.exclude_pattern(r'(^|' + sep + r')(RCS|CVS|\.svn)' + sep,
                                  is_regex=1)
def _setoption(arg):
    import re
    parts = arg.split(':')
    if len(parts) > 5:
        raise _OptionError("too many fields (max 5): %r" % (arg,))
    while len(parts) < 5:
        parts.append('')
    action, message, category, module, lineno = [s.strip()
                                                 for s in parts]
    action = _getaction(action)
    message = re.escape(message)
    category = _getcategory(category)
    module = re.escape(module)
    if module:
        module = module + '$'
    if lineno:
        try:
            lineno = int(lineno)
            if lineno < 0:
                raise ValueError
        except (ValueError, OverflowError):
            raise _OptionError("invalid lineno %r" % (lineno,))
    else:
        lineno = 0
    filterwarnings(action, message, category, module, lineno)

# Helper for _setoption()
def addImage(self, name, imageFile, row=None, column=0, colspan=0, rowspan=0):
    #image = re.escape(image)
    self.__verifyItem(self.n_images, name, True)
    imgObj = self.__getImage(imageFile)
    self.__addImageObj(name, imgObj, row, column, colspan, rowspan)
def checkMatch(self, fieldValue, acListEntry):
    pattern = re.compile(re.escape(fieldValue) + '.*', re.IGNORECASE)
    return re.match(pattern, acListEntry)

# function to get all matches as a list
def _get_line_comment_char(view):
    commentChar = ""
    commentChar2 = ""
    try:
        for pair in view.meta_info("shellVariables", 0):
            if pair["name"] == "TM_COMMENT_START":
                commentChar = pair["value"]
            if pair["name"] == "TM_COMMENT_START_2":
                commentChar2 = pair["value"]
            if commentChar and commentChar2:
                break
    except TypeError:
        pass

    if not commentChar2:
        return re.escape(commentChar.strip())
    else:
        return "(" + re.escape(commentChar.strip()) + "|" + re.escape(commentChar2.strip()) + ")"
def build_pattern(self, query):
    return r'\b{0}\b'.format(re.escape(query))
def find_matches(self, prefix, end):
    escaped = re.escape(prefix)
    matches = []
    while end > 0:
        match = search.reverse_search(self.view,
                                      r'^\s*{0}'.format(escaped),
                                      0, end, flags=0)
        if (match is None) or (len(matches) == self.MAX_MATCHES):
            break
        line = self.view.line(match.begin())
        end = line.begin()
        text = self.view.substr(line).lstrip()
        if text not in matches:
            matches.append(text)
    return matches
def get_link_pattern():
    return re.escape(options.get('system.url-prefix')) \
        .replace('https\\:', 'https?\\:') + '/'
def parse_webpage(item_name, wiki_source):
    item_name = item_name.replace(' ', '_')  # strips out the spaces in our items and replaces them with '_' so that they are in the format we are looking for in the source code
    search_word = 'Exchange:'+item_name+'"'  # creates a variable that contains the term we will search the source code for to locate the item
    split_text = re.split(r'{0}'.format(re.escape(search_word)), wiki_source, maxsplit=1, flags=0)  # splits the source code into 2 at the position of the word we are looking for, this means that the ge limit of the item is the first number in the second element of the split_text variable
    #print(search_word)  # uncomment this if you wanna see the item it is searching for (useful for debugging errors searching for items)
    result = re.search(r'[0-9,]+', split_text[1])  # tries to locate the first number in the second entry of our split_text list. This number is in the format (any number of digits) + (,) + (any number of digits). This allows us to find numbers such as 25,000
    limit = result.group(0)  # this returns the correct values from our regex 'result' for more info try calling print(result) on the line above
    return(limit)
def assert_loader_error(self, events_by_sid, error, msg, infer_timestamps,
                        loader):
    with self.assertRaisesRegexp(error, re.escape(msg)):
        loader(
            dtx,
            events_by_sid,
            infer_timestamps=infer_timestamps,
        )
def test_no_concrete_loader_defined(self):
    with self.assertRaisesRegexp(
            TypeError, re.escape(ABSTRACT_CONCRETE_LOADER_ERROR)
    ):
        BlazeEventDataSetLoaderNoConcreteLoader(
            bz.data(
                pd.DataFrame({ANNOUNCEMENT_FIELD_NAME: dtx,
                              SID_FIELD_NAME: 0})
            )
        )
def find_function(funcname, filename):
    cre = re.compile(r'def\s+%s\s*[(]' % re.escape(funcname))
    try:
        fp = open(filename)
    except IOError:
        return None
    # consumer of this info expects the first line to be 1
    lineno = 1
    answer = None
    while 1:
        line = fp.readline()
        if line == '':
            break
        if cre.match(line):
            answer = funcname, filename, lineno
            break
        lineno = lineno + 1
    fp.close()
    return answer


# Interaction prompt line will separate file and call info from code
# text using value of line_prefix string.  A newline and arrow may
# be to your liking.  You can set it once pdb is imported using the
# command "pdb.line_prefix = '\n% '".
# line_prefix = ': '    # Use this to get the old situation back
def translate(pat):
    """Translate a shell PATTERN to a regular expression.

    There is no way to quote meta-characters.
    """

    i, n = 0, len(pat)
    res = ''
    while i < n:
        c = pat[i]
        i = i+1
        if c == '*':
            res = res + '.*'
        elif c == '?':
            res = res + '.'
        elif c == '[':
            j = i
            if j < n and pat[j] == '!':
                j = j+1
            if j < n and pat[j] == ']':
                j = j+1
            while j < n and pat[j] != ']':
                j = j+1
            if j >= n:
                res = res + '\\['
            else:
                stuff = pat[i:j].replace('\\','\\\\')
                i = j+1
                if stuff[0] == '!':
                    stuff = '^' + stuff[1:]
                elif stuff[0] == '^':
                    stuff = '\\' + stuff
                res = '%s[%s]' % (res, stuff)
        else:
            res = res + re.escape(c)
    return res + '\Z(?ms)'
def __init__(cls, name, bases, dct):
    super(_TemplateMetaclass, cls).__init__(name, bases, dct)
    if 'pattern' in dct:
        pattern = cls.pattern
    else:
        pattern = _TemplateMetaclass.pattern % {
            'delim' : _re.escape(cls.delimiter),
            'id'    : cls.idpattern,
            }
    cls.pattern = _re.compile(pattern, _re.IGNORECASE | _re.VERBOSE)
def updateline(file, key, value, casefold = 1):
    try:
        f = open(file, 'r')
        lines = f.readlines()
        f.close()
    except IOError:
        lines = []
    pat = re.escape(key) + ':(.*)\n'
    prog = re.compile(pat, casefold and re.IGNORECASE)
    if value is None:
        newline = None
    else:
        newline = '%s: %s\n' % (key, value)
    for i in range(len(lines)):
        line = lines[i]
        if prog.match(line):
            if newline is None:
                del lines[i]
            else:
                lines[i] = newline
            break
    else:
        if newline is not None:
            lines.append(newline)
    tempfile = file + "~"
    f = open(tempfile, 'w')
    for line in lines:
        f.write(line)
    f.close()
    os.rename(tempfile, file)

# Test program
def find_library(name):
    ename = re.escape(name)
    expr = r':-l%s\.\S+ => \S*/(lib%s\.\S+)' % (ename, ename)
    f = os.popen('/sbin/ldconfig -r 2>/dev/null')
    try:
        data = f.read()
    finally:
        f.close()
    res = re.findall(expr, data)
    if not res:
        return _get_soname(_findLib_gcc(name))
    res.sort(cmp= lambda x,y: cmp(_num_version(x), _num_version(y)))
    return res[-1]
def _findSoname_ldconfig(name):
    import struct
    if struct.calcsize('l') == 4:
        machine = os.uname()[4] + '-32'
    else:
        machine = os.uname()[4] + '-64'
    mach_map = {
        'x86_64-64': 'libc6,x86-64',
        'ppc64-64': 'libc6,64bit',
        'sparc64-64': 'libc6,64bit',
        's390x-64': 'libc6,64bit',
        'ia64-64': 'libc6,IA-64',
        }
    abi_type = mach_map.get(machine, 'libc6')

    # XXX assuming GLIBC's ldconfig (with option -p)
    expr = r'(\S+)\s+\((%s(?:, OS ABI:[^\)]*)?)\)[^/]*(/[^\(\)\s]*lib%s\.[^\(\)\s]*)' \
           % (abi_type, re.escape(name))
    f = os.popen('/sbin/ldconfig -p 2>/dev/null')
    try:
        data = f.read()
    finally:
        f.close()
    res = re.search(expr, data)
    if not res:
        return None
    return res.group(1)