我们从Python开源项目中,提取了以下49个代码示例,用于说明如何使用re.subn()。
def del_comment(soql):
    """Strip ``//`` line comments and ``/* ... */`` block comments from SOQL.

    Args:
        soql: The SOQL text. Falsy values (``None``, ``""``) are returned
            unchanged.

    Returns:
        The text with tabs turned into spaces, both kinds of comment removed
        and surrounding whitespace stripped.
    """
    if not soql:
        return soql

    # Tabs become spaces; newlines are kept so that a ``//`` comment still
    # ends at the end of its own line.
    soql = soql.strip().replace('\t', ' ')

    # delete // comment
    without_line_comments = re.sub(r"//.*", "", soql)

    # delete /**/ comment; the lazy [\s\S]*? lets a comment span several lines
    without_comments = re.sub(r"/\*[\s\S]*?\*/", "", without_line_comments)

    return without_comments.strip()


# get sobject name from soql
def pre_process(dates):
    """Clean a list of raw date strings and cut each one near its first digit.

    Steps, in order:

    1. keep only the first half of ``dates``;
    2. drop empty strings and strings of 100 characters or more;
    3. turn newline, tab and backspace characters into spaces;
    4. trim leading/trailing whitespace and collapse inner whitespace runs;
    5. keep each string from one character before the index reported by
       ``find_first_digit_index``; strings for which that helper returns a
       falsy value (``0`` or ``None``) are dropped.

    Args:
        dates: list of strings.

    Returns:
        list of str: the shortened strings.
    """
    # Only the first half of the input is processed.
    dates = dates[:len(dates) // 2]

    # Keep non-empty strings shorter than 100 characters.
    dates = [d for d in dates if 0 < len(d) < 100]

    # Replace control whitespace with plain spaces.
    # NOTE(review): the last replace() maps a space to a space as written;
    # presumably its first argument was a special space character - confirm.
    dates = [d.replace('\n', ' ').replace('\t', ' ').replace('\b', ' ').replace(' ', ' ')
             for d in dates]

    # Trim both ends and collapse whitespace runs to a single space.
    new_dates = []
    for v in dates:
        v = re.sub(r'\A\s*', '', v)
        v = re.sub(r'\s*\Z', '', v)
        v = re.sub(r'\s+', ' ', v)
        new_dates.append(v)

    # Cut everything before the character preceding the first digit.
    shorter_dates = []
    for v in new_dates:
        ffdi = find_first_digit_index(v)
        if ffdi:
            shorter_dates.append(v[ffdi - 1:])
    return shorter_dates
def _rename_schema(self, schema): """ Rename a schema name and return new name Args: schema (str): schema nae Returns: str """ try: return self._schema_new_names[schema] except KeyError: new_name = schema for pattern, repl in self.schema_rename.items(): new_name, subs_made = re.subn(pattern, repl, schema) if subs_made: break self._schema_new_names[schema] = new_name return new_name
def _rename_table(self, schema, table): """ Rename a table and rereturn new name Args: schema (str): schema name (the new one) table (str): table name Returns: str """ try: return self._table_new_names[(schema, table)] except KeyError: new_name = table _rules = self.table_rename.get(schema, {}) for pattern, repl in _rules.items(): new_name, subs_made = re.subn(pattern, repl, table) if subs_made: break self._table_new_names[(schema, table)] = new_name return new_name
def resolve_reference_ucp(cls, design_uri):
    """Retrieve artifacts from a UCP service endpoint.

    Return a byte array of the response content. Assumes Keystone
    authentication required.

    :param design_uri: Tuple as returned by urllib.parse for the design reference
    :raises errors.InvalidDesignReference: if the response status code is
        400 or greater
    """
    ks_sess = KeystoneUtils.get_session()

    # Drop everything up to and including the first "+" of the scheme,
    # e.g. "prefix+https" -> "https".
    new_scheme = re.sub(r'^[^+]+\+', '', design_uri.scheme)
    url = urllib.parse.urlunparse(
        (new_scheme, design_uri.netloc, design_uri.path, design_uri.params,
         design_uri.query, design_uri.fragment))

    logger = logging.getLogger(__name__)
    logger.debug("Calling Keystone session for url %s", url)
    resp = ks_sess.get(url)
    if resp.status_code >= 400:
        raise errors.InvalidDesignReference(
            "Received error code for reference %s: %s - %s" %
            (url, str(resp.status_code), resp.text))
    return resp.content
def apply(self, sed): _, repl, count, printit, _, write, filename = self.args # managing ampersand is done when converting to python format # manage empty regexp regexp = sed.cache_regexp(self.regexp) success, sed.PS = regexp.subn(repl, sed.PS, count=count) sed.subst_successful = sed.subst_successful or success if success: if printit: sed.printline(sed.PS) if write: sed.write_subst_file(filename, sed.PS) return self.next
def normalize(word):
    '''
    Perform fuzzy search normalization (collapse commonly confused sounds
    so search is resilient to misspellings of Alutiiq words).

    >>> normalize('tuumiaqlluku')
    'tumiaklluku'
    >>> normalize("Wiiwaq")
    'uiuak'
    >>> normalize("estui'isuun")
    'stuisun'
    '''
    # A lowercase 'g' becomes 'r' unless it directly follows an 'n'.
    word = re.sub(r'(?<!n)g', 'r', word)
    # Lowercase every ASCII capital except 'R'.
    word = re.sub(r'[A-QS-Z]', lambda hit: hit.group().lower(), word)
    # q->k, y->i, w->u; 'e' and apostrophes are dropped.
    word = word.translate(str.maketrans('qyw', 'kiu', "e'"))
    # Collapse runs of the same vowel to a single vowel.
    for vowel in 'aiu':
        word = re.sub(vowel + '+', vowel, word)
    return word
def find_substring_ignore_special_chars(s, substr):
    """Locate ``substr`` in ``s`` comparing only letters A-Z and digits.

    Both strings are upper-cased and every character outside ``[A-Z0-9]``
    is ignored for the comparison.

    Returns:
        The index (in the upper-cased ``s``) of the character where the
        match starts, or ``-1`` when there is no match.
    """
    upper_s = s.upper()
    stripped_s = re.sub('[^A-Z0-9]', '', upper_s)
    stripped_sub = re.sub('[^A-Z0-9]', '', substr.upper())

    offset = stripped_s.find(stripped_sub)
    if offset == -1:
        return -1

    # Walk the upper-cased text until more than `offset` significant
    # characters have been seen; that position is the start of the match.
    significant = re.compile('[A-Z0-9]')
    seen = 0
    position = 0
    for position, char in enumerate(upper_s):
        if significant.match(char):
            seen += 1
        if seen > offset:
            break
    return position
def search_and_replace_date(lines, newdate):
    """Rewrite, in place, the first line matching the date-assignment pattern.

    The pattern and the replacement are built from ``gpar.date_assign_re``,
    ``gpar.datevar``, ``gpar.date_repl_re`` and ``newdate``. Only the first
    matching line is considered.

    Returns:
        ``True`` if that line was changed; ``False`` if it matched but the
        replacement left it unchanged, or if no line matched.
    """
    pattern = gpar.date_assign_re % gpar.datevar
    repl = gpar.date_repl_re % newdate
    log.debug('search_and_replace_date: pattern: %s', pattern)
    log.debug('search_and_replace_date: replace: %s', repl)

    for index, current in enumerate(lines):
        updated, hits = re.subn(pattern, repl, current)
        if not hits:
            continue
        if current == updated:
            # pattern matched, but not changed
            return False
        # pattern matched and line changed
        log.debug('found pattern: %s', current[:-1])
        log.debug(' replacement: %s', updated[:-1])
        lines[index] = updated
        return True
    return False
def logReSubn(subnExpr, preComment, postComment, errorMsg='') -> str:
    """
    Report the outcome of a re.subn call on stdout and hand back its text.

    Prints preComment, then the substitution count followed by postComment.
    When nothing was substituted and errorMsg is non-empty, errorMsg is
    printed as well.

    Returns the string part of subnExpr (the text with replacements made).
    """
    replaced_text = subnExpr[0]
    n_subs = subnExpr[1]

    print(preComment)
    print(str(n_subs) + ' ' + postComment)
    if n_subs == 0:
        if errorMsg != '':
            print(errorMsg)
    return replaced_text
def subn(self, repl, string, count=0):
    """Substitute matches of ``self.pattern`` and count the replacements.

    When neither ``repl`` nor ``string`` is a ``SourcedString`` the call is
    handed to ``self.pattern.subn``. Otherwise the result is assembled piece
    by piece with ``+=``, inserting ``repl`` as-is for every match.

    Returns:
        A ``(new_string, number_of_substitutions)`` tuple.
    """
    if not (isinstance(repl, SourcedString) or
            isinstance(string, SourcedString)):
        return self.pattern.subn(repl, string, count)

    assembled = ''
    cursor = 0
    replaced = 0
    for hit in self.pattern.finditer(string):
        # Text between the previous match and this one, then the replacement.
        assembled += string[cursor:hit.start()]
        assembled += repl
        cursor = hit.end()
        replaced += 1
        if count and replaced == count:
            break
    assembled += string[cursor:]
    return assembled, replaced
def patch_re_module():
    """
    Modify the standard ``re`` module by installing new versions of the
    functions ``re.compile``, ``re.sub``, and ``re.subn``, causing regular
    expression substitutions to return ``SourcedStrings`` when called with
    ``SourcedStrings`` arguments.

    Use this function only if necessary: it potentially affects all Python
    modules that use regular expressions!
    """
    # NOTE(review): both replacements accept (pattern, repl, string, count=0)
    # only - there is no ``flags`` parameter, unlike the stdlib functions.
    def new_re_sub(pattern, repl, string, count=0):
        # re.compile is looked up at call time, i.e. after it was rebound below.
        return re.compile(pattern).sub(repl, string, count)

    def new_re_subn(pattern, repl, string, count=0):
        return re.compile(pattern).subn(repl, string, count)

    # Rebind the module attributes; this affects every user of ``re``.
    re.compile = SourcedStringRegexp
    re.sub = new_re_sub
    re.subn = new_re_subn
def _unescape_entities(xml_text): """ Strips tags of the form <text:span ...> from inside Jinja elements and unescapes HTML codes for >, <, & and " """ unescape_rules = { r'(?is)({([{%])[^%}]*?)(</?text:s.*?>)(.*?[%}]})': r'\1 \4', r'(?is)({([{%])[^%}]*?)(>)(.*?[%}]})' : r'\1>\4', r'(?is)({([{%])[^%}]*?)(<)(.*?[%}]})' : r'\1<\4', r'(?is)({([{%])[^%}]*?)(&)(.*?[%}]})' : r'\1&\4', r'(?is)({([{%])[^%}]*?)(")(.*?[%}]})' : r'\1"\4', } for regexp, replacement in unescape_rules.items(): subs_made = True while subs_made: xml_text, subs_made = re.subn(regexp, replacement, xml_text) return xml_text
def datetime_parsing(text, base_date=None):
    """
    Extract datetime objects from a string of text.

    Args:
        text: the text to scan.
        base_date: reference date handed to every parser function. Defaults
            to the current time *at call time* (a ``datetime.now()`` default
            in the signature would be frozen when the module is imported).

    Returns:
        A list of ``(matched_text, value, span)`` tuples sorted by the start
        position of the match.
    """
    if base_date is None:
        base_date = datetime.now()

    matches = []
    found_array = []

    # Find the position in the string
    for expression, function in regex:
        for match in expression.finditer(text):
            matches.append((match.group(), function(match, base_date), match.span()))

    # Wrap the matched text with TAG element to prevent nested selections
    for match, value, spans in matches:
        # The matched text is data, not a pattern: escape it for the search
        # and use a callable so it is inserted literally in the replacement.
        text, substituted = re.subn(
            '(?!<TAG[^>]*?>)' + re.escape(match) + '(?![^<]*?</TAG>)',
            lambda _hit, tagged='<TAG>' + match + '</TAG>': tagged,
            text
        )
        if substituted != 0:
            found_array.append((match, value, spans))

    # To preserve order of the match, sort based on the start position
    return sorted(found_array, key=lambda match: match and match[2][0])
def getTextFromSoup(htmlsoup):
    """Render a parsed HTML fragment as plain text with Markdown images.

    Every ``<img>`` tag becomes ``![image](URL)`` where URL is the tag's own
    ``src`` prefixed with ``http://acm.hdu.edu.cn``; ``<br>`` (in any of its
    ``<br>``, ``<br/>``, ``<br />`` spellings) becomes a newline and all
    other tags are removed.

    Args:
        htmlsoup: an object providing ``find_all('img')`` and ``str()``.

    Returns:
        str: the converted text.
    """
    text = str(htmlsoup)
    img_urls = ['http://acm.hdu.edu.cn%s' % img['src']
                for img in htmlsoup.find_all('img')]

    for url in img_urls:
        # count=1 so that each URL replaces only the next remaining <img>
        # tag (otherwise the first URL would be used for all of them). The
        # callable keeps any backslash in the URL literal.
        text = re.sub(r'<img.+?>',
                      lambda _hit, link=url: '![image](%s)' % link,
                      text, count=1)

    text = re.sub(r'<br\s*/?>', '\n', text)
    text = re.sub(r'<.+?>', '', text)
    return text
def translate_enumeration(types, enum, constructor):
    """Register an enumeration and its constants in ``types``.

    The entry is stored under the renamed enumeration name as
    ``{"type": constructor, "ctype": "i32", "constants": {...}}``. Constant
    names have the ``VK_`` prefix and any leading upper-cased pieces of the
    enumeration name removed.

    Returns:
        The renamed enumeration name.
    """
    name = rename_enumeration(enum["name"])
    constants = {}
    types[name] = {"type": constructor, "ctype": "i32", "constants": constants}

    # turns out the "expand" was insufficient nearly everywhere.
    prefix = "^VK_" + "".join(
        "(" + cell.upper() + "_)?" for cell in split_case(name))

    for tag in enum:
        if tag.name != "enum":
            continue
        short_name = re.sub(prefix, "", tag["name"])
        if "bitpos" in tag.attrs:
            value = 1 << int(tag["bitpos"])
        elif tag["value"].startswith("0x"):
            value = int(tag["value"], 16)
        else:
            value = int(tag["value"])
        constants[short_name] = value
    return name
def translate_enumeration(enum, constructor):
    """Print the source text of an enumeration definition.

    The output has the form ``Name = constructor('Name', { ... })`` with one
    ``'CONSTANT': value,`` line per ``enum`` child. Constant names have the
    ``VK_`` prefix and any leading upper-cased pieces of the enumeration
    name removed; values are printed as written (``1 << bitpos`` for
    bit-position entries).

    Returns:
        The renamed enumeration name.
    """
    name = enum["name"]
    name = rename_enumeration(name)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("{0} = {1}({0!r}, {{".format(name, constructor))
    # turns out the "expand" was insufficient nearly everywhere.
    prefix = "^VK_"
    for cell in split_case(name):
        prefix += "(" + cell.upper() + "_)?"
    for tag in enum:
        if tag.name == "enum":
            name_ = re.subn(prefix, "", tag["name"])[0]
            if "bitpos" in tag.attrs:
                value = "1 << " + tag["bitpos"]
            else:
                value = tag["value"]
            print(" {!r:<50}: {!s},".format(name_, value))
    print("})")
    return name
def fix_unpair_tag(text, tag, count_selfclosing=True):
    """
    Normalise every spelling of an unpaired tag and report how many were wrong.

    ``tag`` is the bare tag name, for example "br" for <br>. Every variant
    with extra slashes, backslashes or spaces is rewritten to ``<tag>``.

    If self-closing tags are considered correct, pass
    ``count_selfclosing=False``: they are still rewritten for the sake of
    unification, but those rewrites are not counted.

    Returns a (new_text, replacements_count) tuple. Used in 2nd error.
    """
    canonical = "<{}>".format(tag)

    # Number of occurrences that are already acceptable and must not be
    # reported as fixes.
    if count_selfclosing:
        already_ok = count_ignore_case(text, canonical)
    else:
        already_ok = len(re.findall(r"<{}\s*/?>".format(tag), text))

    any_variant = r"<[/\\ ]*{}[/\\ ]*>".format(tag)
    new_text, replaced = re.subn(any_variant, canonical, text, flags=re.I)
    return (new_text, replaced - already_ok)
def fix_pair_tag(text, tag, recursive=False):
    """
    Turn malformed closing forms of a paired tag into ``</tag>``.

    ``tag`` is the bare tag name, for example "b" for <b>. Two malformed
    forms are handled: the self-closing style (``<b/>``, ``<b\\>``) and the
    backslash style (``<\\b>``).

    ``recursive`` must be True if nested tags are correct; it is passed on
    to the tag-balance check. If the rewritten text fails that check,
    nothing is changed.

    Returns a (new_text, replacements_count) tuple. Used in 2nd error.
    """
    closing = "</{}>".format(tag)
    malformed = (
        r"<[ ]*{}[ ]*[/\\]>".format(tag),
        r"<\\[ ]*{}[ ]*>".format(tag),
    )

    candidate = text
    total = 0
    for pattern in malformed:
        candidate, hits = re.subn(pattern, closing, candidate, flags=re.I)
        total += hits

    if not check_tag_balance(candidate, tag, recursive):
        return (text, 0)
    return (candidate, total)
def error_002_invalid_tags(text):
    """Fix the error and return (new_text, replacements_count) tuple."""
    total = 0

    # Unpaired tags first; self-closing spellings are not counted as fixes.
    for tag in ("br", "hr"):
        text, fixed = fix_unpair_tag(text, tag, count_selfclosing=False)
        total += fixed

    # <br clear=...> becomes the {{clear}} template.
    clear_rules = (
        (r"<br clear=\"?(left|right)\"?\s*/?>", "{{clear|\\1}}"),
        (r"<br clear=\"?(?:all|both)\"?\s*/?>", "{{clear}}"),
    )
    for pattern, template in clear_rules:
        text, fixed = re.subn(pattern, template, text)
        total += fixed

    # Paired tags; div and span may legitimately be nested.
    pair_tags = (
        ("small", {}),
        ("center", {}),
        ("div", {"recursive": True}),
        ("span", {"recursive": True}),
    )
    for tag, options in pair_tags:
        text, fixed = fix_pair_tag(text, tag, **options)
        total += fixed

    return (text, total)
def error_068_interwiki_link(text):
    """
    Fix links to Special:BookSearch and direct links, written like interwiki ones.
    For example, for ruwiki fixes [[:ru:Example|Something]].
    Return (new_text, replacements_count) tuple.
    """
    def _check_link(match_obj):
        """Rebuild the link, adding an extra ":" for file and category targets."""
        opening, rest = match_obj.group(1), match_obj.group(2)
        target = rest.lstrip().lower()
        needs_colon = (re.search(r"^" + IMAGE, target) is not None or
                       re.search(r"^" + CATEGORY, target) is not None)
        if needs_colon:
            return opening + ":" + rest
        return opening + rest

    # bot will not fix links without a pipe: manual control needed
    direct_pattern = r"(\[\[):{}:([^|\[\]\n]+\|[^|\[\]\n]+\]\])".format(LANG_CODE)
    text, direct = re.subn(direct_pattern, _check_link, text, flags=re.I)

    books_pattern = r"\[\[:..:Special:BookSources/\d+X?\|(ISBN [0-9\-X]+)\]\]"
    text, books = re.subn(books_pattern, "\\1", text, flags=re.I)

    return (text, direct + books)
def error_086_ext_link_two_brackets(text):
    """Fix some cases and return (new_text, replacements_count) tuple."""
    def _process_link(match_obj):
        """Turn a piped double-bracket link into a single-bracket external link."""
        link, name = match_obj.group(1, 2)
        # Spaces are not allowed in the URL part: wiki URLs use "_",
        # everything else is percent-encoded.
        filler = "_" if "wikipedia.org" in link.lower() else "%20"
        return "[" + link.replace(" ", filler) + " " + name + "]"

    # case: [[http://youtube.com/|YouTube]]
    piped = r"\[\[(https?://[^|\[\]\n]+)\|([^|\[\]\n]+)\]\]"
    text, piped_count = re.subn(piped, _process_link, text, flags=re.I)

    # case: [[http://youtube.com YouTube]]
    doubled = r"\[(\[https?://[^\[\]\n]+\])\]"
    text, doubled_count = re.subn(doubled, "\\1", text, flags=re.I)

    return (text, piped_count + doubled_count)
def error_104_quote_marks_in_refs(text):
    """Fix the error and return (new_text, replacements_count) tuple."""
    quoted = 0

    def quote_ref(match):
        """Quote ref name if it's necessary."""
        #pylint: disable=undefined-variable
        nonlocal quoted
        head, name, tail = match.group(1, 2, 3)
        untouched = match.group(0)
        if re.search(r"group\s*=", name):
            return untouched
        if "\"" in name or re.match(r"^'.*'$", name):
            # ref is already quoted (all is ok) or has a quote (dangerous to fix)
            return untouched
        if re.search(r"['/\\=?#\s]", name) is None:
            return untouched
        quoted += 1
        return head + "\"" + name + "\"" + tail

    # Opening quote present, closing quote missing.
    text, closed = re.subn(r"(<ref\s+name\s*=\s*\"[^\">]+?)(\s*/?>)", "\\1\"\\2", text)
    # Closing quote present, opening quote missing.
    text, opened = re.subn(r"(<ref\s+name\s*=\s*)([^\">]+?\"\s*/?>)", "\\1\"\\2", text)
    # Unquoted names that contain characters requiring quotes.
    text = re.sub(r"(<ref\s+name\s*=\s*)(.*?)(\s*/?>)", quote_ref, text)
    return (text, closed + opened + quoted)
def modify_start_url(self, start_url):
    """
    Given a SAML redirect URL, parse it and change the ID to
    a consistent value, so the request is always identical.
    """
    # Parse the SAML Request URL to get the XML being sent to TestShib
    parsed = urlparse(start_url)
    query = {key: values[0] for key, values in parse_qs(parsed.query).items()}
    request_xml = OneLogin_Saml2_Utils.decode_base64_and_inflate(
        query['SAMLRequest']
    ).decode()

    # Modify the XML: exactly one ID attribute is expected.
    request_xml, changed = re.subn(r'ID="[^"]+"', 'ID="TEST_ID"', request_xml)
    self.assertEqual(changed, 1)

    # Update the URL to use the modified query string:
    query['SAMLRequest'] = OneLogin_Saml2_Utils.deflate_and_base64_encode(
        request_xml
    )
    components = list(parsed)
    components[4] = urlencode(query)
    return urlunparse(components)
def removebackspaces(text):
    """Apply backspaces and drop ``ESC [ K`` sequences found in ``text``.

    A backspace (``\\x08``) deletes itself together with the character in
    front of it; the terminal "erase to end of line" sequence ``ESC [ K``
    is simply removed.

    Args:
        text (str): captured terminal output.

    Returns:
        str: the cleaned text.
    """
    backspace_or_eol = re.compile(r'(.\010)|(\033\[K)')
    replaced = 1
    # One substitution per pass: removing "x<BS>" can put another character
    # in front of the next backspace, so the text is rescanned each time.
    while replaced > 0:
        text, replaced = backspace_or_eol.subn('', text, count=1)
    return text
def load(self, name, model, alias_list=None):
    """Load the parameters stored under ``name`` into ``model.parameters``.

    Args:
        name: passed to ``self._load``.
        model: object whose ``parameters`` attribute is assigned.
        alias_list: optional iterable of ``(src, dst)`` pairs. A key that
            starts with ``src`` has that prefix replaced by ``dst``
            (or by ``dst.prefix()`` when ``dst`` is a ``widgets.Widget``).
    """
    param_dict = self._load(name)
    if alias_list:
        new_dict = {}
        for key, value in param_dict.items():
            for src, dst in alias_list:
                if not key.startswith(src):
                    continue
                # NOTE(review): looks like a leftover debug print - confirm.
                print(key)
                if isinstance(dst, widgets.Widget):
                    dst = dst.prefix()
                # src is used as a regular expression here, not escaped.
                key, _ = re.subn('^{}'.format(src), dst, key)
            # NOTE(review): the original layout was lost; this assignment is
            # assumed to belong to the outer loop so that every key is kept -
            # verify against the upstream file.
            new_dict[key] = value
        param_dict = new_dict
    model.parameters = param_dict
def checksum(line):
    """Check the trailing checksum digit of ``line``.

    Every dash counts as 1 and every non-digit character is ignored. The
    line is valid when its last digit equals the sum of all preceding
    digits modulo 10.

    Args:
        line (str): the text to check.

    Returns:
        bool: ``True`` if the checksum matches; ``False`` otherwise,
        including when the line contains no digit at all.
    """
    line = line.replace('-', '1')  # replace dashes with 1's
    digits = [int(ch) for ch in re.sub(r'\D', '', line)]  # drop non-digit chars
    if not digits:
        # Nothing to compare against: there is no checksum digit.
        return False
    # compare the last digit with the sum of the others modulo 10
    return digits[-1] == sum(digits[:-1]) % 10
def stamp_source(base_dir, version, dry_run=False):
    """update version string in passlib dist

    Rewrites the ``__version__ = ...`` line of
    ``<base_dir>/passlib/__init__.py`` to ``__version__ = repr(version)``.

    Args:
        base_dir: directory containing the ``passlib`` package.
        version: new version value.
        dry_run: when true, the file is read and checked but not rewritten.

    Raises:
        AssertionError: if the file does not contain exactly one
            ``__version__`` assignment line.
    """
    path = os.path.join(base_dir, "passlib", "__init__.py")
    with open(path) as fh:
        source = fh.read()

    new_line = '__version__ = ' + repr(version)
    # A callable replacement keeps any backslash in repr(version) literal.
    output, count = re.subn(r'(?m)^__version__\s*=.*$',
                            lambda _match: new_line, source)
    if count != 1:
        # Raised explicitly so the check still runs under ``python -O``.
        raise AssertionError("failed to replace version string")

    if not dry_run:
        os.unlink(path)  # sdist likes to use hardlinks
        with open(path, "w") as fh:
            fh.write(output)
def clear_text(text=''):
    """Remove several kinds of boilerplate from ``text`` and strip the result.

    NOTE(review): the ``?`` runs in the string literals below look like
    non-ASCII text that was lost to an encoding problem; as written, ``??``
    inside a regular expression is a lazy optional quantifier, not literal
    text. Restore the literals from the upstream file before relying on
    this function.
    """
    # The raw string targets the four literal characters backslash, x, a, 0.
    # NOTE(review): presumably a real non-breaking space was intended - confirm.
    t_text = text.replace(r'\xa0', ' ')
    t_text = t_text.replace('????', '')
    # Parenthesised "(... Bytes, ...: ...)" note.
    t_text = re.subn('\(.* Bytes, ????: .*\)', '', t_text)[0]
    # Date/time stamp of the form digits-digits-digits digits:digits.
    t_text = re.subn('\d*-\d*-\d* \d*:\d* ??', '', t_text)[0]
    # Everything on a line up to ".png" plus one following whitespace character.
    t_text = re.subn('.*\.png\s', '', t_text)[0]
    return t_text.strip()
def safe_name(self):
    """Return ``self.name`` with every non-word character replaced by ``_``.

    Returns ``None`` when ``self.name`` is ``None``.
    """
    if self.name is None:
        return None
    return re.sub(r"[^\w]", "_", self.name)
def subn(self, repl, string, count):
    """Delegate to ``re_sub_ex`` using this object's pattern, compiled form and flags.

    NOTE(review): presumably returns what ``re.subn`` would, i.e. a
    ``(new_string, count)`` pair - confirm against ``re_sub_ex``.
    """
    return re_sub_ex(self.pattern, self.compiled, repl, string, count, self.flags)


# -- Parser ------------------------------------------------------------------
def clean(s, unicode=True):
    """Collapse whitespace runs in ``s`` and strip both ends.

    Every run of two or more whitespace characters is replaced by the last
    character of that run.

    Args:
        s (str): text to clean.
        unicode (bool): when true, ``re.UNICODE`` is passed as the regex flag.

    Returns:
        str: the cleaned text.
    """
    flags = re.UNICODE if unicode else 0
    # flags must be passed by keyword: the fourth positional parameter of
    # re.sub/re.subn is ``count``, so a positional re.UNICODE (32) would cap
    # the number of replacements at 32 instead of setting a flag.
    return re.sub(r'(\s){2,}', r'\g<1>', s, flags=flags).strip()
def refine_item(self, response, item):
    """Fill ``item['birth']`` and ``item['death']`` from the page header.

    The first ``<em>`` of the first paragraph in ``#maincontent`` is read
    up to its first ``<br>``, tags are removed and the text is split on
    ``'d.'``. Only when that yields exactly two parts are the fields set;
    a value of ``'?'`` is stored as ``None``.
    """
    header = text.clean_extract(
        response, './/div[@id="maincontent"]/p[1]/em').split('<br>')[0]
    parts = re.sub(r'<[^>]+>', '', header).split('d.')

    if len(parts) == 2:
        born, died = parts
        # Drop the leading "b." marker from the birth part.
        born = born[len('b.'):].strip()
        died = died.strip()
        item['birth'] = None if born == '?' else born
        item['death'] = None if died == '?' else died

    return super(MunksrollSpider, self).refine_item(response, item)
def replace(self, text):
    """Apply every ``(pattern, repl)`` pair of ``self.patterns`` in order.

    Each substitution works on the output of the previous one.

    Returns:
        The text after all substitutions.
    """
    result = text
    for pattern, repl in self.patterns:
        result = re.sub(pattern, repl, result)
    return result
def html2bash(str):
    """Replace every match of the three HTML patterns with a newline.

    The patterns ``constant.PATTERN_BR``, ``constant.PATTERN_P`` and
    ``constant.PATTERN_F_P`` are applied in that order.
    """
    converted = str
    for pattern in (constant.PATTERN_BR, constant.PATTERN_P, constant.PATTERN_F_P):
        converted = re.sub(pattern, '\n', converted)
    return converted
def money_str(self, with_symbol=True):
    """
    Format money using the currency's format string. If amount is
    negative, the sign is moved to the front of the string

    >>> Money(-12).money_str()
    -$12.00
    >>> Money(123456.789).money_str()
    $123,456.79
    >>> Money(987654321, currency=CURRENCIES['IDR']).money_str()
    Rp 987654321
    >>> Money(-987654321, currency=CURRENCIES['IDR']).money_str()
    Rp -987654321
    """
    # The absolute value is formatted; the sign is re-attached below.
    #fails if any component cannot be converted to ascii -kurtis
    currency = self.currency
    fields = {
        'amount': abs(self.amount),
        'symbol': currency.symbol,
        'code': currency.code,
        'name': currency.name,
        'decimals': currency.decimals,
    }
    formatted = currency.format_str.format(**fields)

    if not (self.amount_raw < 0):
        return formatted

    # Put the minus sign right after the first whitespace character, or in
    # front of the whole string when there is no whitespace.
    signed, hits = re.subn(r'\s', r' -', formatted, count=1)
    if hits == 0:
        return '-' + signed
    return signed
def test_as_replace_function(self):
    """Test that replace can be used as a replace function."""
    source = "this will be fed into re.subn! Here we go! this will be fed into re.subn! Here we go!"
    search = bre.compile_search(r"(?P<first>this )(?P<second>.*?)(!)")
    replacer = bre.compile_replace(search, r'\c\g<first>is awesome\g<3>')

    # The compiled replace object is handed to the pattern's own subn().
    outcome, substitutions = search.subn(replacer, source)

    self.assertEqual(outcome, "This is awesome! Here we go! This is awesome! Here we go!")
    self.assertEqual(substitutions, 2)
def test_sub_wrong_replace_type(self):
    """Test sending wrong type into `sub`, `subn`."""
    pattern = re.compile('test')
    # A replace object compiled in FORMAT mode must be rejected by the
    # non-format functions.
    format_replace = bre.compile_replace(pattern, 'whatever', bre.FORMAT)

    for substitute in (bre.sub, bre.subn):
        with pytest.raises(ValueError) as excinfo:
            substitute(pattern, format_replace, 'test')
        assert "Compiled replace cannot be a format object!" in str(excinfo.value)
def test_sub_wrong_replace_format_type(self):
    """Test sending wrong format type into `sub`, `subn`."""
    pattern = re.compile('test')
    # A replace object compiled without FORMAT must be rejected by the
    # format functions.
    plain_replace = bre.compile_replace(pattern, 'whatever')

    for substitute in (bre.subf, bre.subfn):
        with pytest.raises(ValueError) as excinfo:
            substitute(pattern, plain_replace, 'test')
        assert "Compiled replace is not a format object!" in str(excinfo.value)
def test_subn(self):
    """Test that `subn` works."""
    outcome = bre.subn(
        r'tset', 'test',
        r'This is a tset for subn! This is a tset for subn!'
    )
    expected = ('This is a test for subn! This is a test for subn!', 2)
    self.assertEqual(outcome, expected)
def compile_replace(pattern, repl, flags=0):
    """Construct a method that can be used as a replace method for `sub`, `subn`, etc.

    `pattern` must be a compiled regular expression. `repl` may be a
    string/bytes template, a `ReplaceTemplate`, or an already compiled
    `Replace` object; `flags` is only accepted together with a
    string/bytes template.

    Raises `TypeError` for an invalid `pattern` or `repl` type and
    `ValueError` when `flags` is given with a precompiled `repl` or when a
    `Replace` object was compiled for a different pattern.
    """
    if pattern is None or not isinstance(pattern, RE_TYPE):
        raise TypeError("Pattern must be a compiled regular expression!")

    if isinstance(repl, (compat.string_type, compat.binary_type)):
        template = ReplaceTemplate(pattern, repl, bool(flags & FORMAT))
    elif isinstance(repl, Replace):
        if flags:
            raise ValueError("Cannot process flags argument with a compiled pattern!")
        if repl.pattern_hash != hash(pattern):
            raise ValueError("Pattern hash doesn't match hash in compiled replace!")
        # Already compiled for this very pattern: reuse it.
        return repl
    elif isinstance(repl, ReplaceTemplate):
        if flags:
            raise ValueError("Cannot process flags argument with a ReplaceTemplate!")
        template = repl
    else:
        raise TypeError("Not a valid type!")

    return Replace(
        functools.partial(_apply_replace_backrefs, repl=template),
        template.use_format,
        template.pattern_hash
    )


# Convenience methods like re has, but slower due to overhead on each call.
# It is recommended to use compile_search and compile_replace
def subfn(pattern, format, string, count=0, flags=0):  # noqa B002
    """Apply `subn` after applying backrefs.

    `format` may be a string/bytes format template, a compiled replace
    object created in format mode, or any other replacement, which is
    passed through to `re.subn` unchanged.

    Raises `ValueError` when `format` is a compiled replace object that was
    not created in format mode.
    """
    compiled_format = _is_replace(format)
    text_format = isinstance(format, (compat.string_type, compat.binary_type))
    if compiled_format and not format.use_format:
        raise ValueError("Compiled replace is not a format object!")

    pattern = compile_search(pattern, flags)

    if compiled_format or text_format:
        # Only a raw template still needs to be told it is a format string.
        replacement = compile_replace(
            pattern, format, flags=FORMAT if text_format else 0)
    else:
        replacement = format

    return re.subn(pattern, replacement, string, count, flags)
def test_re_subn(self):
    """Check the (new_string, count) pairs returned by ``re.subn``."""
    # Inline (?i) makes the match case-insensitive: both runs are replaced.
    self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
    # Case-sensitive: only the lowercase run is replaced.
    self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
    # No match: the string comes back unchanged with a count of 0.
    self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
    # "b*" also matches the empty string at each of the four positions.
    self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
    # The fourth positional argument is the maximum number of replacements.
    self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
def test_get_memory_info_without_swap(self):
    """All swap figures are zero when the sample reports no swap."""
    # Rewrite the SwapFree/SwapTotal lines of the sample to report 0.
    sample = re.sub(r"Swap(Free|Total): *\d+ kB", r"Swap\1: 0",
                    SAMPLE_MEMORY_INFO)
    memstats = MemoryStats(self.makeFile(sample))

    zero_attributes = ("total_swap", "free_swap", "used_swap",
                       "used_swap_percentage", "free_swap_percentage")
    for attribute in zero_attributes:
        self.assertEqual(getattr(memstats, attribute), 0)

    # The percentages must be floats even when they are zero.
    for attribute in ("used_swap_percentage", "free_swap_percentage"):
        self.assertEqual(type(getattr(memstats, attribute)), float)