The following 47 code examples, extracted from open source Python projects, illustrate how to use re.LOCALE.
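Before the project examples, here is a minimal sketch (not taken from any of the projects below) of what the flag does: in Python 3, re.LOCALE is only accepted for bytes patterns, and it makes \w, \W, \b, \B and case-insensitive matching follow the current locale instead of the default rules.

import locale
import re

locale.setlocale(locale.LC_ALL, "")      # use the system locale for matching rules
word = re.compile(rb"\w+", re.LOCALE)    # bytes pattern required; a str pattern raises ValueError
print(word.findall(b"hello world"))      # [b'hello', b'world']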
def str_flags_to_int(str_flags):
    flags = 0
    if "i" in str_flags:
        flags |= re.IGNORECASE
    if "l" in str_flags:
        flags |= re.LOCALE
    if "m" in str_flags:
        flags |= re.MULTILINE
    if "s" in str_flags:
        flags |= re.DOTALL
    if "u" in str_flags:
        flags |= re.UNICODE
    if "x" in str_flags:
        flags |= re.VERBOSE
    return flags
def _encode_regex(name, value, dummy0, dummy1):
    """Encode a python regex or bson.regex.Regex."""
    flags = value.flags
    # Python 2 common case
    if flags == 0:
        return b"\x0B" + name + _make_c_string_check(value.pattern) + b"\x00"
    # Python 3 common case
    elif flags == re.UNICODE:
        return b"\x0B" + name + _make_c_string_check(value.pattern) + b"u\x00"
    else:
        sflags = b""
        if flags & re.IGNORECASE:
            sflags += b"i"
        if flags & re.LOCALE:
            sflags += b"l"
        if flags & re.MULTILINE:
            sflags += b"m"
        if flags & re.DOTALL:
            sflags += b"s"
        if flags & re.UNICODE:
            sflags += b"u"
        if flags & re.VERBOSE:
            sflags += b"x"
        sflags += b"\x00"
        return b"\x0B" + name + _make_c_string_check(value.pattern) + sflags
def findReplaceFlags(self, tokens):
    """Map letters in |tokens| to re flags."""
    flags = re.MULTILINE
    if 'i' in tokens:
        flags |= re.IGNORECASE
    if 'l' in tokens:
        # Affects \w, \W, \b, \B.
        flags |= re.LOCALE
    if 'm' in tokens:
        # Affects ^, $.
        flags |= re.MULTILINE
    if 's' in tokens:
        # Affects .
        flags |= re.DOTALL
    if 'x' in tokens:
        # Affects whitespace and # comments.
        flags |= re.VERBOSE
    if 'u' in tokens:
        # Affects \w, \W, \b, \B.
        flags |= re.UNICODE
    if 0:
        tokens = re.sub('[ilmsxu]', '', tokens)
        if len(tokens):
            self.setMessage('unknown regex flags ' + tokens)
    return flags
def pos_tag_text(line, token_pattern=token_pattern,
                 exclude_stopword=stopwords,
                 encode_digit=False):
    token_pattern = re.compile(token_pattern, flags=re.UNICODE | re.LOCALE)
    for name in ["question1", "question2"]:
        l = line[name]
        ## tokenize
        tokens = [x.lower() for x in token_pattern.findall(l)]
        ## stem
        #tokens = l.lower().split()
        #print tokens
        tokens = stem_tokens(tokens, english_stemmer)
        line[name + '_stem'] = ' '.join(tokens)
        #print tokens
        if exclude_stopword:
            tokens = [x for x in tokens if x not in stopwords]
        tags = pos_tag(tokens)
        tags_list = [t for w, t in tags]
        tags_str = " ".join(tags_list)
        #print tags_str
        line[name + '_pos_tag'] = tags_str
    return line[[u'question1_stem', u'question1_pos_tag',
                 u'question2_stem', u'question2_pos_tag']]
def get_flags(self):
    flags = 0
    for option in self.options:
        if option == 'i':
            flags |= re.IGNORECASE
        elif option == 'l':
            flags |= re.LOCALE
        elif option == 'm':
            flags |= re.MULTILINE
        elif option == 's':
            flags |= re.DOTALL
        elif option == 'u':
            flags |= re.UNICODE
    return flags
def processLine(line):
    titlep = re.compile('^# (\\w+.+$)', re.LOCALE)      # group 1
    quotep1 = re.compile('^\\t> (.+$)', re.LOCALE)      # group 1
    quotep2 = re.compile('^\\t (.+$)', re.LOCALE)       # group 1
    notep1 = re.compile('^\\t- (.+$)', re.LOCALE)       # group 1
    notep2 = re.compile('^\\t (.+$)', re.LOCALE)        # group 1
    citep = re.compile('^\\t\\t- (@\\w+)')              # group 1
    tagsp = re.compile('^\\t\\t(- Tags: | {8})(.+$)')   # group 2
    ctimep = re.compile('^\\t\\t- Ctime: (.+$)')        # group 1

    patterns = {'title': [titlep, 1],
                'quote1': [quotep1, 1],
                'quote2': [quotep2, 1],
                'note1': [notep1, 1],
                'note2': [notep2, 1],
                'cite': [citep, 1],
                'tags': [tagsp, 2],
                'ctime': [ctimep, 1]
                }

    for kk, vv in patterns.items():
        m = vv[0].match(line)
        if m:
            return kk, m.group(vv[1]).encode('utf8')

    return None, ''


#------------Organize by tags and save------------
def flags(key):
    flag = 0
    if 'a' in key:
        flag += re.ASCII
    if 'i' in key:
        flag += re.IGNORECASE
    if 'l' in key:
        flag += re.LOCALE
    if 'm' in key:
        flag += re.MULTILINE
    if 's' in key:
        flag += re.DOTALL
    if 'x' in key:
        flag += re.VERBOSE
    return flag
def _regex_from_encoded_pattern(s):
    """'foo'    -> re.compile(re.escape('foo'))
       '/foo/'  -> re.compile('foo')
       '/foo/i' -> re.compile('foo', re.I)
    """
    if s.startswith('/') and s.rfind('/') != 0:
        # Parse it: /PATTERN/FLAGS
        idx = s.rfind('/')
        pattern, flags_str = s[1:idx], s[idx+1:]
        flag_from_char = {
            "i": re.IGNORECASE,
            "l": re.LOCALE,
            "s": re.DOTALL,
            "m": re.MULTILINE,
            "u": re.UNICODE,
        }
        flags = 0
        for char in flags_str:
            try:
                flags |= flag_from_char[char]
            except KeyError:
                raise ValueError("unsupported regex flag: '%s' in '%s' "
                                 "(must be one of '%s')"
                                 % (char, s, ''.join(list(flag_from_char.keys()))))
        return re.compile(s[1:idx], flags)
    else:  # not an encoded regex
        return re.compile(re.escape(s))


# Recipe: dedent (0.1.2)
def is_valid_blacklist_pattern(cls, pattern):
    """
    If value contains at least five characters (not spaces), consider this a valid pattern
    :param pattern: The regex pattern to check
    :return: True if pattern is valid
    """
    matches = re.findall('[\S]+', pattern, re.LOCALE)
    if len(matches) < 5:
        return False
    if any(re.search(pattern, t, re.LOCALE) for t in ['', ' ', 'JUST SOME TEST', "\n"]):
        return False
    return True
def is_blacklisted(cls, value):
    return any(re.search(p, value, re.LOCALE)
               for p in cls.read_items() if p.strip())
def test_special_escapes(self):
    self.assertEqual(re.search(r"\b(b.)\b", "abcd abc bcd bx").group(1), "bx")
    self.assertEqual(re.search(r"\B(b.)\B", "abc bcd bc abxd").group(1), "bx")
    self.assertEqual(re.search(r"\b(b.)\b", "abcd abc bcd bx", re.LOCALE).group(1), "bx")
    self.assertEqual(re.search(r"\B(b.)\B", "abc bcd bc abxd", re.LOCALE).group(1), "bx")
    self.assertEqual(re.search(r"\b(b.)\b", "abcd abc bcd bx", re.UNICODE).group(1), "bx")
    self.assertEqual(re.search(r"\B(b.)\B", "abc bcd bc abxd", re.UNICODE).group(1), "bx")
    self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
    self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
    self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
    self.assertEqual(re.search(r"\b(b.)\b", "abcd abc bcd bx").group(1), "bx")
    self.assertEqual(re.search(r"\B(b.)\B", "abc bcd bc abxd").group(1), "bx")
    self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
    self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
    self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
    self.assertEqual(re.search(r"\d\D\w\W\s\S", "1aa! a").group(0), "1aa! a")
    self.assertEqual(re.search(r"\d\D\w\W\s\S", "1aa! a", re.LOCALE).group(0), "1aa! a")
    self.assertEqual(re.search(r"\d\D\w\W\s\S", "1aa! a", re.UNICODE).group(0), "1aa! a")
def test_getlower(self):
    import _sre
    self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
    self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
    self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))

    self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
    self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
def test_constants(self):
    self.assertEqual(re.I, re.IGNORECASE)
    self.assertEqual(re.L, re.LOCALE)
    self.assertEqual(re.M, re.MULTILINE)
    self.assertEqual(re.S, re.DOTALL)
    self.assertEqual(re.X, re.VERBOSE)
def test_special_escapes(self):
    self.assertEqual(re.search(r"\b(b.)\b", "abcd abc bcd bx").group(1), "bx")
    self.assertEqual(re.search(r"\B(b.)\B", "abc bcd bc abxd").group(1), "bx")
    self.assertEqual(re.search(r"\b(b.)\b", "abcd abc bcd bx", re.LOCALE).group(1), "bx")
    self.assertEqual(re.search(r"\B(b.)\B", "abc bcd bc abxd", re.LOCALE).group(1), "bx")
    if have_unicode:
        self.assertEqual(re.search(r"\b(b.)\b", "abcd abc bcd bx", re.UNICODE).group(1), "bx")
        self.assertEqual(re.search(r"\B(b.)\B", "abc bcd bc abxd", re.UNICODE).group(1), "bx")
    self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
    self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
    self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M))
    self.assertEqual(re.search(r"\b(b.)\b", u"abcd abc bcd bx").group(1), "bx")
    self.assertEqual(re.search(r"\B(b.)\B", u"abc bcd bc abxd").group(1), "bx")
    self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
    self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
    self.assertIsNone(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M))
    self.assertEqual(re.search(r"\d\D\w\W\s\S", "1aa! a").group(0), "1aa! a")
    self.assertEqual(re.search(r"\d\D\w\W\s\S", "1aa! a", re.LOCALE).group(0), "1aa! a")
    if have_unicode:
        self.assertEqual(re.search(r"\d\D\w\W\s\S", "1aa! a", re.UNICODE).group(0), "1aa! a")
def test_getlower(self):
    import _sre
    self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
    self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
    if have_unicode:
        self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))

    self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
    self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")