我们从 Python 开源项目中提取了以下 8 个代码示例，用于说明如何使用 regex.U。
def __init__(self, arpabet='arpabet', ligatures=False, cedict_file=None):
    """Set up a wrapper around Flite.

    Args:
        arpabet (str): base name (without the ``.csv`` suffix) of the
            packaged data file holding the ARPAbet-to-IPA mapping
        ligatures (bool): if True, use non-standard ligatures instead of
            standard IPA
        cedict_file (str): path to CC-CEDict dictionary (accepted for
            interface compatibility; not referenced in this method)
    """
    # Resolve "data/<arpabet>.csv" relative to this package.
    mapping_path = pkg_resources.resource_filename(
        __name__,
        os.path.join('data', arpabet + '.csv'),
    )
    self.arpa_map = self._read_arpabet(mapping_path)

    # Alternates between runs of letters and runs of non-letters.
    # NOTE(review): ``\p{L}`` is not supported by the stdlib ``re`` module,
    # so ``re`` here is presumably the third-party ``regex`` package.
    letter_or_other = r'(\p{L}+|[^\p{L}]+)'
    self.chunk_re = re.compile(letter_or_other, re.U)

    self.puncnorm = PuncNorm()
    self.ligatures = ligatures
    self.ft = panphon.FeatureTable()
def test_getattr(self):
    """Check the attributes exposed by compiled patterns and match objects."""
    # Attributes of a compiled pattern.
    self.assertEqual(regex.compile("(?i)(a)(b)").pattern, '(?i)(a)(b)')
    self.assertEqual(
        regex.compile("(?i)(a)(b)").flags,
        regex.A | regex.I | regex.DEFAULT_VERSION,
    )
    self.assertEqual(
        regex.compile(u"(?i)(a)(b)").flags,
        regex.I | regex.U | regex.DEFAULT_VERSION,
    )
    self.assertEqual(regex.compile("(?i)(a)(b)").groups, 2)
    self.assertEqual(regex.compile("(?i)(a)(b)").groupindex, {})

    named = regex.compile("(?i)(?P<first>a)(?P<other>b)")
    self.assertEqual(named.groupindex, {'first': 1, 'other': 2})

    # Attributes of match objects produced by match() and search().
    matched = regex.match("(a)", "a")
    self.assertEqual(matched.pos, 0)
    self.assertEqual(matched.endpos, 1)

    found = regex.search("b(c)", "abcdef")
    self.assertEqual(found.pos, 0)
    self.assertEqual(found.endpos, 6)
    self.assertEqual(found.span(), (1, 3))
    self.assertEqual(found.span(1), (2, 3))

    self.assertEqual(matched.string, 'a')
    self.assertEqual(matched.regs, ((0, 1), (0, 1)))
    self.assertEqual(repr(type(matched.re)), self.PATTERN_CLASS)

    # Issue 14260: assigning into groupindex must not change what a
    # later lookup on the pattern reports.
    p = regex.compile(r'abc(?P<n>def)')
    p.groupindex["n"] = 0
    self.assertEqual(p.groupindex["n"], 1)
def test_inline_flags(self):
    """Check case-insensitive matching via flag arguments and inline flags."""
    # Bug #1700.
    upper_char = unichr(0x1ea0)  # Latin Capital Letter A with Dot Below
    lower_char = unichr(0x1ea1)  # Latin Small Letter A with Dot Below

    # Each pair is (character used as the pattern, character matched against).
    case_pairs = ((upper_char, lower_char), (lower_char, upper_char))

    # Flags supplied entirely through the ``flags`` argument.
    for pattern_char, subject_char in case_pairs:
        p = regex.compile(pattern_char, regex.I | regex.U)
        self.assertEqual(bool(p.match(subject_char)), True)

    # Inline ignore-case flag combined with a Unicode flag argument.
    for pattern_char, subject_char in case_pairs:
        p = regex.compile('(?i)' + pattern_char, regex.U)
        self.assertEqual(bool(p.match(subject_char)), True)

    # Both flags given inline.
    for pattern_char, subject_char in case_pairs:
        p = regex.compile('(?iu)' + pattern_char)
        self.assertEqual(bool(p.match(subject_char)), True)

    # Position of the inline flag: only the V1 pattern with a trailing
    # flag is expected not to match.
    self.assertEqual(bool(regex.match(r"(?i)a", "A")), True)
    self.assertEqual(bool(regex.match(r"a(?i)", "A")), True)
    self.assertEqual(bool(regex.match(r"(?iV1)a", "A")), True)
    self.assertEqual(regex.match(r"a(?iV1)", "A"), None)
def __init__(self):
    """Initialise the base tokenizer with a fixed pattern and ``regex.U``.

    The pattern's alternatives are, in order: a run of digits and dots,
    a run of word characters, or a run of characters that are neither
    word characters nor whitespace.
    """
    token_pattern = r'[\d\.]+|\w+|[^\w\s]+'
    regex_tokenizer.__init__(self, token_pattern, flags=regex.U)
def _fields_to_function_metathesis(self, a, X, Y):
    """Build a function that swaps two captured segments within a context.

    The fragments are combined into ``(?P<X>X)a(?P<Y>Y)``. Every match is
    rewritten as the text of group ``X``, then ``sw2``, then ``sw1``, then
    ``Y``, so the combined pattern must define named groups ``sw1`` and
    ``sw2`` (presumably inside ``a`` -- confirm against the caller).

    Args:
        a (str): regex fragment placed between the two context groups.
        X (str): regex fragment for the left context.
        Y (str): regex fragment for the right context.

    Returns:
        A callable taking a string and returning it with every match of
        the combined pattern rewritten.
    """
    left = r'(?P<X>{}){}(?P<Y>{})'.format(X, a, Y)
    # Flags belong to compilation. ``Pattern.sub`` accepts no flags: its
    # third positional parameter is ``count``, so passing ``re.U`` there
    # silently capped the number of substitutions per word.
    regexp = re.compile(left, re.U)

    def rewrite(m):
        # Normalise each captured group through none2str before formatting.
        d = {k: none2str(v) for k, v in m.groupdict().items()}
        return '{}{}{}{}'.format(d['X'], d['sw2'], d['sw1'], d['Y'])

    # No count argument: all matches in the word are rewritten.
    return lambda w: regexp.sub(rewrite, w)
def _fields_to_function(self, a, b, X, Y):
    """Build a function that replaces ``a`` with ``b`` within a context.

    The fragments are combined into ``(?P<X>X)(?P<a>a)(?P<Y>Y)``. Every
    match is rewritten as the text of group ``X``, then ``b``, then the
    text of group ``Y``.

    Args:
        a (str): regex fragment for the segment to be replaced.
        b (str): replacement text, inserted as-is by ``str.format``.
        X (str): regex fragment for the left context.
        Y (str): regex fragment for the right context.

    Returns:
        A callable taking a string and returning it with every match of
        the combined pattern rewritten.
    """
    left = r'(?P<X>{})(?P<a>{})(?P<Y>{})'.format(X, a, Y)
    # Flags belong to compilation. ``Pattern.sub`` accepts no flags: its
    # third positional parameter is ``count``, so passing ``re.U`` there
    # silently capped the number of substitutions per word.
    regexp = re.compile(left, re.U)

    def rewrite(m):
        # Normalise each captured group through none2str before formatting.
        d = {k: none2str(v) for k, v in m.groupdict().items()}
        return '{}{}{}'.format(d['X'], b, d['Y'])

    # No count argument: all matches in the word are rewritten.
    return lambda w: regexp.sub(rewrite, w)
def __init__(self, infile=sys.stdin):
    """Validate Unicode IPA read from a file against the panphon database.

    infile -- File from which input is taken; by default, STDIN.
    """
    # Character class: comma, double quote, space, tab or newline.
    ws_punc_pattern = r'[," \t\n]'
    regex_flags = re.V1 | re.U
    self.ws_punc_regex = re.compile(ws_punc_pattern, regex_flags)

    self.ft = panphon.FeatureTable()

    # Validation runs immediately as part of construction.
    self._validate_file(infile)
def single_case(self, i, case):
    """Uppercase or lowercase the next character.

    Pushes the requested case method name onto ``self.single_stack``, pulls
    one token from ``i`` and dispatches on its form. Branches that produce
    a character apply the method name returned by ``self.get_single_stack()``
    to it and append the result to ``self.result``.

    Args:
        i: iterator yielding tokens; exactly one ``next(i)`` is done here
            (nested case handlers invoked below receive the same iterator).
        case: compared against ``_LOWER``; any other value selects "upper".

    Raises:
        ValueError: in binary mode, for an octal escape above 0xFF.
    """
    # Name of the str method to apply to the upcoming character.
    attr = "lower" if case == _LOWER else "upper"
    self.single_stack.append(attr)
    try:
        t = next(i)
        if len(t) > 1:
            # Multi-character token.
            if self.use_format and t[0:1] == self._lc_bracket:
                # Format mode: pass the text between the first and last
                # characters (stripped) to the format-group handler.
                self.handle_format_group(t[1:-1].strip())
            else:
                # Drop the token's first character.
                # NOTE(review): presumably the leading backslash -- confirm.
                c = t[1:]
                first = c[0:1]
                if first.isdigit() and (self.use_format or len(c) == 3):
                    # Treat the remainder as an octal number.
                    value = int(c, 8)
                    if self.binary:
                        if value > 0xFF:
                            # Re fails on octal greater than 0o377 or 0xFF
                            raise ValueError("octal escape value outside of range 0-0o377!")
                        # Case-convert the character, then re-emit it as a
                        # 3-digit octal escape.
                        value = ord(getattr(compat.uchr(value), self.get_single_stack())())
                        self.result.append(self.string_convert('\\%03o' % value))
                    else:
                        # After case conversion the code point may exceed
                        # 0xFF; emit a \u escape in that case.
                        value = ord(getattr(compat.uchr(value), self.get_single_stack())())
                        self.result.append(('\\%03o' if value <= 0xFF else '\\u%04x') % value)
                elif not self.use_format and (c[0:1].isdigit() or c[0:1] == self._group):
                    # Outside format mode, a digit or the group marker is
                    # delegated to the group handler with the full token.
                    self.handle_group(t)
                elif c == self._uc:
                    # Another single-character case marker: recurse.
                    self.single_case(i, _UPPER)
                elif c == self._lc:
                    self.single_case(i, _LOWER)
                elif c == self._uc_span:
                    # A span case marker: hand over to the span handler.
                    self.span_case(i, _UPPER)
                elif c == self._lc_span:
                    self.span_case(i, _LOWER)
                elif c == self._end:
                    # End marker: only record that it was seen.
                    self.end_found = True
                elif not self.binary and first == self._unicode_name:
                    # Look up a character by Unicode name taken from t[3:-1].
                    # NOTE(review): presumably a \N{NAME} token -- confirm.
                    uc = unicodedata.lookup(t[3:-1])
                    value = ord(getattr(uc, self.get_single_stack())())
                    self.result.append(("\\u%04x" if value <= 0xFFFF else "\\U%08x") % value)
                elif (
                    not self.binary and
                    (first == self._unicode_narrow or (not NARROW and first == self._unicode_wide))
                ):
                    # Hex code point follows the two-character prefix; the
                    # wide form is only accepted when NARROW is false.
                    uc = compat.uchr(int(t[2:], 16))
                    value = ord(getattr(uc, self.get_single_stack())())
                    self.result.append(("\\u%04x" if value <= 0xFFFF else "\\U%08x") % value)
                elif first == self._hex:
                    # Hex escape: case-convert and re-emit as \xNN.
                    hc = chr(int(t[2:], 16))
                    self.result.append(
                        self.string_convert("\\x%02x" % ord(getattr(hc, self.get_single_stack())()))
                    )
                else:
                    # Unrecognised token: the return value of
                    # get_single_stack() is discarded and the token is
                    # appended unchanged.
                    # NOTE(review): presumably called to pop the pending
                    # case entry -- confirm.
                    self.get_single_stack()
                    self.result.append(t)
        else:
            # Single-character token: apply the case method directly.
            self.result.append(getattr(t, self.get_single_stack())())

    except StopIteration:
        # No token left to convert; nothing is appended.
        pass