Python regex 模块,U 实例源码

我们从 Python 开源项目中提取了以下 8 个代码示例,用于说明如何使用 regex.U。

项目:epitran    作者:dmort27    | 项目源码 | 文件源码
def __init__(self, arpabet='arpabet', ligatures=False, cedict_file=None):
        """Set up the Flite "wrapper".

        Args:
            arpabet (str): base name (without the ``.csv`` suffix) of the
                           packaged data file holding the ARPAbet to IPA
                           mapping
            ligatures (bool): if True, use non-standard ligatures instead of
                              standard IPA
            cedict_file (str): path to CC-CEDict dictionary (accepted for
                               compatibility; not read by this constructor)
        """
        # Locate the mapping file under this package's ``data`` directory.
        relative_path = os.path.join('data', arpabet + '.csv')
        mapping_path = pkg_resources.resource_filename(__name__, relative_path)
        self.arpa_map = self._read_arpabet(mapping_path)
        # Matches either a maximal run of letters or a maximal run of
        # non-letters.
        self.chunk_re = re.compile(r'(\p{L}+|[^\p{L}]+)', re.U)
        self.puncnorm = PuncNorm()
        self.ligatures = ligatures
        self.ft = panphon.FeatureTable()
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_getattr(self):
        """Check the attributes exposed by pattern and match objects."""
        check = self.assertEqual
        src = "(?i)(a)(b)"

        # Pattern attributes.  The expected default flag differs between the
        # plain literal and the u-prefixed literal.
        check(regex.compile(src).pattern, '(?i)(a)(b)')
        plain_flags = regex.A | regex.I | regex.DEFAULT_VERSION
        check(regex.compile(src).flags, plain_flags)
        unicode_flags = regex.I | regex.U | regex.DEFAULT_VERSION
        check(regex.compile(u"(?i)(a)(b)").flags, unicode_flags)
        check(regex.compile(src).groups, 2)
        check(regex.compile(src).groupindex, {})

        named_src = "(?i)(?P<first>a)(?P<other>b)"
        check(regex.compile(named_src).groupindex, {'first': 1, 'other': 2})

        # Match-object positions for a match anchored at the start.
        check(regex.match("(a)", "a").pos, 0)
        check(regex.match("(a)", "a").endpos, 1)

        # Match-object positions and spans for a search hit inside the text.
        text = "abcdef"
        check(regex.search("b(c)", text).pos, 0)
        check(regex.search("b(c)", text).endpos, 6)
        check(regex.search("b(c)", text).span(), (1, 3))
        check(regex.search("b(c)", text).span(1), (2, 3))

        check(regex.match("(a)", "a").string, 'a')
        check(regex.match("(a)", "a").regs, ((0, 1), (0, 1)))
        pattern_type = type(regex.match("(a)", "a").re)
        check(repr(pattern_type), self.PATTERN_CLASS)

        # Issue 14260: writing to groupindex must not change what the
        # pattern reports afterwards.
        p = regex.compile(r'abc(?P<n>def)')
        p.groupindex["n"] = 0
        check(p.groupindex["n"], 1)
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_inline_flags(self):
        """Check case-insensitive matching via flag arguments and inline flags."""
        # Bug #1700.
        upper = unichr(0x1ea0)  # Latin Capital Letter A with Dot Below
        lower = unichr(0x1ea1)  # Latin Small Letter A with Dot Below

        # Each entry: (arguments for regex.compile, text that must match).
        # The flags are supplied as an argument, as an inline prefix, or both.
        cases = [
            ((upper, regex.I | regex.U), lower),
            ((lower, regex.I | regex.U), upper),
            (('(?i)' + upper, regex.U), lower),
            (('(?i)' + lower, regex.U), upper),
            (('(?iu)' + upper,), lower),
            (('(?iu)' + lower,), upper),
        ]
        for compile_args, subject in cases:
            p = regex.compile(*compile_args)
            self.assertEqual(bool(p.match(subject)), True)

        # Inline flag at the start, at the end, and under V1.
        for pattern in (r"(?i)a", r"a(?i)", r"(?iV1)a"):
            self.assertEqual(bool(regex.match(pattern, "A")), True)
        # Under V1 a trailing inline flag does not affect the preceding "a".
        self.assertEqual(regex.match(r"a(?iV1)", "A"), None)
项目:telugu-nlp    作者:chsasank    | 项目源码 | 文件源码
def __init__(self):
        """Initialise the base tokenizer with a fixed Unicode-aware pattern."""
        # Alternatives, tried in order: a run of digits and dots, a run of
        # word characters, or a run of characters that are neither word
        # characters nor whitespace.
        token_pattern = r'[\d\.]+|\w+|[^\w\s]+'
        regex_tokenizer.__init__(self, token_pattern, flags=regex.U)
项目:epitran    作者:dmort27    | 项目源码 | 文件源码
def _fields_to_function_metathesis(self, a, X, Y):
        left = r'(?P<X>{}){}(?P<Y>{})'.format(X, a, Y)
        regexp = re.compile(left)

        def rewrite(m):
            d = {k: none2str(v) for k, v in m.groupdict().items()}
            return '{}{}{}{}'.format(d['X'], d['sw2'], d['sw1'], d['Y'])

        return lambda w: regexp.sub(rewrite, w, re.U)
项目:epitran    作者:dmort27    | 项目源码 | 文件源码
def _fields_to_function(self, a, b, X, Y):
        left = r'(?P<X>{})(?P<a>{})(?P<Y>{})'.format(X, a, Y)
        regexp = re.compile(left)

        def rewrite(m):
            d = {k: none2str(v) for k, v in m.groupdict().items()}
            return '{}{}{}'.format(d['X'], b, d['Y'])

        return lambda w: regexp.sub(rewrite, w, re.U)
项目:panphon    作者:dmort27    | 项目源码 | 文件源码
def __init__(self, infile=sys.stdin):
        """Check Unicode IPA read from a file against the panphon database.

        infile -- File object supplying the input; STDIN unless given.
        """
        pattern_flags = re.V1 | re.U
        # Matches one comma, double quote, space, tab or newline.
        self.ws_punc_regex = re.compile(r'[," \t\n]', pattern_flags)
        self.ft = panphon.FeatureTable()
        # Validation runs immediately, as part of construction.
        self._validate_file(infile)
项目:backrefs    作者:facelessuser    | 项目源码 | 文件源码
def single_case(self, i, case):
            """Uppercase or lowercase the next character.

            Pulls one token from ``i`` and appends its case-converted form to
            ``self.result``, or delegates to another handler when the token is
            a group reference, a format group or a further case directive.

            Args:
                i: iterator yielding string tokens; exactly one ``next(i)``
                   call is made here (nested handlers may consume more).
                case: ``_LOWER`` selects ``str.lower``; any other value
                      selects ``str.upper``.

            Raises:
                ValueError: in binary mode, when an octal escape exceeds 0xFF.
            """

            # Record the name of the string method to apply.  It is retrieved
            # later through get_single_stack() (presumably a pop - confirm
            # against that helper).
            attr = "lower" if case == _LOWER else "upper"
            self.single_stack.append(attr)
            try:
                t = next(i)
                if len(t) > 1:
                    # Multi-character token: an escape sequence or a group.
                    if self.use_format and t[0:1] == self._lc_bracket:
                        # Format-style group: hand over the text between the
                        # first and last characters, stripped of whitespace.
                        self.handle_format_group(t[1:-1].strip())
                    else:
                        # Token without its first character (presumably the
                        # leading backslash - confirm against the tokenizer).
                        c = t[1:]
                        first = c[0:1]
                        if first.isdigit() and (self.use_format or len(c) == 3):
                            # Octal escape: convert the code point's case and
                            # emit it again as an escape.
                            value = int(c, 8)
                            if self.binary:
                                if value > 0xFF:
                                    # Re fails on octal greater than 0o377 or 0xFF
                                    raise ValueError("octal escape value outside of range 0-0o377!")
                                value = ord(getattr(compat.uchr(value), self.get_single_stack())())
                                self.result.append(self.string_convert('\\%03o' % value))
                            else:
                                value = ord(getattr(compat.uchr(value), self.get_single_stack())())
                                # Octal form only fits values up to 0xFF;
                                # larger results are written as \uXXXX.
                                self.result.append(('\\%03o' if value <= 0xFF else '\\u%04x') % value)
                        elif not self.use_format and (c[0:1].isdigit() or c[0:1] == self._group):
                                # Group reference: delegated with the whole token.
                                self.handle_group(t)
                        elif c == self._uc:
                            # Another single-character case directive follows:
                            # recurse so it applies to the next token.
                            self.single_case(i, _UPPER)
                        elif c == self._lc:
                            self.single_case(i, _LOWER)
                        elif c == self._uc_span:
                            self.span_case(i, _UPPER)
                        elif c == self._lc_span:
                            self.span_case(i, _LOWER)
                        elif c == self._end:
                            self.end_found = True
                        elif not self.binary and first == self._unicode_name:
                            # Named Unicode character: the name is the token
                            # minus its first three and last characters
                            # (presumably ``\N{`` and ``}`` - confirm).
                            uc = unicodedata.lookup(t[3:-1])
                            value = ord(getattr(uc, self.get_single_stack())())
                            self.result.append(("\\u%04x" if value <= 0xFFFF else "\\U%08x") % value)
                        elif (
                            not self.binary and
                            (first == self._unicode_narrow or (not NARROW and first == self._unicode_wide))
                        ):
                            # Unicode escape: hex digits follow the first two
                            # characters of the token.
                            uc = compat.uchr(int(t[2:], 16))
                            value = ord(getattr(uc, self.get_single_stack())())
                            self.result.append(("\\u%04x" if value <= 0xFFFF else "\\U%08x") % value)
                        elif first == self._hex:
                            # Hex escape: hex digits follow the first two
                            # characters of the token.
                            hc = chr(int(t[2:], 16))
                            self.result.append(
                                self.string_convert("\\x%02x" % ord(getattr(hc, self.get_single_stack())()))
                            )
                        else:
                            # Unrecognised token: the stack entry is still
                            # retrieved (result discarded) and the token is
                            # passed through unchanged.
                            self.get_single_stack()
                            self.result.append(t)
                else:
                    # Single character: apply the case method directly.
                    self.result.append(getattr(t, self.get_single_stack())())

            except StopIteration:
                # No token left to convert; nothing is appended.
                pass