Python regex 模块,match() 实例源码


项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def __exit__(self, exc_type, exc_value, tb):
        """Validate the exception (if any) raised inside the ``with`` block.

        Returns True, suppressing the exception, when ``exc_type`` is a
        subclass of ``self.expected`` and ``self.expected_regexp`` (if set)
        is found in ``str(exc_value)``.  Returns False for any other
        exception type so that it propagates unchanged.

        Raises ``self.failureException`` when no exception was raised or when
        the message does not match ``self.expected_regexp``.
        """
        if exc_type is None:
            # ``expected`` may lack __name__ (e.g. a tuple of classes), so
            # fall back to its string form for the failure message.
            try:
                exc_name = self.expected.__name__
            except AttributeError:
                exc_name = str(self.expected)
            raise self.failureException(
                "%s not raised" % exc_name)
        if not issubclass(exc_type, self.expected):
            # let unexpected exceptions pass through
            return False
        self.exception = exc_value # store for later retrieval
        if self.expected_regexp is None:
            return True

        expected_regexp = self.expected_regexp
        # ``basestring`` only exists on Python 2; use ``str`` elsewhere.
        try:
            string_types = basestring
        except NameError:
            string_types = str
        if isinstance(expected_regexp, string_types):
            expected_regexp = re.compile(expected_regexp)
        if not expected_regexp.search(str(exc_value)):
            raise self.failureException('"%s" does not match "%s"' %
                     (expected_regexp.pattern, str(exc_value)))
        return True
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_re_groupref_exists(self):
        # Exercise conditional patterns of the form ``(?(group)yes|no)``.
        eq = self.assertEqual

        # An optional opening parenthesis requires a matching closing one.
        parens = r'^(\()?([^()]+)(?(1)\))$'
        eq(regex.match(parens, '(a)')[:], ('(a)', '(', 'a'))
        eq(regex.match(parens, 'a')[:], ('a', None, 'a'))
        eq(regex.match(parens, 'a)'), None)
        eq(regex.match(parens, '(a'), None)

        # Conditional with both branches, then with an empty "yes" branch.
        both_branches = '^(?:(a)|c)((?(1)b|d))$'
        eq(regex.match(both_branches, 'ab')[:], ('ab', 'a', 'b'))
        eq(regex.match(both_branches, 'cd')[:], ('cd', None, 'd'))
        empty_yes = '^(?:(a)|c)((?(1)|d))$'
        eq(regex.match(empty_yes, 'cd')[:], ('cd', None, 'd'))
        eq(regex.match(empty_yes, 'a')[:], ('a', 'a', ''))

        # Tests for bug #1177831: exercise groups other than the first group.
        compiled = regex.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
        for subject, expected in (('abc', ('abc', 'a', 'b', 'c')),
                                  ('ad', ('ad', 'a', None, 'd'))):
            eq(compiled.match(subject)[:], expected)
        for subject in ('abd', 'ac'):
            eq(compiled.match(subject), None)
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_ignore_case(self):
        self.assertEqual(regex.match("abc", "ABC", regex.I)[0], 'ABC')
        self.assertEqual(regex.match(u"abc", u"ABC", regex.I)[0], u'ABC')

        self.assertEqual(regex.match(r"(a\s[^a]*)", "a bb", regex.I)[1],
          'a bb')
        self.assertEqual(regex.match(r"(a\s[abc])", "a b", regex.I)[1], 'a b')
        self.assertEqual(regex.match(r"(a\s[abc]*)", "a bb", regex.I)[1],
          'a bb')
        self.assertEqual(regex.match(r"((a)\s\2)", "a a", regex.I)[1], 'a a')
        self.assertEqual(regex.match(r"((a)\s\2*)", "a aa", regex.I)[1],
          'a aa')
        self.assertEqual(regex.match(r"((a)\s(abc|a))", "a a", regex.I)[1],
          'a a')
        self.assertEqual(regex.match(r"((a)\s(abc|a)*)", "a aa", regex.I)[1],
          'a aa')

        # Issue 3511.
        self.assertEqual(regex.match(r"[Z-a]", "_").span(), (0, 1))
        self.assertEqual(regex.match(r"(?i)[Z-a]", "_").span(), (0, 1))

        self.assertEqual(bool(regex.match(ur"(?iu)nao", u"nAo")), True)
        self.assertEqual(bool(regex.match(ur"(?iu)n\xE3o", u"n\xC3o")), True)
        self.assertEqual(bool(regex.match(ur"(?iu)n\xE3o", u"N\xC3O")), True)
        self.assertEqual(bool(regex.match(ur"(?iu)s", u"\u017F")), True)
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_captures(self):
        # ``captures()`` returns every value a group captured, not just the
        # last one.
        # NOTE(review): the ``regex.search(`` calls, two pattern lines and
        # several expected-value lines were lost from this snippet; they are
        # reconstructed here so that each pattern yields the expected values
        # that were still visible.  Confirm against the upstream test-suite.
        self.assertEqual(regex.search(r"(\w)+", "abc").captures(1), ['a', 'b',
          'c'])
        self.assertEqual(regex.search(r"(\w{3})+", "abcdef").captures(0, 1),
          (['abcdef'], ['abc', 'def']))
        self.assertEqual(regex.search(r"^(\d{1,3})(?:\.(\d{1,3})){3}$",
          "192.168.0.1").captures(1, 2), (['192', ], ['168', '0', '1']))
        self.assertEqual(regex.match(r"^([0-9A-F]{2}){4} ([a-z]\d){5}$",
          "3FB52A0C a2c4g3k9d3").captures(1, 2), (['3F', 'B5', '2A', '0C'],
          ['a2', 'c4', 'g3', 'k9', 'd3']))
        self.assertEqual(regex.match(r"^([a-z]W)([a-z]X)+([a-z]Y)$",
          "aWbXcXdXeXfY").captures(1, 2, 3), (['aW'], ['bX', 'cX', 'dX', 'eX'],
          ['fY']))

        # Captures made inside a lookahead / atomic group.
        self.assertEqual(regex.search(r".*?(?=(.)+)b", "ab").captures(1),
          ['b'])
        self.assertEqual(regex.search(r".*?(?>(.){0,2})d", "abcd").captures(1),
          ['b', 'c'])
        self.assertEqual(regex.search(r"(.)+", "a").captures(1), ['a'])
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_subscripted_captures(self):
        # Format-style templates may subscript a group to pick one capture.
        # NOTE(review): the opening line of five of the six calls was lost
        # from this snippet; they are rebuilt from the one intact call, which
        # uses the same pattern and subject.
        pattern = r'(?P<x>.)+'

        self.assertEqual(regex.match(pattern,
          'abc').expandf('{0} {0[0]} {0[-1]}'), 'abc abc abc')
        self.assertEqual(regex.match(pattern,
          'abc').expandf('{1} {1[0]} {1[1]} {1[2]} {1[-1]} {1[-2]} {1[-3]}'),
          'c a b c c b a')
        self.assertEqual(regex.match(pattern,
          'abc').expandf('{x} {x[0]} {x[1]} {x[2]} {x[-1]} {x[-2]} {x[-3]}'),
          'c a b c c b a')

        self.assertEqual(regex.subf(pattern, r'{0} {0[0]} {0[-1]}',
          'abc'), 'abc abc abc')
        self.assertEqual(regex.subf(pattern,
          '{1} {1[0]} {1[1]} {1[2]} {1[-1]} {1[-2]} {1[-3]}', 'abc'),
          'c a b c c b a')
        self.assertEqual(regex.subf(pattern,
          '{x} {x[0]} {x[1]} {x[2]} {x[-1]} {x[-2]} {x[-3]}', 'abc'),
          'c a b c c b a')
项目:DrQA    作者:facebookresearch    | 项目源码 | 文件源码
def preprocess(article):
    """Clean one article dict and filter out pages that should be skipped.

    Every value of ``article`` is passed through ``PARSER.unescape`` and
    written back in place.  Returns ``None`` for blacklisted ids,
    disambiguation pages and List/Index/Outline pages; otherwise returns a
    new dict ``{'id': <title>, 'text': <text>}``.
    """
    # Take out HTML escaping WikiExtractor didn't clean
    for k, v in article.items():
        article[k] = PARSER.unescape(v)

    # Filter some disambiguation pages not caught by the WikiExtractor
    if article['id'] in BLACKLIST:
        return None
    if '(disambiguation)' in article['title'].lower():
        return None
    if '(disambiguation page)' in article['title'].lower():
        return None

    # Take out List/Index/Outline pages (mostly links)
    # NOTE(review): the second argument of this call was lost from the
    # snippet; the title is the only field the pattern can apply to.
    if re.match(r'(List of .+)|(Index of .+)|(Outline of .+)',
                article['title']):
        return None

    # Return doc with `id` set to `title`
    return {'id': article['title'], 'text': article['text']}
项目:backrefs    作者:facelessuser    | 项目源码 | 文件源码
def iternext(self):
            """
            Iterate through characters of the string.

            Count escaped Q, E and backslash as a single char.

            Returns the next token and stores it in ``self.current``.
            Raises ``StopIteration`` once ``self.index`` passes
            ``self.max_index``.
            """

            if self.index > self.max_index:
                raise StopIteration

            # Slicing (not indexing) keeps the token a str/bytes of length 1.
            char = self.string[self.index:self.index + 1]
            if char == self._b_slash:
                m = self._regex_search_ref.match(self.string[self.index + 1:])
                if m:
                    # NOTE(review): this expression was garbled to
                    # ``char += if else``; reconstructed as "first group if
                    # it matched, otherwise the second" -- confirm upstream.
                    char += m.group(1) if m.group(1) else m.group(2)

            self.index += len(char)
            self.current = char
            return self.current

    # Break apart template patterns into char tokens
项目:backrefs    作者:facelessuser    | 项目源码 | 文件源码
def __init__(self, match, template):
            """Store the match and template and load the token table.

            ``template.binary`` selects ``ctok.btokens`` over ``ctok.utokens``
            (presumably the bytes and text variants -- confirm in ``ctok``).
            """

            # The ``else`` was lost from the snippet: without it the binary
            # table was always overwritten and ``ctokens`` was unbound for
            # non-binary templates.
            if template.binary:
                ctokens = ctok.btokens
            else:
                ctokens = ctok.utokens

            self.template = template
            self._esc_end = ctokens["esc_end"]
            self._end = ctokens["end"]
            self._lc = ctokens["lc"]
            self._lc_span = ctokens["lc_span"]
            self._uc = ctokens["uc"]
            self._uc_span = ctokens["uc_span"]
            self.index = -1
            self.end_found = False
            self.parent_span = []
            self.match = match
项目:backrefs    作者:facelessuser    | 项目源码 | 文件源码
def expand(self):
            """Using the template, expand the string.

            Each entry of ``self.template.literals`` is either literal text
            or ``None``; a ``None`` slot is filled with one capture of the
            corresponding group, optionally case-converted.  Returns the
            pieces joined with an empty separator of the same type as
            ``self.match.string``.

            Raises ``IndexError`` when the requested capture does not exist.
            """

            # An empty slice yields "" or b"" to match the subject's type.
            sep = self.match.string[:0]
            text = []
            # Expand string
            for index, l in enumerate(self.template.literals):
                if l is None:
                    g_index = self.template.get_group_index(index)
                    span_case, single_case, capture = self.template.get_group_attributes(index)
                    try:
                        l = self.match.captures(g_index)[capture]
                    except IndexError:
                        raise IndexError("'%d' is out of range!" % capture)
                    if span_case is not None:
                        # Case-convert the whole capture.
                        l = getattr(l, span_case)()
                    if single_case is not None:
                        # Case-convert only the first character.
                        l = getattr(l[0:1], single_case)() + l[1:]
                # This append was lost from the snippet, which made the
                # result always empty.
                text.append(l)

            return sep.join(text)
项目:DrQA_cn    作者:AmoseKang    | 项目源码 | 文件源码
def preprocess(article):
    """Unescape an article dict in place and decide whether to keep it.

    All values of ``article`` are replaced by ``PARSER.unescape(value)``.
    The function returns ``None`` when the id is in ``BLACKLIST``, when the
    title marks a disambiguation page, or when the title starts with
    "List of", "Index of" or "Outline of".  Otherwise it returns a new dict
    with ``id`` set to the title and ``text`` copied over.
    """
    # Take out HTML escaping WikiExtractor didn't clean
    for k, v in article.items():
        article[k] = PARSER.unescape(v)

    # Filter some disambiguation pages not caught by the WikiExtractor
    if article['id'] in BLACKLIST:
        return None
    lowered_title = article['title'].lower()
    if '(disambiguation)' in lowered_title:
        return None
    if '(disambiguation page)' in lowered_title:
        return None

    # Take out List/Index/Outline pages (mostly links)
    # NOTE(review): the subject argument was lost from the snippet; the
    # title is the only field this pattern can sensibly apply to.
    if re.match(r'(List of .+)|(Index of .+)|(Outline of .+)',
                article['title']):
        return None

    # Return doc with `id` set to `title`
    return {'id': article['title'], 'text': article['text']}
项目:toshi-services-lib    作者:toshiapp    | 项目源码 | 文件源码
def decode_event_data(topic, data):
    """Decode event data into a list of converted argument values.

    ``data`` given as ``str`` is first passed through ``data_decoder``.  The
    types come from ``_process_topic(topic)`` and the raw values from
    ``decode_abi``.  Each value is converted with ``_convert_type`` or, for
    types with an array suffix, ``_convert_array``.
    """
    if isinstance(data, str):
        data = data_decoder(data)
    name, types = _process_topic(topic)
    decoded = decode_abi(types, data)
    arguments = []
    for typ, val in zip(types, decoded):
        m = TYPES_RE.match(typ)
        if m is None:
            # NOTE(review): the body of this branch was lost from the
            # snippet; skipping unrecognised types is an assumption --
            # confirm against the upstream source.
            continue
        atyp, arr = m.groups()
        if arr is None or arr == '':
            arguments.append(_convert_type(atyp, val))
        else:
            # "[2][3]" -> ["2", "3"]: strip the outer brackets, then split
            # on the inner "][" separators.
            arguments.append(_convert_array(atyp, arr[1:-1].split(']['), val))
    return arguments
项目:epitran    作者:dmort27    | 项目源码 | 文件源码
def _read_rule(self, i, line):
        """Parse one line of a rule file.

        Blank lines are ignored.  A line of the form ``::name:: = value``
        is stored in ``self.symbols`` and nothing is returned.  Any other
        line must look like ``a -> b / X _ Y`` and is turned into a rule
        function via ``_fields_to_function`` (or the metathesis variant when
        ``a`` contains both ``?P<sw1>`` and ``?P<sw2>``).

        ``i`` is the zero-based line index, used only in error messages.
        Raises ``DatafileError`` when the line cannot be parsed or compiled.

        NOTE(review): the ``s.group(``/``re.search(`` calls and the
        ``try:``/``else:`` lines were lost from this snippet and are
        reconstructed here -- confirm against the upstream source.
        """
        line = line.strip()
        if line:
            line = unicodedata.normalize('NFC', unicodedata.normalize('NFD', line))
            s = re.match(r'(?P<symbol>::\w+::)\s*=\s*(?P<value>.+)', line)
            if s:
                self.symbols[s.group('symbol')] = s.group('value')
            else:
                line = self._sub_symbols(line)
                r = re.match(r'(\S+)\s*->\s*(\S+)\s*/\s*(\S*)\s*[_]\s*(\S*)', line)
                # ``r`` is None when the line does not match the rule
                # syntax, so ``.groups()`` raises AttributeError.
                try:
                    a, b, X, Y = r.groups()
                except AttributeError:
                    raise DatafileError('Line {}: "{}" cannot be parsed.'.format(i + 1, line))
                # '#' marks a word boundary; '0' stands for the empty string.
                X, Y = X.replace('#', '^'), Y.replace('#', '$')
                a, b = a.replace('0', ''), b.replace('0', '')
                try:
                    if re.search(r'[?]P[<]sw1[>].+[?]P[<]sw2[>]', a):
                        return self._fields_to_function_metathesis(a, X, Y)
                    else:
                        return self._fields_to_function(a, b, X, Y)
                except Exception as e:
                    raise DatafileError('Line {}: "{}" cannot be compiled as regex: ?{}'.format(i + 1, line, e))
项目:joyodb    作者:leoboiko    | 项目源码 | 文件源码
def is_empty(line):
    # 'r' raw string so that doctest works with these special characters.
    r"""Detects blank lines.

    In the pdfbox conversion of the main table, blank lines seem to separate
    kanji entries.  But we can already detect the start of each kanji entry by
    the presence of the kanji itself, so we just skip blank lines.

    Returns True when ``line`` contains nothing but whitespace.

    >>> is_empty('')
    True
    >>> is_empty("\n")
    True
    >>> is_empty(" \t  \n")
    True
    >>> is_empty("\u3000") # IDEOGRAPHIC SPACE
    True
    >>> is_empty("x\n")
    False
    """
    # str.strip() with no argument removes all Unicode whitespace, so a
    # blank line is exactly one that strips down to the empty string.
    return not line.strip()
项目:joyodb    作者:leoboiko    | 项目源码 | 文件源码
def is_page_index(line):
    r"""Detects the page indices from the Joyo document.

    They usually look like this:

    >>> is_page_index('03?_???????_??NN.indd   107 2010/11/12   13:10:23')
    True

    Pdfbox also generated a single page number (?) like this:

    >>> is_page_index('163')
    True

    Content lines won't match:
    >>> is_page_index('\t \t \t ????\t ???\t')
    False
    """
    line = line.strip()

    # We just test whether it starts with a number.
    return re.match(r'^[0-9]', line) is not None
项目:joyodb    作者:leoboiko    | 项目源码 | 文件源码
def __init__(self, kanji, reading, variation_of=None, kind=None):
        """Create a reading for ``kanji``.

        A ``reading`` that starts with an ideographic space (U+3000) is
        stored without it and flagged as ``uncommon``.  ``kind`` is used
        as given when truthy; otherwise it is 'On' if the reading starts
        with a Katakana character and 'Kun' if not.
        """
        self.kanji = kanji
        # Both ``else`` lines were lost from the snippet, which made
        # ``uncommon`` always False and ``kind`` always 'Kun'.
        if reading[0] == "\u3000":
            self.reading = reading[1:]
            self.uncommon = True
        else:
            self.reading = reading
            self.uncommon = False

        self.examples = list()

        if kind:
            self.kind = kind
        else:
            # ``\p{...}`` needs the third-party ``regex`` engine; presumably
            # ``re`` is an alias for it in this module -- TODO confirm.
            if re.match(r"\p{Katakana}", self.reading):
                self.kind = 'On'
            else:
                self.kind = 'Kun'

        self.variation_of = variation_of
        self.notes = list()
        self.alternate_orthographies = list()
项目:joyodb    作者:leoboiko    | 项目源码 | 文件源码
def test_alternate_orthographies(self):
        # Walks every alternate orthography of every reading of every loaded
        # kanji and looks up the kanji that the alternate spelling starts with.
        # NOTE(review): this snippet is truncated -- the body of the final
        # ``if`` (and anything after it) is missing, so the block does not
        # parse as shown.  Restore it from the upstream source.
        for k in joyodb.loaded_data.kanjis:
            for r in k.readings:
                for a in r.alternate_orthographies:
                    # One Han character followed by optional Hiragana.
                    # ``\p{...}`` needs the ``regex`` engine; presumably
                    # ``re`` is an alias for it here -- TODO confirm.
                    looks_like_alternate = re.match("^(\p{Han})\p{Hiragana}*$", a)
                    # Raises TypeError if the alternate did not match (None).
                    alt_kanji_ch = looks_like_alternate[1]

                    # The leading kanji must be present exactly once in the
                    # loaded data.
                    alt_kanji_list = [obj for obj in joyodb.loaded_data.kanjis
                                      if obj.kanji == alt_kanji_ch]
                    assert(len(alt_kanji_list) == 1)
                    alt_kanji = alt_kanji_list[0]

                    # Look for the current kanji among the other kanji's own
                    # alternates.
                    for their_readings in alt_kanji.readings:
                        for their_alternates in their_readings.alternate_orthographies:
                            if k.kanji in their_alternates:
项目:toshi-id-service    作者:toshiapp    | 项目源码 | 文件源码
def validate_username(username):
    """Return a match object if ``username`` is well formed, else ``None``.

    A valid username is an ASCII letter followed by 2 to 59 ASCII letters,
    digits or underscores (3 to 60 characters in total).
    """
    # ``\Z`` instead of ``$``: ``$`` also matches just before a trailing
    # newline, which would let "name\n" pass validation.
    return regex.match(r'^[a-zA-Z][a-zA-Z0-9_]{2,59}\Z', username)
项目:toshi-id-service    作者:toshiapp    | 项目源码 | 文件源码
async def get(self, username):
        """Look up one user by address or username and write it as JSON.

        ``username`` may be a ``0x``-prefixed 40-hex-digit address, matched
        against ``users.toshi_id``, or a username, matched
        case-insensitively.  When ``self.apps_only`` is set, only
        non-blocked apps are returned.

        Raises ``JSONHTTPError`` 400 for a malformed username and 404 when
        no row is found.

        NOTE(review): the snippet showed a plain ``def`` although the body
        awaits, one ``array_agg(`` argument was garbled, and the ``$2``
        parameter was never appended to ``args``.  These are reconstructed
        here -- confirm against the upstream source.
        """

        sql = ("SELECT users.*, array_agg(app_categories.category_id) AS category_ids, "
               "array_agg(categories.tag) AS category_tags, "
               "array_agg(category_names.name) AS category_names "
               "FROM users LEFT JOIN app_categories "
               "ON users.toshi_id = app_categories.toshi_id "
               "LEFT JOIN category_names ON app_categories.category_id = category_names.category_id "
               "AND category_names.language = $1 "
               "LEFT JOIN categories ON app_categories.category_id = categories.category_id "
               "WHERE ")
        args = ['en']

        # check if ethereum address is given
        if regex.match('^0x[a-fA-F0-9]{40}$', username):
            sql += "users.toshi_id = $2"

        # otherwise verify that username is valid
        elif not regex.match('^[a-zA-Z][a-zA-Z0-9_]{2,59}$', username):
            raise JSONHTTPError(400, body={'errors': [{'id': 'invalid_username', 'message': 'Invalid Username'}]})
        else:
            sql += "lower(users.username) = lower($2)"

        # Both surviving branches reference $2, so bind it exactly once.
        args.append(username)

        if self.apps_only:
            sql += " AND users.is_app = $3 AND users.blocked = $4"
            args.extend([True, False])

        sql += " GROUP BY users.toshi_id"

        async with self.db:
            row = await self.db.fetchrow(sql, *args)

        if row is None:
            raise JSONHTTPError(404, body={'errors': [{'id': 'not_found', 'message': 'Not Found'}]})

        self.write(user_row_for_json(self.request, row))
项目:toshi-id-service    作者:toshiapp    | 项目源码 | 文件源码
async def put(self, username):
        """Update the user identified by address or username.

        ``username`` is either a ``0x``-prefixed 40-hex-digit address or a
        username that is resolved to its ``toshi_id``.  The requester must
        be that user or a superuser.  An uploaded file updates the avatar;
        otherwise the user record is updated.

        Raises ``JSONHTTPError`` 400 (malformed username or unsupported
        content type), 404 (unknown username) or 401 (not permitted).

        NOTE(review): the snippet showed a plain ``def`` although the body
        awaits, and two ``else:`` lines were lost; both are restored here.
        """

        if regex.match('^0x[a-fA-F0-9]{40}$', username):

            address_to_update = username

        elif regex.match('^[a-zA-Z][a-zA-Z0-9_]{2,59}$', username):

            async with self.db:
                row = await self.db.fetchrow("SELECT * FROM users WHERE lower(username) = lower($1)", username)
            if row is None:
                raise JSONHTTPError(404, body={'errors': [{'id': 'not_found', 'message': 'Not Found'}]})

            address_to_update = row['toshi_id']

        else:

            raise JSONHTTPError(400, body={'errors': [{'id': 'invalid_username', 'message': 'Invalid Username'}]})

        request_address = self.verify_request()

        # A request without a Content-Type header is rejected as bad data
        # instead of failing with a KeyError.
        content_type = self.request.headers.get('Content-Type', '')
        if not content_type.startswith('application/json') and not self.request.files:
            raise JSONHTTPError(400, body={'errors': [{'id': 'bad_data', 'message': 'Expected application/json or multipart/form-data'}]})

        if request_address != address_to_update:

            # check for superuser update
            if not self.is_superuser(request_address):
                raise JSONHTTPError(401, body={'errors': [{'id': 'permission_denied', 'message': 'Permission Denied'}]})

        if self.request.files:
            return await self.update_user_avatar(address_to_update)
        else:
            return await self.update_user(address_to_update)
项目:whaaaaat    作者:finklabs    | 项目源码 | 文件源码
def validate(self, document):
        """Raise ``ValidationError`` unless ``document.text`` is a phone number.

        Accepted form: optional leading 0/1, a 3-3-4 digit number with
        optional ``-``, ``.`` or whitespace separators and optional
        parentheses around the first group, and an optional extension
        introduced by ``#``, ``ext`` or ``x``.
        """
        # Raw string: the pattern is unchanged, but ``\s``, ``\(`` and ``\d``
        # are invalid escape sequences in a normal string literal.
        ok = regex.match(r'^([01]{1})?[-.\s]?\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})\s?((?:#|ext\.?\s?|x\.?\s?){1}(?:\d+)?)?$', document.text)
        if not ok:
            raise ValidationError(
                message='Please enter a valid phone number',
                cursor_position=len(document.text))  # Move cursor to end
项目:whaaaaat    作者:finklabs    | 项目源码 | 文件源码
def validate(self, document):
        """Check that ``document.text`` looks like a phone number.

        Returns ``None`` on success.  Raises ``ValidationError`` with the
        cursor placed at the end of the text when the input does not match.
        """
        # Written as a raw string so the regex escapes (``\s``, ``\d``,
        # ``\(``) are not treated as invalid string escapes; the pattern
        # itself is unchanged.
        phone_pattern = r'^([01]{1})?[-.\s]?\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})\s?((?:#|ext\.?\s?|x\.?\s?){1}(?:\d+)?)?$'
        ok = regex.match(phone_pattern, document.text)
        if not ok:
            raise ValidationError(
                message='Please enter a valid phone number',
                cursor_position=len(document.text))  # Move cursor to end
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_search_star_plus(self):
        # ``search`` scans forward for a match, ``match`` is anchored at the
        # start; ``*`` may match empty, ``+`` may not.
        # NOTE(review): the ``regex.search(`` prefix was stripped from the
        # first five assertions of this snippet and is restored here.
        self.assertEqual(regex.search('a*', 'xxx').span(0), (0, 0))
        self.assertEqual(regex.search('x*', 'axx').span(), (0, 0))
        self.assertEqual(regex.search('x+', 'axx').span(0), (1, 3))
        self.assertEqual(regex.search('x+', 'axx').span(), (1, 3))
        self.assertEqual(regex.search('x', 'aaa'), None)
        self.assertEqual(regex.match('a*', 'xxx').span(0), (0, 0))
        self.assertEqual(regex.match('a*', 'xxx').span(), (0, 0))
        self.assertEqual(regex.match('x*', 'xxxa').span(0), (0, 3))
        self.assertEqual(regex.match('x*', 'xxxa').span(), (0, 3))
        self.assertEqual(regex.match('a+', 'xxx'), None)
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_bug_1661(self):
        # Verify that flags do not get silently ignored with compiled patterns
        pattern = regex.compile('.')
        self.assertRaisesRegex(ValueError, self.FLAGS_WITH_COMPILED_PAT,
          lambda: regex.match(pattern, 'A', regex.I))
        # NOTE(review): the call in this lambda was stripped from the
        # snippet; ``regex.search`` is restored by analogy with its siblings.
        self.assertRaisesRegex(ValueError, self.FLAGS_WITH_COMPILED_PAT,
          lambda: regex.search(pattern, 'A', regex.I))
        self.assertRaisesRegex(ValueError, self.FLAGS_WITH_COMPILED_PAT,
          lambda: regex.findall(pattern, 'A', regex.I))
        self.assertRaisesRegex(ValueError, self.FLAGS_WITH_COMPILED_PAT,
          lambda: regex.compile(pattern, regex.I))
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_re_match(self):
        # Group access through slicing, indexing and ``group()``.
        self.assertEqual(regex.match('a', 'a')[:], ('a',))
        self.assertEqual(regex.match('(a)', 'a')[:], ('a', 'a'))
        self.assertEqual(regex.match(r'(a)', 'a')[0], 'a')
        self.assertEqual(regex.match(r'(a)', 'a')[1], 'a')
        self.assertEqual(regex.match(r'(a)', 'a').group(1, 1), ('a', 'a'))

        pat = regex.compile('((a)|(b))(c)?')
        self.assertEqual(pat.match('a')[:], ('a', 'a', 'a', None, None))
        self.assertEqual(pat.match('b')[:], ('b', 'b', None, 'b', None))
        self.assertEqual(pat.match('ac')[:], ('ac', 'a', 'a', None, 'c'))
        self.assertEqual(pat.match('bc')[:], ('bc', 'b', None, 'b', 'c'))
        self.assertEqual(pat.match('bc')[:], ('bc', 'b', None, 'b', 'c'))

        # A single group.
        # NOTE(review): the ``m.group(...)`` calls were stripped from this
        # snippet; the arguments are inferred from the expected values.
        m = regex.match('(a)', 'a')
        self.assertEqual(m.group(), 'a')
        self.assertEqual(m.group(0), 'a')
        self.assertEqual(m.group(1), 'a')
        self.assertEqual(m.group(1, 1), ('a', 'a'))

        pat = regex.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
        self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
        self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'), (None, 'b',
          None))
        self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_groupdict(self):
        # ``groupdict()`` maps each group name to the text it captured.
        matched = regex.match('(?P<first>first) (?P<second>second)',
          'first second')
        expected = {'first': 'first', 'second': 'second'}
        self.assertEqual(matched.groupdict(), expected)
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_expand(self):
        # Numeric and named group references are both expanded.
        matched = regex.match("(?P<first>first) (?P<second>second)",
          "first second")
        expanded = matched.expand(r"\2 \1 \g<second> \g<first>")
        self.assertEqual(expanded, 'second first second first')
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_repeat_minmax(self):
        # Bounded repeats ``{m}``, ``{m,n}`` and their lazy forms.
        eq = self.assertEqual

        # Quantifiers too small to cover all three characters.
        too_small = ("{1}", "{1}?", "{1,2}", "{1,2}?")
        # Quantifiers that can cover exactly three characters.
        enough = ("{3}", "{1,3}", "{1,4}", "{3,4}?", "{3}?", "{1,3}?",
          "{1,4}?", "{3,4}?")

        for quantifier in too_small:
            eq(regex.match(r"^(\w)" + quantifier + "$", "abc"), None)

        # A repeated group keeps its last capture.
        for quantifier in enough:
            eq(regex.match(r"^(\w)" + quantifier + "$", "abc")[1], 'c')

        for quantifier in too_small:
            eq(regex.match("^x" + quantifier + "$", "xxx"), None)

        # Without the end anchor, greedy and lazy forms differ.
        unanchored = (
            ("^x{1}", 'x'),
            ("^x{1}?", 'x'),
            ("^x{0,1}", 'x'),
            ("^x{0,1}?", ''),
        )
        for pattern, expected in unanchored:
            eq(regex.match(pattern, "xxx")[0], expected)

        for quantifier in enough:
            eq(bool(regex.match("^x" + quantifier + "$", "xxx")), True)

        # ``{}`` is not a quantifier, so it matches itself literally.
        eq(regex.match("^x{}$", "xxx"), None)
        eq(bool(regex.match("^x{}$", "x{}")), True)
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_getattr(self):
        # Attributes exposed by compiled patterns and by match objects.
        # NOTE(review): several continuation lines and the ``regex.search(``
        # prefixes were lost from this snippet.  The reconstructed pieces
        # (``regex.DEFAULT_VERSION``, the named-group pattern and
        # ``self.PATTERN_CLASS``) should be confirmed against upstream.
        self.assertEqual(regex.compile("(?i)(a)(b)").pattern, '(?i)(a)(b)')
        self.assertEqual(regex.compile("(?i)(a)(b)").flags, regex.A | regex.I |
          regex.DEFAULT_VERSION)
        self.assertEqual(regex.compile(u"(?i)(a)(b)").flags, regex.I | regex.U
          | regex.DEFAULT_VERSION)
        self.assertEqual(regex.compile("(?i)(a)(b)").groups, 2)
        self.assertEqual(regex.compile("(?i)(a)(b)").groupindex, {})

        self.assertEqual(regex.compile("(?i)(?P<first>a)(?P<other>b)").groupindex,
          {'first': 1, 'other': 2})

        self.assertEqual(regex.match("(a)", "a").pos, 0)
        self.assertEqual(regex.match("(a)", "a").endpos, 1)

        self.assertEqual(regex.search("b(c)", "abcdef").pos, 0)
        self.assertEqual(regex.search("b(c)", "abcdef").endpos, 6)
        self.assertEqual(regex.search("b(c)", "abcdef").span(), (1, 3))
        self.assertEqual(regex.search("b(c)", "abcdef").span(1), (2, 3))

        self.assertEqual(regex.match("(a)", "a").string, 'a')
        self.assertEqual(regex.match("(a)", "a").regs, ((0, 1), (0, 1)))
        self.assertEqual(repr(type(regex.match("(a)", "a").re)),
          self.PATTERN_CLASS)

        # Issue 14260.
        # Writing to the returned mapping must not change the pattern.
        p = regex.compile(r'abc(?P<n>def)')
        p.groupindex["n"] = 0
        self.assertEqual(p.groupindex["n"], 1)
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_bigcharset(self):
        self.assertEqual(regex.match(ur"(?u)([\u2222\u2223])", u"\u2222")[1],
        self.assertEqual(regex.match(ur"(?u)([\u2222\u2223])", u"\u2222",
          regex.UNICODE)[1], u'\u2222')
          u"e\xe8\xe9\xea\xeb\u0113\u011b\u0117", flags=regex.UNICODE)),
          u"e\xe8\xe9\xea\xeb\u0113\u011b\u0117", flags=regex.UNICODE)),
          u"e\xe8\xe9\xea\xeb\u0113\u011b\u0117", flags=regex.UNICODE)),
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_non_consuming(self):
        # Lookaheads must not consume text: group 1 is always just 'a'.
        lookahead_cases = (
            # Positive lookahead.
            (r"(a(?=\s[^a]))", "a b"),
            (r"(a(?=\s[^a]*))", "a b"),
            (r"(a(?=\s[abc]))", "a b"),
            (r"(a(?=\s[abc]*))", "a bc"),
            (r"(a)(?=\s\1)", "a a"),
            (r"(a)(?=\s\1*)", "a aa"),
            (r"(a)(?=\s(abc|a))", "a a"),
            # Negative lookahead.
            (r"(a(?!\s[^a]))", "a a"),
            (r"(a(?!\s[abc]))", "a d"),
            (r"(a)(?!\s\1)", "a b"),
            (r"(a)(?!\s(abc|a))", "a b"),
        )
        for pattern, subject in lookahead_cases:
            self.assertEqual(regex.match(pattern, subject)[1], 'a')
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_category(self):
        # ``\s`` matches a plain space and the group captures it.
        whitespace = regex.match(r"(\s)", " ")
        self.assertEqual(whitespace[1], ' ')
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_re_escape(self):
        # Every character in range(256), once escaped, must match itself.
        p = ""
        self.assertEqual(regex.escape(p), p)
        for i in range(0, 256):
            p += chr(i)
            # The expected value ``True)`` was lost from this snippet.
            self.assertEqual(bool(regex.match(regex.escape(chr(i)), chr(i))),
              True)
            self.assertEqual(regex.match(regex.escape(chr(i)), chr(i)).span(),
              (0, 1))

        # The escaped concatenation must match the whole string at once.
        pat = regex.compile(regex.escape(p))
        self.assertEqual(pat.match(p).span(), (0, 256))
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_sre_character_literals(self):
        # Octal (``\ooo``) and hex (``\xhh``) escapes, alone and followed by
        # a character that must not be absorbed into the escape.
        # The expected value ``True)`` was lost from four of these calls.
        for i in [0, 8, 16, 32, 64, 127, 128, 255]:
            self.assertEqual(bool(regex.match(r"\%03o" % i, chr(i))), True)
            self.assertEqual(bool(regex.match(r"\%03o0" % i, chr(i) + "0")),
              True)
            self.assertEqual(bool(regex.match(r"\%03o8" % i, chr(i) + "8")),
              True)
            self.assertEqual(bool(regex.match(r"\x%02x" % i, chr(i))), True)
            self.assertEqual(bool(regex.match(r"\x%02x0" % i, chr(i) + "0")),
              True)
            self.assertEqual(bool(regex.match(r"\x%02xz" % i, chr(i) + "z")),
              True)

        self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda:
          regex.match(r"\911", ""))
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_bug_113254(self):
        # A group that took no part in the match reports position -1.
        pattern, subject = r'(a)|(b)', 'b'
        self.assertEqual(regex.match(pattern, subject).start(1), -1)
        self.assertEqual(regex.match(pattern, subject).end(1), -1)
        self.assertEqual(regex.match(pattern, subject).span(1), (-1, -1))
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_bug_527371(self):
        # Bug described in patches 527371/672491.
        # Each row: (pattern, subject, match attribute, expected value).
        cases = (
            (r'(a)?a', 'a', 'lastindex', None),
            (r'(a)(b)?b', 'ab', 'lastindex', 1),
            ("(?P<a>a(b))", "ab", 'lastgroup', 'a'),
            ("((a))", "a", 'lastindex', 1),
        )
        for pattern, subject, attribute, expected in cases:
            matched = regex.match(pattern, subject)
            self.assertEqual(getattr(matched, attribute), expected)
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_bug_418626(self):
        # Bugs 418626 at al. -- Testing Greg Chapman's addition of op code
        # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
        # pattern '*?' on a long string.
        # NOTE(review): three expected end offsets and one ``regex.search(``
        # prefix were lost from this snippet; the offsets are recomputed
        # from the subject lengths.
        self.assertEqual(regex.match('.*?c', 10000 * 'ab' + 'cd').end(0),
          20001)
        self.assertEqual(regex.match('.*?cd', 5000 * 'ab' + 'c' + 5000 * 'ab' +
          'cde').end(0), 20003)
        self.assertEqual(regex.match('.*?cd', 20000 * 'abc' + 'de').end(0),
          60001)
        # Non-simple '*?' still used to hit the recursion limit, before the
        # non-recursive scheme was implemented.
        self.assertEqual(regex.search('(a|b)*?c', 10000 * 'ab' + 'cd').end(0),
          20001)
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_stack_overflow(self):
        # Nasty cases that used to overflow the straightforward recursive
        # implementation of repeated groups.
        long_run = 50000 * 'x'
        cases = (
            ('(x)*', long_run),
            ('(x)*y', long_run + 'y'),
            ('(x)*?y', long_run + 'y'),
        )
        for pattern, subject in cases:
            self.assertEqual(regex.match(pattern, subject)[1], 'x')
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_bug_448951(self):
        # Bug 448951 (similar to 429357, but with single char match).
        # (Also test greedy matches.)
        for op in ('', '?', '*'):
            pattern = r'((.%s):)?z' % op
            # Optional prefix absent: both groups stay unset.
            self.assertEqual(regex.match(pattern, 'z')[:],
              ('z', None, None))
            # Optional prefix present: both groups capture.
            self.assertEqual(regex.match(pattern, 'a:z')[:],
              ('a:z', 'a:', 'a'))
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_bug_725149(self):
        # Mark_stack_base restoring before restoring marks.
        # The trailing ``None))`` of the first assertion was lost from this
        # snippet; the pattern has two groups, so one ``None`` is expected.
        self.assertEqual(regex.match('(a)(?:(?=(b)*)c)*', 'abb')[:], ('a', 'a',
          None))
        self.assertEqual(regex.match('(a)((?!(b)*))*', 'abb')[:], ('a', 'a',
          None, None))
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_bug_764548(self):
        # Bug 764548, regex.compile() barfs on str/unicode subclasses.
        class my_unicode(str):
            pass

        compiled = regex.compile(my_unicode("abc"))
        self.assertEqual(compiled.match("xyz"), None)
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_empty_array(self):
        # SF buf 1647541.
        import array
        for code in 'cbBuhHiIlLfd':
            empty = array.array(code)
            # A non-empty pattern cannot match an empty buffer ...
            no_match = regex.compile("bla").match(empty)
            self.assertEqual(no_match, None)
            # ... while the empty pattern matches and has no groups.
            groups = regex.compile("").match(empty)[1 : ]
            self.assertEqual(groups, ())
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_inline_flags(self):
        # Bug #1700.
        upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Below
        lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Below

        # Each case must match in both directions: (pattern char, subject).
        directions = ((upper_char, lower_char), (lower_char, upper_char))

        # Flags passed as arguments.
        for pattern_char, subject in directions:
            p = regex.compile(pattern_char, regex.I | regex.U)
            self.assertEqual(bool(p.match(subject)), True)

        # Ignore-case inline, Unicode as an argument.
        for pattern_char, subject in directions:
            p = regex.compile('(?i)' + pattern_char, regex.U)
            self.assertEqual(bool(p.match(subject)), True)

        # Both flags inline.
        for pattern_char, subject in directions:
            p = regex.compile('(?iu)' + pattern_char)
            self.assertEqual(bool(p.match(subject)), True)

        # Position of the inline flag: only with V1 does a trailing flag
        # fail to affect the text before it.
        for pattern in (r"(?i)a", r"a(?i)", r"(?iV1)a"):
            self.assertEqual(bool(regex.match(pattern, "A")), True)
        self.assertEqual(regex.match(r"a(?iV1)", "A"), None)
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_ascii_and_unicode_flag(self):
        # Unicode patterns.
        for flags in (0, regex.UNICODE):
            pat = regex.compile(u'\xc0', flags | regex.IGNORECASE)
            self.assertEqual(bool(pat.match(u'\xe0')), True)
            pat = regex.compile(u'\w', flags)
            self.assertEqual(bool(pat.match(u'\xe0')), True)

        # With the ASCII flag, non-ASCII letters neither case-fold nor
        # count as word characters.
        pat = regex.compile(u'\xc0', regex.ASCII | regex.IGNORECASE)
        self.assertEqual(pat.match(u'\xe0'), None)
        pat = regex.compile(u'(?a)\xc0', regex.IGNORECASE)
        self.assertEqual(pat.match(u'\xe0'), None)
        pat = regex.compile(u'\w', regex.ASCII)
        self.assertEqual(pat.match(u'\xe0'), None)
        pat = regex.compile(u'(?a)\w')
        self.assertEqual(pat.match(u'\xe0'), None)

        # String patterns.
        for flags in (0, regex.ASCII):
            pat = regex.compile('\xc0', flags | regex.IGNORECASE)
            self.assertEqual(pat.match('\xe0'), None)
            pat = regex.compile('\w')
            self.assertEqual(pat.match('\xe0'), None)

        # NOTE(review): the body of this lambda was lost from the snippet;
        # a pattern that sets both ASCII and UNICODE is the assumed original
        # -- confirm upstream.
        self.assertRaisesRegex(ValueError, self.MIXED_FLAGS, lambda:
          regex.compile('(?au)\w'))
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_new_named_groups(self):
        # ``(?P<name>...)`` and ``(?<name>...)`` must give the same result.
        m0 = regex.match(r'(?P<a>\w)', 'x')
        m1 = regex.match(r'(?<a>\w)', 'x')
        if not (m0 and m1 and m0[:] == m1[:]):
            # NOTE(review): the body of this ``if`` was lost from the
            # snippet; failing the test is the only sensible outcome, but
            # the exact message is an assumption.
            self.fail("Failed")
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_grapheme(self):
        self.assertEqual(regex.match(ur"(?u)\X", u"\xE0").span(), (0, 1))
        self.assertEqual(regex.match(ur"(?u)\X", u"a\u0300").span(), (0, 2))

          u"a\xE0a\u0300e\xE9e\u0301"), [u'a', u'\xe0', u'a\u0300', u'e',
          u'\xe9', u'e\u0301'])
          u"a\xE0a\u0300e\xE9e\u0301"), [u'a\xe0a\u0300', u'e\xe9e\u0301'])
        self.assertEqual(regex.findall(ur"(?u)\X", u"\r\r\n\u0301A\u0301"),
          [u'\r', u'\r\n', u'\u0301', u'A\u0301'])
项目:oa_qian    作者:sunqb    | 项目源码 | 文件源码
def test_format(self):
        # Format-style replacement via ``subf``, ``subfn`` and ``expandf``.
        eq = self.assertEqual
        subject = "foo bar"
        numbered = r"(\w+) (\w+)"
        named = r"(?<word1>\w+) (?<word2>\w+)"
        by_index = "{0} => {2} {1}"
        by_name = "{word2} {word1}"

        eq(regex.subf(numbered, by_index, subject), "foo bar => bar foo")
        eq(regex.subf(named, by_name, subject), "bar foo")

        # ``subfn`` also reports the number of substitutions made.
        eq(regex.subfn(numbered, by_index, subject),
          ("foo bar => bar foo", 1))
        eq(regex.subfn(named, by_name, subject), ("bar foo", 1))

        matched = regex.match(numbered, subject)
        eq(matched.expandf(by_index), "foo bar => bar foo")
项目:DrQA    作者:facebookresearch    | 项目源码 | 文件源码
def filter_word(text):
    """Return True if ``text`` should be filtered out.

    After ``normalize``, a word is filtered when it consists only of
    punctuation characters or when its lower-cased form is in ``STOPWORDS``.
    """
    token = normalize(text)
    only_punctuation = regex.match(r'^\p{P}+$', token)
    # ``or`` short-circuits, so the stopword lookup only runs for
    # non-punctuation tokens.
    return bool(only_punctuation) or token.lower() in STOPWORDS
项目:backrefs    作者:facelessuser    | 项目源码 | 文件源码
def compile_replace(pattern, repl, flags=0):
        """Construct a method that can be used as a replace method for `sub`, `subn`, etc.

        ``pattern`` must be a compiled pattern (``REGEX_TYPE``).  ``repl``
        may be a string/bytes template, an already built ``Replace`` or a
        ``ReplaceTemplate``.  ``flags`` is only accepted together with a
        string template.

        Raises ``TypeError`` for an uncompiled pattern or an unsupported
        ``repl`` type, and ``ValueError`` when flags are given with a
        prebuilt replacement or when a ``Replace`` was built for a
        different pattern.

        NOTE(review): two closing parentheses and two ``else:`` lines were
        lost from this snippet and are restored here.
        """

        call = None
        if pattern is not None and isinstance(pattern, REGEX_TYPE):
            if isinstance(repl, (compat.string_type, compat.binary_type)):
                repl = ReplaceTemplate(pattern, repl, bool(flags & FORMAT))
                call = Replace(
                    functools.partial(_apply_replace_backrefs, repl=repl), repl.use_format, repl.pattern_hash
                )
            elif isinstance(repl, Replace):
                if flags:
                    raise ValueError("Cannot process flags argument with a compiled pattern!")
                # A compiled replace is only valid for the pattern it was
                # built from.
                if repl.pattern_hash != hash(pattern):
                    raise ValueError("Pattern hash doesn't match hash in compiled replace!")
                call = repl
            elif isinstance(repl, ReplaceTemplate):
                if flags:
                    raise ValueError("Cannot process flags argument with a ReplaceTemplate!")
                call = Replace(
                    functools.partial(_apply_replace_backrefs, repl=repl), repl.use_format, repl.pattern_hash
                )
            else:
                raise TypeError("Not a valid type!")
        else:
            raise TypeError("Pattern must be a compiled regular expression!")
        return call

    # Convenience methods like re has, but slower due to overhead on each call.
    # It is recommended to use compile_search and compile_replace
项目:wikt2pron    作者:abuccts    | 项目源码 | 文件源码
def tone_determ(text):
    """Return the tone of ``text`` as a string, defaulting to "5".

    The text is decomposed (NFD) first.  The first match of ``tones`` is
    looked up in ``pinyin_tone``; "5" is returned when nothing matches or
    the match is not a key of ``pinyin_tone``.

    NOTE(review): the ``re.search(tones`` call and both ``match.group()``
    expressions were stripped from this snippet.  ``tones`` is assumed to be
    the pattern because a sibling function uses that name -- confirm against
    the upstream source.
    """
    text = unicodedata.normalize("NFD", text)
    match = re.search(tones, text)
    if match and match.group() in pinyin_tone:
        return pinyin_tone[match.group()]
    return "5"
项目:wikt2pron    作者:abuccts    | 项目源码 | 文件源码
def pinyin_transform(text):
    # Normalises a pinyin string: the visible steps lower-case it, put
    # spaces around punctuation and between syllables, and return the result
    # in NFC form.  Some inputs return "" early.
    # NOTE(review): this snippet is badly garbled and does not parse as
    # shown: several calls and whole argument lines are missing, and the
    # literal "?" characters appear to stand in for non-Latin characters
    # lost in extraction.  Restore the block from the upstream source before
    # changing any logic.
    if"?", text):
        return ""
    text = re.sub(
        unicodedata.normalize("NFD", "ü"),
            unicodedata.normalize("NFD", "ê"),
            unicodedata.normalize("NFD", text)
            "[aeiouêü]" + tones + "[aeiou]?[aeiouêü]" + tones + "",
        return ""
    text = text.lower()
    if not, text) and re.match("[1-5]", text):
        # NOTE(review): "\1 \2" is not a raw string, so it is the control
        # characters \x01 and \x02, not group references.  ``\p{Ll}`` needs
        # the ``regex`` engine; presumably ``re`` is an alias -- confirm.
        return re.sub("(\d)(\p{Ll})", "\1 \2", text)
    if"[??,.?]", text):
        text = re.sub(
            lambda x: " y?" if == "?" else " bù",
        # Surround punctuation with single spaces, then tidy the whitespace.
        text = re.sub("([??])", r" \1 ", text)
        text = re.sub("([,.?])", r" \1 ", text)
        text = re.sub(" +", " ", text)
        text = re.sub("^ ", "", text)
        text = re.sub(" $", "", text)
        # Re-join an ellipsis that the spacing above split apart.
        text = re.sub("\. \. \.", "...", text)
    # Apostrophes and hyphens become spaces.
    text = re.sub("['\-]", " ", text)
    # Insert a space between a syllable final and the next initial.
    text = re.sub(
        "([aeiouêü]" + tones + "?n?g?r?)([bpmfdtnlgkhjqxzcsywr]h?)",
        r"\1 \2",
    # Re-attach a stray g/r/n to the preceding text.
    text = re.sub(" ([grn])$", r"\1", text)
    text = re.sub(" ([grn]) ", r"\1 ", text)

    return unicodedata.normalize("NFC", text)