The following 49 code examples, extracted from open source Python projects, illustrate how to use regex.sub().
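Before the project code, here is a minimal sketch of the call (not taken from any of the projects below; the sample strings are invented for illustration). Like re.sub(), regex.sub() accepts either a replacement string or a callable, and the regex module additionally understands Unicode property classes such as \p{P}:

import regex

# Runs of Unicode punctuation or symbol characters are collapsed to one space.
cleaned = regex.sub(r"[\p{P}\p{S}]+", " ", "Hello, world! (2024)")

# The replacement may also be a callable that receives each match object.
bumped = regex.sub(r"\d+", lambda m: str(int(m.group()) + 1), "version 41")
# bumped == "version 42"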
def extract_video_titles(input_file, output_file):
    with open(input_file, encoding='utf-8') as file:
        titles = json.load(file)
    count = 0
    for video_id, title in titles.items():
        # Remove all punctuation and symbols in unicode
        title = re.sub(r'[\p{P}\p{S}]+', '', title)
        titles[video_id] = title.split(' ')
        count += 1
        print('{}: {}'.format(count, video_id), end='\r', flush=True)
    with open(output_file, 'w', encoding='utf-8') as file:
        json.dump(titles, file)
def test_bug_1140(self):
    # regex.sub(x, y, u'') should return u'', not '', and
    # regex.sub(x, y, '') should return '', not u''.
    # Also:
    # regex.sub(x, y, unicode(x)) should return unicode(y), and
    # regex.sub(x, y, str(x)) should return
    #     str(y) if isinstance(y, str) else unicode(y).
    for x in 'x', u'x':
        for y in 'y', u'y':
            z = regex.sub(x, y, u'')
            self.assertEqual((type(z), z), (unicode, u''))

            z = regex.sub(x, y, '')
            self.assertEqual((type(z), z), (str, ''))

            z = regex.sub(x, y, unicode(x))
            self.assertEqual((type(z), z), (unicode, unicode(y)))

            z = regex.sub(x, y, str(x))
            self.assertEqual((type(z), z), (type(y), y))
def test_symbolic_refs(self):
    self.assertRaisesRegex(regex.error, self.MISSING_GT, lambda:
      regex.sub('(?P<a>x)', r'\g<a', 'xx'))
    self.assertRaisesRegex(regex.error, self.MISSING_GROUP_NAME, lambda:
      regex.sub('(?P<a>x)', r'\g<', 'xx'))
    self.assertRaisesRegex(regex.error, self.MISSING_LT, lambda:
      regex.sub('(?P<a>x)', r'\g', 'xx'))
    self.assertRaisesRegex(regex.error, self.BAD_GROUP_NAME, lambda:
      regex.sub('(?P<a>x)', r'\g<a a>', 'xx'))
    self.assertRaisesRegex(regex.error, self.BAD_GROUP_NAME, lambda:
      regex.sub('(?P<a>x)', r'\g<1a1>', 'xx'))
    self.assertRaisesRegex(IndexError, self.UNKNOWN_GROUP_I, lambda:
      regex.sub('(?P<a>x)', r'\g<ab>', 'xx'))

    # The new behaviour of unmatched but valid groups is to treat them like
    # empty matches in the replacement template, like in Perl.
    self.assertEqual(regex.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
    self.assertEqual(regex.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '')

    # The old behaviour was to raise it as an IndexError.
    self.assertRaisesRegex(regex.error, self.BAD_GROUP_NAME, lambda:
      regex.sub('(?P<a>x)', r'\g<-1>', 'xx'))
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""
    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))
def prepare(self):
    # log the full request and headers if the log level is set to debug
    if log.level == 10:
        log.debug("Preparing request: {} {}".format(self.request.method, self.request.path))
        for k, v in self.request.headers.items():
            log.debug("{}: {}".format(k, v))

    if 'X-Forwarded-Proto' in self.request.headers:
        proto = self.request.headers['X-Forwarded-Proto']
    else:
        proto = self.request.protocol

    if proto != 'https' and 'enforce_https' in self.application.config['general']:
        mode = self.application.config['general']['enforce_https']
        if mode == 'reject':
            self.set_status(404)
            self.finish()
        else:
            # default to redirect
            self.redirect(regex.sub(r'^([^:]+)', 'https', self.request.full_url()), permanent=True)
        return

    super().prepare()
def eval_expr(expr):
    import ast
    import operator as op
    op = {
        ast.Add: op.add,
        ast.Sub: op.sub,
        ast.Mult: op.mul,
        ast.Div: op.truediv,
        ast.Pow: op.pow,
        ast.BitXor: op.xor,
        ast.USub: op.neg,
    }

    def eval_(node):
        if isinstance(node, ast.Num):
            return fractions.Fraction(node.n)
        elif isinstance(node, ast.BinOp):
            return op[type(node.op)](eval_(node.left), eval_(node.right))
        elif isinstance(node, ast.UnaryOp):
            return op[type(node.op)](eval_(node.operand))
        raise TypeError(node)

    return eval_(ast.parse(str(expr), mode='eval').body)
def test_against_kanjidic(self):
    kanjidic_data = {}
    with open(kanjidic_file, 'rt') as f:
        for line in f:
            kanji, *fields = line.strip().split()
            if kanji in TestLoadedData.kanjis.keys():
                # kanjidic marks bound affixes with '-', but we don't
                fields = [re.sub('-$', '', f) for f in fields]
                fields = [re.sub('^-', '', f) for f in fields]
                kanjidic_data[kanji] = fields

    for kanji in joyodb.loaded_data.kanjis:
        for reading in kanji.readings:
            if reading.variation_of:
                continue  # variations are not in kanjidic
            if (kanji.kanji, reading.reading) not in KANJIDIC_MISSING_READINGS:
                self.assertIn(reading.reading, kanjidic_data[kanji.kanji])
def make_vocab(fpath, fname):
    '''Constructs vocabulary.

    Args:
      fpath: A string. Input file path.
      fname: A string. Output file name.

    Writes vocabulary line by line to `preprocessed/fname`
    '''
    text = codecs.open(fpath, 'r', 'utf-8').read()
    text = regex.sub("[^\s\p{Latin}']", "", text)
    words = text.split()
    word2cnt = Counter(words)
    if not os.path.exists('preprocessed'):
        os.mkdir('preprocessed')
    with codecs.open('preprocessed/{}'.format(fname), 'w', 'utf-8') as fout:
        fout.write("{}\t1000000000\n{}\t1000000000\n{}\t1000000000\n{}\t1000000000\n".format("<PAD>", "<UNK>", "<S>", "</S>"))
        for word, cnt in word2cnt.most_common(len(word2cnt)):
            fout.write(u"{}\t{}\n".format(word, cnt))
def request_url(url, headers=None, name=''):
    print('req', url)
    data = set()
    servers = list()
    try:
        response = requests.get(url, headers=headers, verify=False).text
        data.update(map(lambda x: re.sub('\s', '', x), re.findall('ssr?://[a-zA-Z0-9=]+', response)))
        soup = BeautifulSoup(response, 'html.parser')
        title = soup.find('title').text

        info = {'message': '', 'url': url, 'name': str(title)}
        for i, server in enumerate(data):
            try:
                servers.append(parse(server, ' '.join([title, name, str(i)])))
            except Exception as e:
                logging.exception(e, stack_info=False)
                print('URL:', url, 'SERVER', server)
    except Exception as e:
        print(url)
        logging.exception(e, stack_info=False)
        return [], {'message': str(e), 'url': '', 'name': ''}
    return servers, info
def generate_char_list(string, strip_html=True):
    if strip_html:
        s = strip_html_tags(string.lower())
    else:
        s = string.lower()
    normalized_string = regex.sub(r'\s+', r' ', s)  # change any kind of whitespace to a single space
    list_norm_chars = regex.findall(r"\w|[?!'#@$:\"&*=,]", normalized_string)
    return list_norm_chars
def generate_word_list(string, strip_html=True):
    if strip_html:
        s = strip_html_tags(string.lower())
    else:
        s = string.lower()
    normalized_string = regex.sub(r"\s+", r' ', s)  # change any kind of whitespace to a single space
    # list of all words seen during training, including strings like '!!!', '??', '....',
    # as these repeated punctuation marks tend to imply more than their grammatical meaning
    list_normalized_string = regex.findall(r"\b\w+[']?\w*\b|\!+|\?+|\.{3,}", normalized_string)
    return list_normalized_string
def strip_html_tags(string, verbose=False):
    p = regex.compile(r'<.*?>')
    return p.sub(' ', string)
def remove_ansi_escape_sequences(text):
    # http://stackoverflow.com/questions/14693701/how-can-i-remove-the-ansi-escape-sequences-from-a-string-in-python
    # also clean up the line endings
    return regex.sub(r'(\x9b|\x1b\[)[0-?]*[ -\/]*[@-~]|\ *\r', '', text)


# helper for running sut as subprocess within pty
# does two things
#   * test app running in pty in subprocess
#   * get test coverage from subprocess
# docu:
# http://blog.fizyk.net.pl/blog/gathering-tests-coverage-for-subprocesses-in-python.html
def test_basic_regex_sub(self):
    self.assertEqual(regex.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
    self.assertEqual(regex.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
      '9.3 -3 24x100y')
    self.assertEqual(regex.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
      '9.3 -3 23x99y')

    self.assertEqual(regex.sub('.', lambda m: r"\n", 'x'), "\\n")
    self.assertEqual(regex.sub('.', r"\n", 'x'), "\n")

    self.assertEqual(regex.sub('(?P<a>x)', r'\g<a>\g<a>', 'xx'), 'xxxx')
    self.assertEqual(regex.sub('(?P<a>x)', r'\g<a>\g<1>', 'xx'), 'xxxx')
    self.assertEqual(regex.sub('(?P<unk>x)', r'\g<unk>\g<unk>', 'xx'), 'xxxx')
    self.assertEqual(regex.sub('(?P<unk>x)', r'\g<1>\g<1>', 'xx'), 'xxxx')

    self.assertEqual(regex.sub('a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D',
      'a'), "\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D")
    self.assertEqual(regex.sub('a', '\t\n\v\r\f\a', 'a'), "\t\n\v\r\f\a")
    self.assertEqual(regex.sub('a', '\t\n\v\r\f\a', 'a'), chr(9) + chr(10) +
      chr(11) + chr(13) + chr(12) + chr(7))

    self.assertEqual(regex.sub(r'^\s*', 'X', 'test'), 'Xtest')

    self.assertEqual(regex.sub(ur"x", ur"\x0A", u"x"), u"\n")
    self.assertEqual(regex.sub(ur"x", ur"\u000A", u"x"), u"\n")
    self.assertEqual(regex.sub(ur"x", ur"\U0000000A", u"x"), u"\n")
    self.assertEqual(regex.sub(ur"x", ur"\N{LATIN CAPITAL LETTER A}", u"x"),
      u"A")

    self.assertEqual(regex.sub(r"x", r"\x0A", "x"), "\n")
    self.assertEqual(regex.sub(r"x", r"\u000A", "x"), "\\u000A")
    self.assertEqual(regex.sub(r"x", r"\U0000000A", "x"), "\\U0000000A")
    self.assertEqual(regex.sub(r"x", r"\N{LATIN CAPITAL LETTER A}", "x"),
      "\\N{LATIN CAPITAL LETTER A}")
def test_bug_449964(self):
    # Fails for group followed by other escape.
    self.assertEqual(regex.sub(r'(?P<unk>x)', r'\g<1>\g<1>\b', 'xx'),
      "xx\bxx\b")
def test_bug_449000(self):
    # Test for sub() on escaped characters.
    self.assertEqual(regex.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
      "abc\ndef\n")
    self.assertEqual(regex.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
      "abc\ndef\n")
    self.assertEqual(regex.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
      "abc\ndef\n")
    self.assertEqual(regex.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
      "abc\ndef\n")
def test_bug_114660(self):
    self.assertEqual(regex.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
      'hello there')
def test_bug_462270(self):
    # Test for empty sub() behaviour, see SF bug #462270
    self.assertEqual(regex.sub('(?V0)x*', '-', 'abxd'), '-a-b-d-')
    self.assertEqual(regex.sub('(?V1)x*', '-', 'abxd'), '-a-b--d-')
    self.assertEqual(regex.sub('x+', '-', 'abxd'), 'ab-d')
def test_dollar_matches_twice(self):
    # $ matches the end of string, and just before the terminating \n.
    pattern = regex.compile('$')
    self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
    self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
    self.assertEqual(pattern.sub('#', '\n'), '#\n#')

    pattern = regex.compile('$', regex.MULTILINE)
    self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a#\nb#\n#')
    self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
    self.assertEqual(pattern.sub('#', '\n'), '#\n#')
def test_unmatched_in_sub(self):
    # Issue 1519638.
    self.assertEqual(regex.sub(r"(?V0)(x)?(y)?", r"\2-\1", "xy"), 'y-x')
    self.assertEqual(regex.sub(r"(?V1)(x)?(y)?", r"\2-\1", "xy"), 'y-x-')
    self.assertEqual(regex.sub(r"(?V0)(x)?(y)?", r"\2-\1", "x"), '-x')
    self.assertEqual(regex.sub(r"(?V1)(x)?(y)?", r"\2-\1", "x"), '-x-')
    self.assertEqual(regex.sub(r"(?V0)(x)?(y)?", r"\2-\1", "y"), 'y-')
    self.assertEqual(regex.sub(r"(?V1)(x)?(y)?", r"\2-\1", "y"), 'y--')
def split_if_contraction(self, word):
    # Handle preposition+determiner contractions.
    word = regex.sub(ur'^(A|a)u$', ur'à le', word)
    word = regex.sub(ur'^(A|a)uquel$', ur'à lequel', word)
    word = regex.sub(ur'^(A|a)ux$', ur'à les', word)
    word = regex.sub(ur'^(A|a)uxquels$', ur'à lesquels', word)
    word = regex.sub(ur'^(A|a)uxquelles$', ur'à lesquelles', word)
    word = regex.sub(ur'^(D|d)u$', ur'de le', word)
    word = regex.sub(ur'^(D|d)uquel$', ur'de lequel', word)
    word = regex.sub(ur'^(D|d)es$', ur'de les', word)
    word = regex.sub(ur'^(D|d)esquels$', ur'de lesquels', word)
    word = regex.sub(ur'^(D|d)esquelles$', ur'de lesquelles', word)
    return word
def _clean_characters(self, characters):
    """Clean characters (e.g. convert \t to a space)."""
    if self._lower:
        characters = characters.lower()
    characters = regex.sub(r'\t|\s+|\u200d', ' ', characters)
    characters = regex.sub(r'`', "'", characters)
    characters = regex.sub(r'–', "-", characters)
    return characters
def compile_replace(pattern, repl, flags=0):
    """Construct a method that can be used as a replace method for `sub`, `subn`, etc."""
    call = None
    if pattern is not None and isinstance(pattern, REGEX_TYPE):
        if isinstance(repl, (compat.string_type, compat.binary_type)):
            repl = ReplaceTemplate(pattern, repl, bool(flags & FORMAT))
            call = Replace(
                functools.partial(_apply_replace_backrefs, repl=repl), repl.use_format, repl.pattern_hash
            )
        elif isinstance(repl, Replace):
            if flags:
                raise ValueError("Cannot process flags argument with a compiled pattern!")
            if repl.pattern_hash != hash(pattern):
                raise ValueError("Pattern hash doesn't match hash in compiled replace!")
            call = repl
        elif isinstance(repl, ReplaceTemplate):
            if flags:
                raise ValueError("Cannot process flags argument with a ReplaceTemplate!")
            call = Replace(
                functools.partial(_apply_replace_backrefs, repl=repl), repl.use_format, repl.pattern_hash
            )
        else:
            raise TypeError("Not a valid type!")
    else:
        raise TypeError("Pattern must be a compiled regular expression!")
    return call


# Convenience methods like re has, but slower due to overhead on each call.
# It is recommended to use compile_search and compile_replace
def sub(pattern, repl, string, count=0, flags=0, pos=None, endpos=None, concurrent=None, **kwargs):
    """Wrapper for `sub`."""
    is_replace = _is_replace(repl)
    is_string = isinstance(repl, (compat.string_type, compat.binary_type))
    if is_replace and repl.use_format:
        raise ValueError("Compiled replace cannot be a format object!")

    pattern = compile_search(pattern, flags)
    return regex.sub(
        pattern, (compile_replace(pattern, repl) if is_replace or is_string else repl), string,
        count, flags, pos, endpos, concurrent, **kwargs
    )
def subf(pattern, format, string, count=0, flags=0, pos=None, endpos=None, concurrent=None, **kwargs):  # noqa B002
    """Wrapper for `subf`."""
    is_replace = _is_replace(format)
    is_string = isinstance(format, (compat.string_type, compat.binary_type))
    if is_replace and not format.use_format:
        raise ValueError("Compiled replace is not a format object!")

    pattern = compile_search(pattern, flags)
    rflags = FORMAT if is_string else 0
    return regex.sub(
        pattern, (compile_replace(pattern, format, flags=rflags) if is_replace or is_string else format),
        string, count, flags, pos, endpos, concurrent, **kwargs
    )
def build_html(fragment, css=False):
    fragment = regex.sub(r'<p([^>])*></p>', r'<p\1> </p>', fragment)
    css_link = ''
    if css:
        css_link = LINK_TEXT
    new = HTML.format(css_link, fragment)
    soup = gumbo_bs4.parse(new)
    return soup.serialize_xhtml()
def splitWord(str):
    str = re.sub("[^A-Za-z]", "", str)
    words = re.split(r'(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])|(?<=[a-z]A)(?=[A-Z])', str, flags=re.V1)
    return words
def remove_punctuation(self, text):
    ''' Get rid of punctuation except apostrophes '''
    return re.sub(r"[^\P{P}\']+", "", text)
def slugify(value):
    """
    Parameters
    ----------
    value: str
        the value to slug

    Convert spaces to hyphens. Remove characters that aren't alphanumerics,
    underscores, or hyphens. Convert to lowercase. Also strip leading and
    trailing whitespace.
    """
    value = regex.sub('[^\w\s-]', '', value).strip().lower()
    return regex.sub('[-\s]+', '-', value)
def jsonFormat(year=2014, month=1, day=1, newsType=0):
    text = getJson(year, month, day, newsType)
    returnValue = list()
    if text.startswith('var data=') is True:
        tmp = re.sub(',*,', ',', text.lstrip('var data=').rstrip(';').replace('\n', '').replace(',[]', ''))
        if newsType != 0:
            tmp = re.sub(r'(,|\{)([a-z]*?)(:)', r'\1"\2"\3', tmp)
        tmp = re.sub(r'(\[),(\{)', r'\1\2', tmp.replace('\\', '/'))
        try:
            tmpValue = json.loads(tmp, strict=False)
        except:
            return list()
        childClassification = getChildClassification(tmpValue[u'category'])
        if newsType == 1:
            valuelist = tmpValue[u'ent']
        else:
            valuelist = tmpValue[u'news']
        for list0 in valuelist:
            for list1 in list0:
                if list1 is not None:
                    if list1[u'l'].find('photoview') == -1 and list1[u'l'].find('blog') == -1:
                        returnValue.append(
                            [list1[u'p'].split()[0], list1[u'p'].split()[1], getSiteURL(newsType)[0],
                             childClassification[list1[u'c']], list1[u'l'], list1[u't']])
        del tmp
        del text
        del tmpValue
        del childClassification
        del valuelist
        gc.collect()
    return returnValue
def getnews(URL):
    date = str()
    html = networkExceptionCatch(URL)
    soup = BeautifulSoup(html, 'html.parser')
    alls = soup.find_all('div', id="endText")
    for div in alls:
        if div.find('script'):
            div = re.sub(r'<script.*?</script>', '', div)
        p_in_div = div.find_all('p')
        if len(p_in_div) == 0:
            p_in_div = re.sub(r'(<div id="endText">)(.*?)(</p>)(<p>)', r'\1\2\4', p_in_div)
        for p_tag in p_in_div:
            if p_tag.text is not None:
                date += p_tag.text + u'\n'
    return date
def sub_pattern(self, match):
    match_dict = match.groupdict()
    pattern = GROK_PATTERNS[match_dict['pattern_name']]
    pattern_output_raw = match_dict['pattern_output']
    pattern_type_raw = match_dict['pattern_type']
    if pattern_output_raw:
        pattern_output = pattern_output_raw.lstrip(':')
        new_pattern = GROK_NEW_PATTERN.format(name=pattern_output, pattern=pattern)
        if pattern_type_raw:
            pattern_type = pattern_type_raw.lstrip(':')
            self.pattern_types[pattern_output] = __builtins__[pattern_type]
    else:
        new_pattern = pattern
    return regex.sub(GROK_REPLACE_PATTERN, self.sub_pattern, new_pattern)
def grok_re_preprocess(re_pattern):
    traverser = PatternTraverser()
    new_pattern = regex.sub(GROK_REPLACE_PATTERN, traverser.sub_pattern, re_pattern)
    return new_pattern, traverser.pattern_types
def test_collect_types(self):
    traverser = pattern_matching.PatternTraverser()
    re = regex.sub(pattern_matching.GROK_REPLACE_PATTERN, traverser.sub_pattern,
                   'This is process %{POSINT:processid:int} running in %{PATH:process_dir}')
    self.assertDictEqual(traverser.pattern_types, {'processid': int})
def pinyin_transform(text):
    if re.search("?", text):
        return ""
    text = re.sub(
        unicodedata.normalize("NFD", "ü"), "ü",
        re.sub(
            unicodedata.normalize("NFD", "ê"), "ê",
            unicodedata.normalize("NFD", text)
        )
    )
    if re.search(
            "[aeiouêü]" + tones + "[aeiou]?[aeiouêü]" + tones + "",
            text.lower()):
        return ""
    text = text.lower()
    if not re.search(tones, text) and re.match("[1-5]", text):
        return re.sub("(\d)(\p{Ll})", "\1 \2", text)
    if re.search("[??,.?]", text):
        text = re.sub(
            "([??])$",
            lambda x: " y?" if x.group() == "?" else " bù",
            text
        )
        text = re.sub("([??])", r" \1 ", text)
        text = re.sub("([,.?])", r" \1 ", text)
        text = re.sub(" +", " ", text)
        text = re.sub("^ ", "", text)
        text = re.sub(" $", "", text)
        text = re.sub("\. \. \.", "...", text)
    text = re.sub("['\-]", " ", text)
    text = re.sub(
        "([aeiouêü]" + tones + "?n?g?r?)([bpmfdtnlgkhjqxzcsywr]h?)",
        r"\1 \2", text
    )
    text = re.sub(" ([grn])$", r"\1", text)
    text = re.sub(" ([grn]) ", r"\1 ", text)
    return unicodedata.normalize("NFC", text)
def sub_repeatedly(pattern, repl, term):
    while True:
        new_term = re.sub(pattern, repl, term)
        if new_term == term:
            return term
        term = new_term
def decompose(text):
    def repl(match):
        k = match.group()
        if k in recomposer.keys():
            return recomposer[k]
        return k
    text = unicodedata.normalize("NFD", text)
    text = re.sub(".[" + BREVE + DIA + CARON + "]", repl, text)
    return text


# Remove grave accents; don't affect acute or composed diaeresis in ?? or
# uncomposed diaeresis in -??- (as in plural ?????? of ??????).
# NOTE: Translit must already be decomposed! See comment at top.
def remove_grave_accents(word):
    def repl(match):
        k = match.group()
        if k in grave_deaccenter.keys():
            return grave_deaccenter[k]
        return k
    ru_removed = re.sub("[?????]", repl, word)
    return ru_removed
def tr_sub(text, include_monosyllabic_jo_accent="", noadj="", noshto="", sub="", forceadj=""):
    if sub:
        subs = sub.split(",")
        for subpair in subs:
            subsplit = subpair.split("/")
            text = re.sub(subsplit[0], subsplit[1], text)
    return tr(text, None, None, include_monosyllabic_jo_accent, noadj, noshto, forceadj)


# for adjectives, pronouns
def sub_repeatedly(pattern, repl, term):
    """apply sub() repeatedly until no change"""
    while True:
        new_term = re.sub(pattern, repl, term)
        if new_term == term:
            return term
        term = new_term


# If enabled, compare this module with new version of module in
# Module:User:Benwing2/ru-pron to make sure all pronunciations are the same.
# To check for differences, go to Template:tracking/ru-pron/different-pron
# and look at what links to the page.
# test_new_ru_pron_module = False

# If enabled, do new code for final -?; else, the old way
def phon_respelling(text, remove_grave):
    text = re.sub("[" + CFLEX + DUBGR + DOTABOVE + DOTBELOW + "?]", "", text)
    # Remove grave accents from annotations but maybe not from phonetic respelling
    if remove_grave:
        text = com.remove_grave_accents(text)
    return text


# Return the actual IPA corresponding to Cyrillic text. ADJ, GEN, BRACKET
# and POS are as in [[Template:ru-IPA]]. If IS_TRANFORMED is true, the text
# has already been passed through m_ru_translit.apply_tr_fixes(); otherwise,
# this will be done.
def IPA_to_CMUBET(text):
    """Convert IPA to CMUBET for US English.

    Use `IPA`_ and symbol set used in Wiktionary and `CMUBET`_ symbol set
    used in CMUDict.

    .. _IPA: https://en.wiktionary.org/wiki/Module:IPA/data/symbols
    .. _CMUBET: https://cmusphinx.github.io/wiki/cmubet/

    Parameters
    ----------
    text : string
        String of IPA text parsed from Wiktionary.

    Returns
    -------
    string
        Converted CMUBET text.
    """
    text = re.sub("??", ":", text)
    text = text.lstrip("/[").rstrip("]/")
    text = text.strip("'-!$")
    text += " "
    CMUBET_lst = []
    i = 0
    while i < len(text) - 1:
        if text[i:i+2] in i2c_lookup.keys():
            CMUBET_lst.append(i2c_lookup[text[i:i+2]])
            i += 1
        elif text[i] in i2c_lookup.keys():
            CMUBET_lst.append(i2c_lookup[text[i]])
        i += 1
    return " ".join(CMUBET_lst)
def syllabify(text):
    def repl(match):
        a, b, c, d = \
            match.group(1), match.group(2), match.group(3), match.group(4)
        if re.match(weak_h, b + c) or re.match(aspirate + "h", b + " " + c):
            b, c = "", b + c
        if c == "" and b != "":
            c, b = b, ""
        return a + b + "." + c + d

    for _ in range(2):
        text = re.sub(syllabify_pattern, repl, text)
    return text
def normalize_quotes(token):
    token = re.sub(r"-$", '', token)
    token = re.sub(r"``", '\u201c', token)
    token = re.sub(r"''", '\u201d', token)
    return token
def remove_elongation(text):
    return regex.sub(r'(.)\1{3,}', r'\1\1', text, flags=regex.UNICODE)
def clean(text):
    # removing extra spaces
    text = regex.sub(r'[\s\n]+', ' ', text, flags=regex.UNICODE)
    # todo : add more cleaning methods
    return text