The following 11 code examples, extracted from open-source Python projects, illustrate how to use regex.MULTILINE.
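Before the project examples, here is a minimal orientation sketch (not taken from any of the projects below): with regex.MULTILINE set, ^ and $ match at the start and end of every line, not only at the start and end of the whole string.

import regex

text = 'first line\nsecond line\nthird line'

# Without MULTILINE, ^ matches only at the very start of the string.
print(regex.findall(r'^\w+', text))                         # ['first']

# With MULTILINE, ^ matches at the start of every line.
print(regex.findall(r'^\w+', text, flags=regex.MULTILINE))  # ['first', 'second', 'third']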
def __init__(self, **kwargs):
    """
    Args:
        annotators: None or empty set (only tokenizes).
        substitutions: if true, normalizes some token types (e.g. quotes).
    """
    self._regexp = regex.compile(
        '(?P<digit>%s)|(?P<title>%s)|(?P<abbr>%s)|(?P<neg>%s)|(?P<hyph>%s)|'
        '(?P<contr1>%s)|(?P<alphanum>%s)|(?P<contr2>%s)|(?P<sdquote>%s)|'
        '(?P<edquote>%s)|(?P<ssquote>%s)|(?P<esquote>%s)|(?P<dash>%s)|'
        '(?<ellipses>%s)|(?P<punct>%s)|(?P<nonws>%s)' %
        (self.DIGIT, self.TITLE, self.ABBRV, self.NEGATION, self.HYPHEN,
         self.CONTRACTION1, self.ALPHA_NUM, self.CONTRACTION2,
         self.START_DQUOTE, self.END_DQUOTE, self.START_SQUOTE,
         self.END_SQUOTE, self.DASH, self.ELLIPSES, self.PUNCT,
         self.NON_WS),
        flags=regex.IGNORECASE + regex.UNICODE + regex.MULTILINE
    )
    if len(kwargs.get('annotators', {})) > 0:
        logger.warning('%s only tokenizes! Skipping annotators: %s' %
                       (type(self).__name__, kwargs.get('annotators')))
    self.annotators = set()
    self.substitutions = kwargs.get('substitutions', True)
def test_constants(self):
    # The single-letter flag constants are aliases for the spelled-out names.
    if regex.I != regex.IGNORECASE:
        self.fail()
    if regex.L != regex.LOCALE:
        self.fail()
    if regex.M != regex.MULTILINE:
        self.fail()
    if regex.S != regex.DOTALL:
        self.fail()
    if regex.X != regex.VERBOSE:
        self.fail()
def test_dollar_matches_twice(self):
    # $ matches the end of string, and just before the terminating \n.
    pattern = regex.compile('$')
    self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
    self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
    self.assertEqual(pattern.sub('#', '\n'), '#\n#')

    pattern = regex.compile('$', regex.MULTILINE)
    self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a#\nb#\n#')
    self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
    self.assertEqual(pattern.sub('#', '\n'), '#\n#')
def regex_match(text, pattern):
    """Test if a regex pattern is contained within a text."""
    try:
        pattern = re.compile(
            pattern,
            flags=re.IGNORECASE + re.UNICODE + re.MULTILINE,
        )
    except BaseException:
        return False
    return pattern.search(text) is not None
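A hypothetical usage sketch for regex_match (the calls below are illustrative, not from the source project). Because the pattern is compiled with MULTILINE, ^ and $ anchor at line boundaries inside the text:

text = 'First answer\nsecond answer\nthird line'

print(regex_match(text, r'^second answer$'))  # True: anchors match at line boundaries
print(regex_match(text, r'^fourth'))          # False: no line starts with "fourth"
print(regex_match(text, r'[unbalanced'))      # False: the pattern fails to compile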
def regex_match_score(prediction, pattern):
    """Check if the prediction matches the given regular expression."""
    try:
        compiled = re.compile(
            pattern,
            flags=re.IGNORECASE + re.UNICODE + re.MULTILINE
        )
    except BaseException:
        logger.warn('Regular expression failed to compile: %s' % pattern)
        return False
    return compiled.match(prediction) is not None
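A short usage note on regex_match_score (hypothetical calls): unlike regex_match above, it uses .match, so the pattern must match starting at the beginning of the prediction string rather than anywhere inside it.

print(regex_match_score('42 BC', r'42'))     # True: matches at position 0
print(regex_match_score('in 42 BC', r'42'))  # False: .match does not scan ahead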
def __init__(self, **kwargs):
    """
    Args:
        annotators: None or empty set (only tokenizes).
    """
    self._regexp = regex.compile(
        '(%s)|(%s)' % (self.ALPHA_NUM, self.NON_WS),
        flags=regex.IGNORECASE + regex.UNICODE + regex.MULTILINE
    )
    if len(kwargs.get('annotators', {})) > 0:
        logger.warning('%s only tokenizes! Skipping annotators: %s' %
                       (type(self).__name__, kwargs.get('annotators')))
    self.annotators = set()
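The __init__ above belongs to a tokenizer class whose name and class-level constants are not shown in this snippet. The sketch below is a stand-alone approximation: the ALPHA_NUM and NON_WS values are assumptions chosen to mimic the class constants, and it shows how the compiled expression is typically applied with finditer. MULTILINE is harmless for this particular pattern since it contains no ^/$ anchors.

import regex

# Assumed stand-ins for the class constants referenced above.
ALPHA_NUM = r'[\p{L}\p{N}\p{M}]+'   # runs of letters, digits and marks
NON_WS = r'[^\p{Z}\p{C}]'           # any single non-separator, non-control character

_regexp = regex.compile(
    '(%s)|(%s)' % (ALPHA_NUM, NON_WS),
    flags=regex.IGNORECASE + regex.UNICODE + regex.MULTILINE
)

tokens = [m.group() for m in _regexp.finditer('Hello, world!\nSecond line')]
print(tokens)  # ['Hello', ',', 'world', '!', 'Second', 'line']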
def make_xpath_ranges(html, phrase):
    '''Given a HTML string and a `phrase`, build a regex to find offsets
    for the phrase, and then build a list of `XPathRange` objects for it.
    If this fails, return empty list.
    '''
    if not html:
        return []
    if not isinstance(phrase, unicode):
        try:
            phrase = phrase.decode('utf8')
        except:
            logger.info('failed %r.decode("utf8")', phrase, exc_info=True)
            return []
    # `re` here is the third-party `regex` module; stdlib `re` has no
    # `overlapped` keyword on finditer.
    phrase_re = re.compile(
        phrase, flags=re.UNICODE | re.IGNORECASE | re.MULTILINE)
    spans = []
    for match in phrase_re.finditer(html, overlapped=False):
        spans.append(match.span())  # a list of tuple(start, end) char indexes
    # now run fancy aligner magic to get xpath info and format them as
    # XPathRange per above
    try:
        xpath_ranges = list(char_offsets_to_xpaths(html, spans))
    except:
        logger.info('failed to get xpaths', exc_info=True)
        return []
    ranges = []
    for xpath_range in filter(None, xpath_ranges):
        ranges.append(dict(
            start=dict(node=xpath_range.start_xpath,
                       idx=xpath_range.start_offset + 1),
            end=dict(node=xpath_range.end_xpath,
                     idx=xpath_range.end_offset)))
    return ranges