我们从Python开源项目中，提取了以下23个代码示例，用于说明如何使用regex.IGNORECASE。
def __init__(self, **kwargs):
    """
    Compile the tokenizing regexp and record the tokenizer options.

    Args:
        annotators: None or empty set (only tokenizes).
        substitutions: if true, normalizes some token types (e.g. quotes).
    """
    # One named alternative per token class; the class-level pattern
    # fragments are interpolated in the same order as the group names.
    self._regexp = regex.compile(
        '(?P<digit>%s)|(?P<title>%s)|(?P<abbr>%s)|(?P<neg>%s)|(?P<hyph>%s)|'
        '(?P<contr1>%s)|(?P<alphanum>%s)|(?P<contr2>%s)|(?P<sdquote>%s)|'
        '(?P<edquote>%s)|(?P<ssquote>%s)|(?P<esquote>%s)|(?P<dash>%s)|'
        '(?P<ellipses>%s)|(?P<punct>%s)|(?P<nonws>%s)' %
        (self.DIGIT, self.TITLE, self.ABBRV, self.NEGATION, self.HYPHEN,
         self.CONTRACTION1, self.ALPHA_NUM, self.CONTRACTION2,
         self.START_DQUOTE, self.END_DQUOTE, self.START_SQUOTE,
         self.END_SQUOTE, self.DASH, self.ELLIPSES, self.PUNCT,
         self.NON_WS),
        flags=regex.IGNORECASE | regex.UNICODE | regex.MULTILINE,
    )

    # `annotators` may be passed explicitly as None; normalise it before
    # testing so that case does not raise TypeError from len(None).
    annotators = kwargs.get('annotators') or ()
    if annotators:
        logger.warning('%s only tokenizes! Skipping annotators: %s',
                       type(self).__name__, annotators)

    # This tokenizer never produces annotations.
    self.annotators = set()
    self.substitutions = kwargs.get('substitutions', True)
def barcode_to_regex(barcode: str, error_rate: Optional[int] = None):
    """Convert a barcode string to a compiled regex pattern.

    The barcode is upper-cased and split into alternating segments:
    runs of ``N`` (UMI positions) and runs of any other characters
    (literal barcode sequence).  Each segment becomes one capture group,
    emitted in the order it appears in the barcode:

    * a literal segment ``ACGT`` becomes ``(ACGT)``;
    * a run of k ``N`` becomes a group of k ``[ACGT]`` classes.

    barcode [str]
        The barcode string to turn into a regex.
    error_rate [int]=None
        When truthy, every group is suffixed with ``{e<=error_rate}``
        (the ``regex`` module's fuzzy-matching constraint).  ``None`` or
        ``0`` leaves the groups exact.

    Returns the pattern compiled with ``regex.ENHANCEMATCH``.
    """
    fuzzy_suffix = '{e<=' + str(error_rate) + '}' if error_rate else ''

    groups = []
    # Walking the segments in order keeps a leading N run in front of the
    # first literal segment (pairing literals with N runs by index would
    # misplace it) and still emits a group for an all-N barcode.
    for segment in regex.findall(r'N+|[^N]+', barcode.upper()):
        if segment[0] == 'N':
            body = '[ACGT]' * len(segment)
        else:
            body = segment
        groups.append('(' + body + ')' + fuzzy_suffix)

    return regex.compile(''.join(groups), regex.ENHANCEMATCH)
def test_constants(self):
    """Fail unless each one-letter flag equals its long-named counterpart."""
    alias_pairs = (
        (regex.I, regex.IGNORECASE),
        (regex.L, regex.LOCALE),
        (regex.M, regex.MULTILINE),
        (regex.S, regex.DOTALL),
        (regex.X, regex.VERBOSE),
    )
    for short_flag, long_flag in alias_pairs:
        if short_flag != long_flag:
            self.fail()
def test_ascii_and_unicode_flag(self):
    """Exercise ASCII/UNICODE flag handling for case folding and ``\\w``."""
    target = u'\xe0'

    # Unicode patterns: expected to match with default flags and with UNICODE.
    for base_flags in (0, regex.UNICODE):
        compiled = regex.compile(u'\xc0', base_flags | regex.IGNORECASE)
        self.assertEqual(bool(compiled.match(target)), True)
        compiled = regex.compile(u'\\w', base_flags)
        self.assertEqual(bool(compiled.match(target)), True)

    # Unicode patterns under ASCII (flag or inline "(?a)"): expected no match.
    ascii_cases = (
        (u'\xc0', regex.ASCII | regex.IGNORECASE),
        (u'(?a)\xc0', regex.IGNORECASE),
        (u'\\w', regex.ASCII),
        (u'(?a)\\w', 0),
    )
    for source, case_flags in ascii_cases:
        compiled = regex.compile(source, case_flags)
        self.assertEqual(compiled.match(target), None)

    # String patterns.
    for base_flags in (0, regex.ASCII):
        compiled = regex.compile('\xc0', base_flags | regex.IGNORECASE)
        self.assertEqual(compiled.match('\xe0'), None)
        compiled = regex.compile('\\w')
        self.assertEqual(compiled.match('\xe0'), None)

    # Mixing the inline ASCII and UNICODE flags must be rejected.
    self.assertRaisesRegex(ValueError, self.MIXED_FLAGS,
                           lambda: regex.compile('(?au)\\w'))
def calc_unwanted_chars_re(self):
    """Compile the patterns for unwanted characters and, optionally, stop words.

    Sets ``self.unwanted_chars_re`` to a pattern matching runs of characters
    that are neither alphanumeric nor listed in ``self._safe_chars``.
    Sets ``self.unwanted_chars_and_words_re`` to that pattern extended with
    the stop-word alternative when ``self._stop_words`` is truthy, else None.
    """
    escaped_safe_chars = re.escape(self._safe_chars or '')
    chars_pattern = u'[^\\p{{AlNum}}{safe_chars}]+'.format(
        safe_chars=escaped_safe_chars)
    self.unwanted_chars_re = re.compile(chars_pattern, re.IGNORECASE)

    if not self._stop_words:
        self.unwanted_chars_and_words_re = None
        return

    # Stop words are matched only when not surrounded by alphanumerics;
    # the word list itself is supplied through the named list `stop_words`.
    words_alternative = u'|(?<!\\p{AlNum})(?:\\L<stop_words>)(?!\\p{AlNum})'
    self.unwanted_chars_and_words_re = re.compile(
        chars_pattern + words_alternative,
        re.IGNORECASE,
        stop_words=self._stop_words)
def regex_match(text, pattern):
    """Test if a regex pattern is contained within a text.

    Args:
        text: the string to search.
        pattern: the regular expression source; compiled with the
            IGNORECASE, UNICODE and MULTILINE flags.

    Returns:
        True if the pattern matches anywhere in `text`; False if it does
        not match or if the pattern cannot be compiled.
    """
    try:
        compiled = re.compile(
            pattern,
            flags=re.IGNORECASE | re.UNICODE | re.MULTILINE,
        )
    except Exception:
        # Invalid patterns count as "no match".  BaseException is deliberately
        # not caught so KeyboardInterrupt/SystemExit still propagate.
        return False
    return compiled.search(text) is not None
def regex_match_score(prediction, pattern):
    """Check if the prediction matches the given regular expression.

    Args:
        prediction: the string to test.
        pattern: the regular expression source; compiled with the
            IGNORECASE, UNICODE and MULTILINE flags.

    Returns:
        True if the pattern matches at the start of `prediction`
        (``match`` semantics); False otherwise, or if the pattern cannot be
        compiled (a warning is logged in that case).
    """
    try:
        compiled = re.compile(
            pattern,
            flags=re.IGNORECASE | re.UNICODE | re.MULTILINE,
        )
    except Exception:
        # BaseException is deliberately not caught so KeyboardInterrupt and
        # SystemExit still propagate.  `warning` replaces deprecated `warn`.
        logger.warning('Regular expression failed to compile: %s', pattern)
        return False
    return compiled.match(prediction) is not None
def __init__(self, **kwargs):
    """
    Compile the two-alternative tokenizing regexp.

    Args:
        annotators: None or empty set (only tokenizes).
    """
    # Group 1 captures alphanumeric tokens, group 2 any other non-whitespace.
    self._regexp = regex.compile(
        '(%s)|(%s)' % (self.ALPHA_NUM, self.NON_WS),
        flags=regex.IGNORECASE | regex.UNICODE | regex.MULTILINE,
    )

    # `annotators` may be passed explicitly as None; normalise it before
    # testing so that case does not raise TypeError from len(None).
    annotators = kwargs.get('annotators') or ()
    if annotators:
        logger.warning('%s only tokenizes! Skipping annotators: %s',
                       type(self).__name__, annotators)

    # This tokenizer never produces annotations.
    self.annotators = set()
def replaceURL(self, url):
    """
    Replace every [URL] placeholder (case-insensitive) in self.parsed_cmdline.

    Args:
        url: text substituted for each placeholder, inserted verbatim.
    """
    pattern = re.compile(r'\[URL\]', re.IGNORECASE)
    # A callable replacement bypasses template expansion, so backslashes or
    # "\1"-like sequences in the value are copied literally instead of being
    # interpreted as escapes / group references.
    self.parsed_cmdline = pattern.sub(lambda _match: url, self.parsed_cmdline)
def replaceHOST(self, host):
    """
    Replace every [HOST] placeholder (case-insensitive) in self.parsed_cmdline.

    Args:
        host: text substituted for each placeholder, inserted verbatim.
    """
    pattern = re.compile(r'\[HOST\]', re.IGNORECASE)
    # Callable replacement: the value is inserted literally, with no
    # backslash / group-reference expansion.
    self.parsed_cmdline = pattern.sub(lambda _match: host, self.parsed_cmdline)
def replaceIP(self, ip):
    """
    Replace every [IP] placeholder (case-insensitive) in self.parsed_cmdline.

    Args:
        ip: text substituted for each placeholder, inserted verbatim.
    """
    pattern = re.compile(r'\[IP\]', re.IGNORECASE)
    # Callable replacement: the value is inserted literally, with no
    # backslash / group-reference expansion.
    self.parsed_cmdline = pattern.sub(lambda _match: ip, self.parsed_cmdline)
def replacePORT(self, port):
    """
    Replace every [PORT] placeholder (case-insensitive) in self.parsed_cmdline.

    Args:
        port: value substituted for each placeholder; converted with str()
            and inserted verbatim.
    """
    pattern = re.compile(r'\[PORT\]', re.IGNORECASE)
    port_text = str(port)
    # Callable replacement: the value is inserted literally, with no
    # backslash / group-reference expansion.
    self.parsed_cmdline = pattern.sub(lambda _match: port_text,
                                      self.parsed_cmdline)
def replacePROTOCOL(self, protocol):
    """
    Replace every [PROTOCOL] placeholder (case-insensitive) in
    self.parsed_cmdline.

    Args:
        protocol: text substituted for each placeholder, inserted verbatim.
    """
    pattern = re.compile(r'\[PROTOCOL\]', re.IGNORECASE)
    # Callable replacement: the value is inserted literally, with no
    # backslash / group-reference expansion.
    self.parsed_cmdline = pattern.sub(lambda _match: protocol,
                                      self.parsed_cmdline)
def replaceOUTPUT(self, output_file):
    """
    Replace [OUTPUT] if present
    Otherwise, add at the end of the command: 2>&1 | tee [OUTPUT]

    Args:
        output_file: path inserted verbatim, wrapped in double quotes.
    """
    pattern = re.compile(r'\[OUTPUT\]', re.IGNORECASE)
    quoted_path = '"{0}"'.format(output_file)
    if pattern.search(self.parsed_cmdline):
        # Callable replacement: the path is inserted literally, so
        # backslashes in it are not expanded as escapes / group references.
        self.parsed_cmdline = pattern.sub(lambda _match: quoted_path,
                                          self.parsed_cmdline)
    else:
        # No placeholder: capture stdout and stderr into the file via tee.
        self.parsed_cmdline += ' 2>&1 | tee ' + quoted_path
def replaceOUTPUTDIR(self, output_dir):
    """
    Replace [OUTPUTDIR] if present

    Args:
        output_dir: directory path substituted for each placeholder
            (case-insensitive match), inserted verbatim.
    """
    pattern = re.compile(r'\[OUTPUTDIR\]', re.IGNORECASE)
    # Callable replacement: the path is inserted literally, so backslashes
    # in it are not expanded as escapes / group references.
    self.parsed_cmdline = pattern.sub(lambda _match: output_dir,
                                      self.parsed_cmdline)
def replaceTOOLBOXDIR(self, toolbox_dir):
    """
    Replace [TOOLBOXDIR] (toolbox directory)

    Args:
        toolbox_dir: directory path substituted for each placeholder
            (case-insensitive match), inserted verbatim.
    """
    pattern = re.compile(r'\[TOOLBOXDIR\]', re.IGNORECASE)
    # Callable replacement: the path is inserted literally, so backslashes
    # in it are not expanded as escapes / group references.
    self.parsed_cmdline = pattern.sub(lambda _match: toolbox_dir,
                                      self.parsed_cmdline)
def replaceWORDLISTSDIR(self, wordlists_dir):
    """
    Replace [WORDLISTSDIR] (wordlists directory)

    Args:
        wordlists_dir: directory path substituted for each placeholder
            (case-insensitive match), inserted verbatim.
    """
    pattern = re.compile(r'\[WORDLISTSDIR\]', re.IGNORECASE)
    # Callable replacement: the path is inserted literally, so backslashes
    # in it are not expanded as escapes / group references.
    self.parsed_cmdline = pattern.sub(lambda _match: wordlists_dir,
                                      self.parsed_cmdline)
def replaceSpecificTags(self, service, specific_args):
    """
    Replace specific tags (depends on the selected service)
    eg. for http :
    [SSL option="value"]
    [CMS cms1="val" cms2="val" ... default="val"]

    For each tag declared for `service`:

    * 'boolean' tags: ``[TAG option="text"]`` is replaced by ``text`` when
      `specific_args[tag]` is truthy, otherwise removed.
    * 'list_member' tags: ``[TAG name1="v1" name2="v2" ...]`` is replaced by
      the value whose name equals `specific_args[tag]`; when no value is
      selected, by the ``default`` entry if any, otherwise removed.

    A tag that does not occur in the command line, or a selected list member
    that the tag does not declare, leaves the command line untouched.

    Args:
        service: key into Constants.SPECIFIC_TOOL_OPTIONS.
        specific_args: mapping tag -> selected value (may be empty or None).
    """
    for tag in Constants.SPECIFIC_TOOL_OPTIONS[service]:
        option_type = SpecificOptions.specificOptionType(service, tag)

        # Missing key, falsy value, or no mapping at all mean "not selected".
        selected = None
        if specific_args and tag in specific_args:
            selected = specific_args[tag]

        if option_type == 'boolean':
            pattern = re.compile(
                r'\[' + tag.upper()
                + r'''\s+option\s*=\s*['"](?P<option>.*?)['"]\s*\]''',
                re.IGNORECASE)
            m = pattern.search(self.parsed_cmdline)
            if m is None:
                continue
            replacement = m.group('option') if selected else ''

        elif option_type == 'list_member':
            pattern = regex.compile(
                r'\[' + tag.upper()
                + r'''(?:\s+(?P<name>\w+)\s*=\s*['"]'''
                + r'''(?P<value>[ a-zA-Z0-9_,;:-]*)['"])+\s*\]''',
                regex.IGNORECASE)
            m = pattern.search(self.parsed_cmdline)
            if m is None:
                continue
            # capturesdict() returns every capture of the repeated groups,
            # so names[i] pairs with values[i].
            captures = m.capturesdict()
            names, values = captures['name'], captures['value']
            if selected:
                if selected not in names:
                    # Unknown member for this tag: leave the tag as it is.
                    continue
                replacement = values[names.index(selected)]
            else:
                # Look "default" up case-insensitively, consistently with
                # the case-insensitive detection.
                lowered = [name.lower() for name in names]
                if 'default' in lowered:
                    replacement = values[lowered.index('default')]
                else:
                    replacement = ''
        else:
            continue

        # Callable replacement: the text is inserted literally, so any
        # backslash in an option value is not expanded as a template escape.
        self.parsed_cmdline = pattern.sub(
            lambda _match, text=replacement: text, self.parsed_cmdline)
def make_xpath_ranges(html, phrase):
    '''Given a HTML string and a `phrase`, build a regex to find offsets
    for the phrase, and then build a list of `XPathRange` objects for it.
    If this fails, return empty list.

    `phrase` is used as the regex source as-is (it is not escaped) and is
    matched with the UNICODE, IGNORECASE and MULTILINE flags.  A non-unicode
    `phrase` is first decoded as UTF-8.

    Returns a list of dicts of the form
    ``{'start': {'node': ..., 'idx': ...}, 'end': {'node': ..., 'idx': ...}}``;
    the list is empty when `html` is empty, when `phrase` cannot be decoded,
    or when computing the xpaths fails.
    '''
    if not html:
        return []

    if not isinstance(phrase, unicode):
        try:
            phrase = phrase.decode('utf8')
        except Exception:
            # The phrase must be passed as the %r argument, otherwise the
            # placeholder is logged verbatim.
            logger.info('failed %r.decode("utf8")', phrase, exc_info=True)
            return []

    phrase_re = re.compile(
        phrase, flags=re.UNICODE | re.IGNORECASE | re.MULTILINE)

    # a list of tuple(start, end) char indexes
    spans = [match.span()
             for match in phrase_re.finditer(html, overlapped=False)]

    # now run fancy aligner magic to get xpath info and format them as
    # XPathRange per above
    try:
        xpath_ranges = list(char_offsets_to_xpaths(html, spans))
    except Exception:
        logger.info('failed to get xpaths', exc_info=True)
        return []

    # Falsy entries are skipped; the start index is shifted by one.
    return [
        dict(
            start=dict(node=xpath_range.start_xpath,
                       idx=xpath_range.start_offset + 1),
            end=dict(node=xpath_range.end_xpath,
                     idx=xpath_range.end_offset))
        for xpath_range in xpath_ranges
        if xpath_range
    ]