The following 19 code examples, extracted from open-source Python projects, illustrate how to use regex.finditer().
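Before the project code, here is a minimal sketch of the basic call shared by the examples below (the pattern and sample string are invented for illustration): regex.finditer() returns an iterator of match objects, just as re.finditer() does in the standard library.

import regex

# Each item yielded by finditer() is a match object exposing group(),
# start(), end() and span().
for m in regex.finditer(r"\d+", "order 12, item 345"):
    print(m.group(0), m.span())
# 12 (6, 8)
# 345 (15, 18)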
def build_partial_stringtable(self, inband_elements):
    """
    Use extracted in-band elements to populate partial StringTable with correct index
    """
    # Find reference max index into decoded data
    max_index = 1
    regex = re.compile(r'\[\[VALUE_0x([0-9a-fA-F]+)\]\]')
    for match in regex.finditer(self.output):
        if int(match.group(1), 16) > max_index:
            max_index = int(match.group(1), 16)

    # Compute beginning index of partial StringTable
    begin_index = max_index - (len(inband_elements)-1)*2

    # Build partial StringTable
    partial_stringtable = collections.OrderedDict()
    for i in range(begin_index, max_index+1, 2):
        partial_stringtable[i] = inband_elements.pop(0)

    return partial_stringtable
def extract_inband_dictionary_from_xml(self):
    """
    Extract known elements from the StringTable that appear inside the XML.
    They must respect the syntax [[VALUE|ST_0xXX]].
    Those elements are intended to be converted to binary.
    """
    inband_dictionary = {}

    # Find all references to the in-band dictionary in the XML
    regex = re.compile(r'\[\[(.*?)\|ST_0x([0-9a-fA-F]+)\]\]')
    for match in regex.finditer(self.input):
        inband_dictionary[int(match.group(2), 16)] = match.group(1)

    # Replace [[VALUE|ST_0xXX]] with [[VALUE_0xXX]] in the XML
    regex = re.compile(r'\[\[(?P<value>.)*?\|ST_0x(?P<number>[0-9a-fA-F]+)\]\]')
    self.input = re.sub(regex, '[[VALUE_0x\g<number>]]', self.input)
    #print self.input

    return inband_dictionary
def map_to_dogol_prime(self, s):
    """Map a string to Dogolpolsky' classes

    Args:
        s (unicode): IPA word

    Returns:
        (unicode): word with all segments collapsed to D' classes
    """
    segs = []
    for seg in self.fm.seg_regex.finditer(s):
        fts = self.fm.fts(seg.group(0))
        for mask, label in self.dogol_prime:
            if fts >= mask:
                segs.append(label)
                break
    return ''.join(segs)
def test_finditer(self):
    it = regex.finditer(r":+", "a:b::c:::d")
    self.assertEqual([item[0] for item in it], [':', '::', ':::'])
def test_bug_581080(self):
    it = regex.finditer(r"\s", "a b")
    self.assertEqual(it.next().span(), (1, 2))
    self.assertRaises(StopIteration, lambda: it.next())

    scanner = regex.compile(r"\s").scanner("a b")
    self.assertEqual(scanner.search().span(), (1, 2))
    self.assertEqual(scanner.search(), None)
def test_bug_817234(self):
    it = regex.finditer(r".*", "asdf")
    self.assertEqual(it.next().span(), (0, 4))
    self.assertEqual(it.next().span(), (4, 4))
    self.assertRaises(StopIteration, lambda: it.next())
def test_zerowidth(self):
    # Issue 3262.
    self.assertEqual(regex.split(r"\b", "a b"), ['a b'])
    self.assertEqual(regex.split(r"(?V1)\b", "a b"), ['', 'a', ' ', 'b', ''])

    # Issue 1647489.
    self.assertEqual(regex.findall(r"^|\w+", "foo bar"), ['', 'foo', 'bar'])
    self.assertEqual([m[0] for m in regex.finditer(r"^|\w+", "foo bar")],
      ['', 'foo', 'bar'])
    self.assertEqual(regex.findall(r"(?r)^|\w+", "foo bar"), ['bar', 'foo', ''])
    self.assertEqual([m[0] for m in regex.finditer(r"(?r)^|\w+", "foo bar")],
      ['bar', 'foo', ''])
    self.assertEqual(regex.findall(r"(?V1)^|\w+", "foo bar"), ['', 'foo', 'bar'])
    self.assertEqual([m[0] for m in regex.finditer(r"(?V1)^|\w+", "foo bar")],
      ['', 'foo', 'bar'])
    self.assertEqual(regex.findall(r"(?rV1)^|\w+", "foo bar"), ['bar', 'foo', ''])
    self.assertEqual([m[0] for m in regex.finditer(r"(?rV1)^|\w+", "foo bar")],
      ['bar', 'foo', ''])

    self.assertEqual(regex.split("", "xaxbxc"), ['xaxbxc'])
    self.assertEqual([m for m in regex.splititer("", "xaxbxc")], ['xaxbxc'])

    self.assertEqual(regex.split("(?r)", "xaxbxc"), ['xaxbxc'])
    self.assertEqual([m for m in regex.splititer("(?r)", "xaxbxc")], ['xaxbxc'])

    self.assertEqual(regex.split("(?V1)", "xaxbxc"),
      ['', 'x', 'a', 'x', 'b', 'x', 'c', ''])
    self.assertEqual([m for m in regex.splititer("(?V1)", "xaxbxc")],
      ['', 'x', 'a', 'x', 'b', 'x', 'c', ''])

    self.assertEqual(regex.split("(?rV1)", "xaxbxc"),
      ['', 'c', 'x', 'b', 'x', 'a', 'x', ''])
    self.assertEqual([m for m in regex.splititer("(?rV1)", "xaxbxc")],
      ['', 'c', 'x', 'b', 'x', 'a', 'x', ''])
def test_bug_10328(self):
    # Issue 10328.
    pat = regex.compile(r'(?mV0)(?P<trailing_ws>[ \t]+\r*$)|(?P<no_final_newline>(?<=[^\n])\Z)')
    self.assertEqual(pat.subn(lambda m: '<' + m.lastgroup + '>', 'foobar '),
      ('foobar<trailing_ws>', 1))
    self.assertEqual([m.group() for m in pat.finditer('foobar ')], [' ', ''])

    pat = regex.compile(r'(?mV1)(?P<trailing_ws>[ \t]+\r*$)|(?P<no_final_newline>(?<=[^\n])\Z)')
    self.assertEqual(pat.subn(lambda m: '<' + m.lastgroup + '>', 'foobar '),
      ('foobar<trailing_ws><no_final_newline>', 2))
    self.assertEqual([m.group() for m in pat.finditer('foobar ')], [' ', ''])
def find_flags(self, s, quotes, re_verbose, re_version):
    """Find verbose and Unicode flags."""

    new = []
    start = 0
    verbose_flag = re_verbose
    version_flag = re_version
    avoid = quotes + self.groups
    avoid.sort()

    if version_flag and verbose_flag:
        return bool(verbose_flag), version_flag

    for a in avoid:
        new.append(s[start:a[0] + 1])
        start = a[1]
    new.append(s[start:])

    for m in self._regex_flags.finditer(self._empty.join(new)):
        if m.group(2):
            if self._verbose_flag in m.group(2):
                verbose_flag = True
            if self._V0 in m.group(2):
                version_flag = V0
            elif self._V1 in m.group(2):
                version_flag = V1
        if version_flag and verbose_flag:
            break

    return bool(verbose_flag), version_flag if version_flag else regex.DEFAULT_VERSION
def finditer(
    pattern, string, flags=0, pos=None, endpos=None, overlapped=False,
    partial=False, concurrent=None, **kwargs
):
    """Wrapper for `finditer`."""

    return regex.finditer(
        _apply_search_backrefs(pattern, flags), string,
        flags, pos, endpos, overlapped, partial, concurrent, **kwargs
    )
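The wrapper above simply forwards the regex module's extra keyword arguments. One worth noting is overlapped=True, which regex.finditer() accepts to report matches that share characters with earlier matches; a small sketch with an invented pattern and string:

import regex

# Default (non-overlapping) matching vs. overlapped matching.
print([m.group(0) for m in regex.finditer(r"\w{2}", "abcd")])
# ['ab', 'cd']
print([m.group(0) for m in regex.finditer(r"\w{2}", "abcd", overlapped=True)])
# ['ab', 'bc', 'cd']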
def spell_check_plain_text(dictionary, text):
    text = regex.sub(
        r'\\[Nnh]',
        '  ',  # two spaces so that matches maintain position in text
        text)
    for match in regex.finditer(r'\p{L}[\p{L}\p{P}]*\p{L}|\p{L}', text):
        if not dictionary.check(match.group(0)):
            yield (match.start(), match.end())
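The word pattern in the example above uses the regex module's Unicode property classes: \p{L} matches any letter and \p{P} any punctuation, so a candidate word may contain internal punctuation but must begin and end with a letter. A quick illustration (the sample string is made up, and the dictionary object with its check() method from the example is not needed here):

import regex

# Words may contain internal punctuation such as apostrophes, but
# surrounding punctuation and whitespace are excluded from the match.
words = [m.group(0) for m in
         regex.finditer(r"\p{L}[\p{L}\p{P}]*\p{L}|\p{L}", "don't stop; go")]
print(words)  # ["don't", 'stop', 'go']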
def segment_text(text, seg_regex=SEG_REGEX):
    """Return an iterator of segments in the text.

    Args:
        text (unicode): string of IPA Unicode text
        seg_regex (_regex.Pattern): compiled regex defining a segment (base +
            modifiers)

    Return:
        generator: segments in the input text
    """
    for m in seg_regex.finditer(text):
        yield m.group(0)
def fts(s):
    """Given string `s` with +/-[alphabetical sequence]s, return list of features.

    Args:
        s (str): string with segments of the sort "+son -syl 0cor"

    Return:
        list: list of (value, feature) tuples
    """
    return [m.groups() for m in FT_REGEX.finditer(s)]
def pat(p):
    """Given a string `p` with feature matrices (features grouped with square
    brackets into segments), return a list of sets of (value, feature) tuples.

    Args:
        p (str): list of feature matrices as strings

    Return:
        list: list of sets of (value, feature) tuples
    """
    pattern = []
    for matrix in [m.group(0) for m in MT_REGEX.finditer(p)]:
        segment = set([m.groups() for m in FT_REGEX.finditer(matrix)])
        pattern.append(segment)
    return pattern
def segs(self, word):
    """Returns a list of segments from a word

    Args:
        word (unicode): input word as Unicode IPA string

    Returns:
        list: list of strings corresponding to segments found in `word`
    """
    return [m.group('all') for m in self.seg_regex.finditer(word)]
def filter_string(self, word):
    """Return a string like the input but containing only legal IPA segments

    Args:
        word (unicode): input string to be filtered

    Returns:
        unicode: string identical to `word` but with invalid IPA segments absent
    """
    segs = [m.group(0) for m in self.seg_regex.finditer(word)]
    return ''.join(segs)
def __init__(self, names, features={}, ftstr='', weights=None):
    """Construct a `Segment` object

    Args:
        names (list): ordered list of feature names
        features (dict): name-value pairs for specified features
        ftstr (unicode): a string, each /(+|0|-)\w+/ sequence of which is
            interpreted as a feature specification
        weights (float): ordered list of feature weights/saliences
    """
    self.n2s = {-1: '-', 0: '0', 1: '+'}
    self.s2n = {k: v for (v, k) in self.n2s.items()}
    self.names = names
    """Set a feature specification"""
    self.data = {}
    for name in names:
        if name in features:
            self.data[name] = features[name]
        else:
            self.data[name] = 0
    for m in re.finditer(r'(\+|0|-)(\w+)', ftstr):
        v, k = m.groups()
        self.data[k] = self.s2n[v]
    if weights:
        self.weights = weights
    else:
        self.weights = [1 for _ in names]
def ftstr2dict(ftstr):
    fts = {}
    for m in re.finditer(r'([-0+])(\w+)', ftstr):
        v, k = m.groups()
        fts[k] = {'-': -1, '0': 0, '+': 1}[v]
    return fts
def test_copy(self):
    # PatternObjects are immutable, therefore there's no need to clone them.
    r = regex.compile("a")
    self.assert_(copy.copy(r) is r)
    self.assert_(copy.deepcopy(r) is r)

    # MatchObjects are normally mutable because the target string can be
    # detached. However, after the target string has been detached, a
    # MatchObject becomes immutable, so there's no need to clone it.
    m = r.match("a")

    self.assert_(copy.copy(m) is not m)
    self.assert_(copy.deepcopy(m) is not m)

    self.assert_(m.string is not None)
    m2 = copy.copy(m)
    m2.detach_string()
    self.assert_(m.string is not None)
    self.assert_(m2.string is None)

    # The following behaviour matches that of the re module.
    it = regex.finditer(".", "ab")
    it2 = copy.copy(it)
    self.assertEqual(it.next().group(), "a")
    self.assertEqual(it2.next().group(), "b")

    # The following behaviour matches that of the re module.
    it = regex.finditer(".", "ab")
    it2 = copy.deepcopy(it)
    self.assertEqual(it.next().group(), "a")
    self.assertEqual(it2.next().group(), "b")

    # The following behaviour is designed to match that of copying 'finditer'.
    it = regex.splititer(" ", "a b")
    it2 = copy.copy(it)
    self.assertEqual(it.next(), "a")
    self.assertEqual(it2.next(), "b")

    # The following behaviour is designed to match that of copying 'finditer'.
    it = regex.splititer(" ", "a b")
    it2 = copy.deepcopy(it)
    self.assertEqual(it.next(), "a")
    self.assertEqual(it2.next(), "b")