Python re module: finditer() example source code

We extracted the following 50 code examples from open-source Python projects to illustrate how to use re.finditer().
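As a quick standalone illustration (not taken from the projects below), re.finditer() returns an iterator of match objects, each carrying the matched text and its span:

import re

# list every run of digits together with its position
for m in re.finditer(r'\d+', 'a1 bb22 ccc333'):
    print(m.group(0), m.start(), m.end())
# 1 1 2
# 22 5 7
# 333 11 14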

Project: malware    Author: JustF0rWork    | Project Source | File Source
def fetch():
    retval = {}
    content = retrieve_content(__url__)

    if __check__ in content:
        for match in re.finditer(r"(\d+\.\d+\.\d+\.\d+)/(\d+)", content):
            prefix, mask = match.groups()
            mask = int(mask)
            start_int = addr_to_int(prefix) & make_mask(mask)
            end_int = start_int | ((1 << (32 - mask)) - 1)
            if 0 <= end_int - start_int <= 1024:
                address = start_int
                while address <= end_int:
                    retval[int_to_addr(address)] = (__info__, __reference__)
                    address += 1

    return retval
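The snippet assumes the project helpers addr_to_int, int_to_addr and make_mask. For readers who want to run it standalone, a minimal sketch of what they would look like (the names come from the snippet; the implementations are my assumption):

import socket
import struct

def addr_to_int(addr):
    # dotted-quad string -> 32-bit integer
    return struct.unpack('!I', socket.inet_aton(addr))[0]

def int_to_addr(value):
    # 32-bit integer -> dotted-quad string
    return socket.inet_ntoa(struct.pack('!I', value))

def make_mask(bits):
    # e.g. make_mask(24) == 0xffffff00
    return (0xffffffff << (32 - bits)) & 0xffffffff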
Project: Sci-Finder    Author: snverse    | Project Source | File Source
def split_arg_string(string):
    """Given an argument string this attempts to split it into small parts."""
    rv = []
    for match in re.finditer(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                             r'|"([^"\\]*(?:\\.[^"\\]*)*)"'
                             r'|\S+)\s*', string, re.S):
        arg = match.group().strip()
        if arg[:1] == arg[-1:] and arg[:1] in '"\'':
            arg = arg[1:-1].encode('ascii', 'backslashreplace') \
                .decode('unicode-escape')
        try:
            arg = type(string)(arg)
        except UnicodeError:
            pass
        rv.append(arg)
    return rv
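For reference, a quick illustrative call (my example, not from the project): quoted parts stay together and the surrounding quotes are stripped:

split_arg_string('foo "bar baz" qux')   # ['foo', 'bar baz', 'qux']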
Project: PlasoScaffolder    Author: ClaudiaSaxer    | Project Source | File Source
def _GetEndOfTableIfNotAlias(self, query: str, column_name: str) -> int:
    """Gets the start of the column if it is not an alias column.

    Args:
      query (str): the query to be searched
      column_name (str): the name to be searched for

    Returns:
      int: 0 if no column could be found, otherwise the starting position of
          the column
    """
    wrong_positions = [name.start() for name in
                       re.finditer('.{0} as'.format(column_name), query)]
    found_positions = []
    for space in self._POSSIBLEQUERYSEPERATOR:
      found_positions += [name.start() for name in
                          re.finditer('.{0}{1}'.format(column_name, space),
                                      query)]

    position = set(found_positions) - set(wrong_positions)

    if position:
      return position.pop()
    else:
      return 0
Project: whatstyle    Author: mikr    | Project Source | File Source
def register_options(self):
        # type: () -> None
        """Parse options from text like this:
        Preferences:
          [+|-]alignArguments                                        Enable/disable ...
          ...
          [+|-]spacesWithinPatternBinders                            Enable/disable ...
          -alignSingleLineCaseStatements.maxArrowIndent=[1-100]      Set Maximum number ...
          -indentSpaces=[1-10]                                       Set Number of spaces ...
        """
        exeresult = run_executable(self.exe, ['--help'], cache=self.cache)
        options = []
        text = unistr(exeresult.stdout)
        for m in re.finditer(r'^  (\[\+\|-\]|-)([a-z][a-zA-Z.]+)(?:=\[(\d+)-(\d+)\])?', text,
                             re.MULTILINE):
            optionprefix, optionname, start, end = m.groups()
            if start is None:
                optiontype = 'bool'
                configs = [True, False]  # type: List[OptionValue]
            else:
                optiontype = 'int'
                configs = list(inclusiverange(int(start), int(end)))
            options.append(option_make(optionname, optiontype, configs))
        self.styledefinition = styledef_make(options)
Project: nettools    Author: germandutchwindtunnels    | Project Source | File Source
def filter_output(self, output, regex):
        """ Filter output from a command """
        result = {}
        result_list = []
        if isinstance(output, str):
            lines = [output]
        else:
            lines = output

        for line in lines:
            # iterate over the matches directly; the Python 2-only
            # iterator.next() call breaks under Python 3
            for cur in re.finditer(regex, line):
                result = cur.groupdict()
                result['hostname'] = self.host
                result_list.append(result)

        return result_list
Project: swjtu-pyscraper    Author: Desgard    | Project Source | File Source
def split_arg_string(string):
    """Given an argument string this attempts to split it into small parts."""
    rv = []
    for match in re.finditer(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                             r'|"([^"\\]*(?:\\.[^"\\]*)*)"'
                             r'|\S+)\s*', string, re.S):
        arg = match.group().strip()
        if arg[:1] == arg[-1:] and arg[:1] in '"\'':
            arg = arg[1:-1].encode('ascii', 'backslashreplace') \
                .decode('unicode-escape')
        try:
            arg = type(string)(arg)
        except UnicodeError:
            pass
        rv.append(arg)
    return rv
Project: SerpScrap    Author: ecoron    | Project Source | File Source
def split_into_sentences(text):
    potential_end_pat = re.compile(r"".join([
        r"([\w\.'’&\]\)]+[\.\?!])",  # A word that ends with punctuation
        r"([‘’“”'\"\)\]]*)",  # Followed by optional quote/parens/etc
        r"(\s+(?![a-z\-–—]))",  # Followed by whitespace + non-(lowercase or dash)
        ]),
        re.U
    )
    dot_iter = re.finditer(potential_end_pat, text)
    end_indices = [
        (x.start() + len(x.group(1)) + len(x.group(2)))
        for x in dot_iter
        if is_sentence_ender(x.group(1))
    ]
    spans = zip([None] + end_indices, end_indices + [None])
    sentences = [
        text[start:end].strip() for start, end in spans
    ]
    return sentences
Project: infi.clickhouse_orm    Author: Infinidat    | Project Source | File Source
def create_ad_hoc_field(cls, db_type):
        '''
        Given an SQL column description such as "Enum8('apple' = 1, 'banana' = 2, 'orange' = 3)",
        this method returns a matching enum field.
        '''
        import re
        try:
            Enum # exists in Python 3.4+
        except NameError:
            from enum import Enum # use the enum34 library instead
        members = {}
        for match in re.finditer("'(\w+)' = (\d+)", db_type):
            members[match.group(1)] = int(match.group(2))
        enum_cls = Enum('AdHocEnum', members)
        field_class = Enum8Field if db_type.startswith('Enum8') else Enum16Field
        return field_class(enum_cls)
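A quick standalone check of just the parsing step (the field classes are project-specific, so this shows only what the finditer loop collects):

import re
db_type = "Enum8('apple' = 1, 'banana' = 2, 'orange' = 3)"
members = {m.group(1): int(m.group(2))
           for m in re.finditer(r"'(\w+)' = (\d+)", db_type)}
# members == {'apple': 1, 'banana': 2, 'orange': 3}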
Project: routersploit    Author: reverse-shell    | Project Source | File Source
def tokenize(token_specification, text):
    Token = collections.namedtuple('Token', ['typ', 'value', 'line', 'column', 'mo'])

    token_specification.extend((
        ('NEWLINE', r'\n'),          # Line endings
        ('SKIP', r'.'),              # Any other character
    ))

    tok_regex = '|'.join('(?P<%s>%s)' % pair for pair in token_specification)
    line_num = 1
    line_start = 0
    for mo in re.finditer(tok_regex, text):
        kind = mo.lastgroup
        value = [g for g in mo.groups() if g is not None]  # non-None groups; a list, as Python 2's filter() returned
        if kind == 'NEWLINE':
            line_start = mo.end()
            line_num += 1
        elif kind == 'SKIP':
            pass
        else:
            column = mo.start() - line_start
            yield Token(kind, value, line_num, column, mo)
Project: malware    Author: JustF0rWork    | Project Source | File Source
def fetch():
    retval = {}
    content = retrieve_content(__url__)

    if __check__ in content:
        for match in re.finditer(r"(?m)^([\d.]+),IP used by ([^,]+) C&C", content):
            retval[match.group(1)] = ("%s (malware)" % match.group(2).lower(), __reference__)

    for row in re.finditer(r"(?s)<tr>(.+?)</tr>", content):
        if "<span>100%</span>" in row.group(1):
            domain = re.search(r"get_data_domain\('([^']+)", row.group(1))
            if domain:
                tag = re.search(r">(trojan|spyware|adware)\.([^<]+)", row.group(1))
                retval[domain.group(1)] = (("%s (malware)" % tag.group(2)) if tag else "malware", __reference__)

    return retval
Project: BITSInject    Author: SafeBreach-Labs    | Project Source | File Source
def remove_job(self, job_guid):
        """
        Remove a job given its GUID, including decreasing the job counter of the queue
        :param job_guid: the GUID of the job to remove from queue
        :return: void
        """
        global _os_ver
        occurs = [(oc.start(), oc.end()) for oc in
                  list(re.finditer('%s' % (BITSStateFile.JOB_HEADER_FOOTER_HEX[_os_ver].decode('hex')),
                                   self._original_data))
                  ]
        if occurs:
            self.set_jobs_counter(self.get_jobs_counter() - 1)
            state_off = self._get_job_state_off(job_guid)
            new_data_list = list(self._new_data)
            job_start_off, job_end_off = BITSStateFile._get_job_limits_by_index_in_between(occurs, state_off)
            new_data_list = new_data_list[:job_start_off + 1] + new_data_list[job_end_off + 1:]
            self._update_new_data(0, "".join(new_data_list))
            self.commit()
Project: Sci-Finder    Author: snverse    | Project Source | File Source
def split_arg_string(string):
    """Given an argument string this attempts to split it into small parts."""
    rv = []
    for match in re.finditer(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                             r'|"([^"\\]*(?:\\.[^"\\]*)*)"'
                             r'|\S+)\s*', string, re.S):
        arg = match.group().strip()
        if arg[:1] == arg[-1:] and arg[:1] in '"\'':
            arg = arg[1:-1].encode('ascii', 'backslashreplace') \
                .decode('unicode-escape')
        try:
            arg = type(string)(arg)
        except UnicodeError:
            pass
        rv.append(arg)
    return rv
Project: CoBL-public    Author: lingdb    | Project Source | File Source
def post(self, request, pk, **kwargs):
        instance = CognateClassCitation.objects.get(id=pk)
        form = EditCognateClassCitationForm(request.POST, instance=instance)
        try:
            # validate {ref foo ...}
            s = Source.objects.all().filter(deprecated=False)
            pattern = re.compile(r'(\{ref +([^\{]+?)(:[^\{]+?)? *\})')
            for m in re.finditer(pattern, form.data['comment']):
                foundSet = s.filter(shorthand=m.group(2))
                if not foundSet.count() == 1:
                    raise ValidationError(
                        'In field “Comment” source shorthand “%(name)s” is unknown.',
                        params={'name': m.group(2)})
            form.save()
        except ValidationError as e:
            messages.error(
                request,
                'Sorry, the server had problems updating the cognate citation. %s' % e)
            return self.render_to_response({"form": form})
        return HttpResponseRedirect(reverse('cognate-class-citation-detail', args=[pk]))
Project: purelove    Author: hucmosin    | Project Source | File Source
def tokenize(token_specification, text):
    Token = collections.namedtuple('Token', ['typ', 'value', 'line', 'column', 'mo'])

    token_specification.extend((
        ('NEWLINE', r'\n'),          # Line endings
        ('SKIP', r'.'),              # Any other character
    ))

    tok_regex = '|'.join('(?P<%s>%s)' % pair for pair in token_specification)
    line_num = 1
    line_start = 0
    for mo in re.finditer(tok_regex, text):
        kind = mo.lastgroup
        value = [g for g in mo.groups() if g is not None]  # non-None groups; a list, as Python 2's filter() returned
        if kind == 'NEWLINE':
            line_start = mo.end()
            line_num += 1
        elif kind == 'SKIP':
            pass
        else:
            column = mo.start() - line_start
            yield Token(kind, value, line_num, column, mo)
Project: annotated-py-sqlalchemy    Author: hhstore    | Project Source | File Source
def after_compile(self):
        if self.engine.positional:
            self.positiontup = []
            match = r'%\(([\w_]+)\)s'
            params = re.finditer(match, self.strings[self.statement])
            for p in params:
                self.positiontup.append(p.group(1))
            if self.engine.paramstyle=='qmark':
                self.strings[self.statement] = re.sub(match, '?', self.strings[self.statement])
            elif self.engine.paramstyle=='format':
                self.strings[self.statement] = re.sub(match, '%s', self.strings[self.statement])
            elif self.engine.paramstyle=='numeric':
                i = [0]
                def getnum(x):
                    i[0] += 1
                    return str(i[0])
                self.strings[self.statement] = re.sub(match, getnum, self.strings[self.statement])
Project: pymotw3    Author: reingart    | Project Source | File Source
def test_patterns(text, patterns):
    """Given source text and a list of patterns, look for
    matches for each pattern within the text and print
    them to stdout.
    """
    # Look for each pattern in the text and print the results
    for pattern, desc in patterns:
        print("'{}' ({})\n".format(pattern, desc))
        print("  '{}'".format(text))
        for match in re.finditer(pattern, text):
            s = match.start()
            e = match.end()
            substr = text[s:e]
            n_backslashes = text[:s].count('\\')
            prefix = '.' * (s + n_backslashes)
            print("  {}'{}'".format(prefix, substr))
        print()
    return
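Typical usage looks like the following (an illustrative call in the style of the PyMOTW examples):

test_patterns(
    'abbaaabbbbaaaaa',
    [('ab*', "'a' followed by zero or more 'b'")],
)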
Project: Sparcli    Author: 4Kaylum    | Project Source | File Source
def pun(self, ctx):
        '''
        Gives a random pun from the depths of the internet
        '''

        # Read from page
        async with self.session.get('http://www.punoftheday.com/cgi-bin/randompun.pl') as r:
            page = await r.text()

        # Scrape the raw HTML
        r = r'(<div class=\"dropshadow1\">\n<p>).*(</p>\n</div>)'
        foundPun = [i for i in finditer(r, page)][0].group()

        # Filter out the pun
        r = r'(>).*(<)'
        filteredPun = [i for i in finditer(r, foundPun)][0].group()

        # Boop it out
        fullPun = filteredPun[1:-1]
        await self.sparcli.say(fullPun)
Project: Sparcli    Author: 4Kaylum    | Project Source | File Source
def steamid(self, ctx, *, gameURL:str):
        '''
        Gets the information of a game from a Steam URL
        '''

        await self.sparcli.send_typing(ctx.message.channel)

        # Grab the game ID from the user input
        regexMatches = finditer(r'\d+', gameURL)
        regexList = [i for i in regexMatches]

        # Parse it as a group
        if len(regexList) == 0:
            await self.sparcli.say('I was unable to find the ID of that game on the Steam API.')
            return
        else:
            await self.getSteamGameInfo(regexList[0].group())
Project: Sparcli    Author: 4Kaylum    | Project Source | File Source
def messageToEmbed(message):

    # Get some default values that'll be in the embed
    author = message.author 
    description = message.content
    image = False

    # Check to see if any images were added
    regexMatch = r'.+\.(png|jpg|jpeg|gif)$'  # anchor and escape the dot so e.g. "xpng" does not match
    if len(message.attachments) > 0:
        attachment = message.attachments[0]
        matchList = [i for i in finditer(regexMatch, attachment['filename'])]
        if len(matchList) > 0:
            image = attachment['url']

    # Get the time the message was created
    createdTime = '.'.join(str(message.timestamp).split('.')[:-1])

    # Make and return the embed
    return makeEmbed(user=author, description=description, image=image, footer=createdTime)
Project: deep_throat    Author: wdbm    | Project Source | File Source
def match_and_replace(
    text    = None,
    rule    = None,
    phoneme = None
    ):
    """
    Replace found text from a single rule.
    """
    # Find all rule matches.
    matches = [(match.start(), match.end()) for \
        match in re.finditer(rule, text)]
    # Start from behind, so replace in-place.
    matches.reverse()
    # Convert to characters because strings are immutable.
    characters = list(text)
    for start, end in matches:
        characters[start:end] = phoneme
    # Convert back to string.
    return "".join(characters)
Project: expdevBadChars    Author: mgeeky    | Project Source | File Source
def unpack_dword(line):
        outs = ''
        i = 0
        for m in re.finditer(r'((?:0x[0-9a-f]{8},?\s*))', line):
            l = m.group(0)
            l = l.replace(',', '')
            l = l.replace(' ', '')
            dword = int(l, 16)
            unpack = reversed([
                (dword & 0xff000000) >> 24,
                (dword & 0x00ff0000) >> 16,
                (dword & 0x0000ff00) >>  8,
                (dword & 0x000000ff)
            ])
            i += 4
            for b in unpack:
                outs += '%02x' % b

        out(dbg("After callback ('%s')" % outs))
        return BytesParser.formats_compiled['hexstring'].match(outs)
Project: Qyoutube-dl    Author: lzambella    | Project Source | File Source
def extract_videos_from_page(self, page):
        ids_in_page = []
        titles_in_page = []
        for mobj in re.finditer(self._VIDEO_RE, page):
            # The link with index 0 is not the first video of the playlist (not sure if this is still the case)
            if 'index' in mobj.groupdict() and mobj.group('id') == '0':
                continue
            video_id = mobj.group('id')
            video_title = unescapeHTML(mobj.group('title'))
            if video_title:
                video_title = video_title.strip()
            try:
                idx = ids_in_page.index(video_id)
                if video_title and not titles_in_page[idx]:
                    titles_in_page[idx] = video_title
            except ValueError:
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
        return zip(ids_in_page, titles_in_page)
Project: Qyoutube-dl    Author: lzambella    | Project Source | File Source
def format_to_regex(self, fmt):
        """
        Converts a string like
           '%(title)s - %(artist)s'
        to a regex like
           '(?P<title>.+)\ \-\ (?P<artist>.+)'
        """
        lastpos = 0
        regex = ""
        # replace %(..)s with regex group and escape other string parts
        for match in re.finditer(r'%\((\w+)\)s', fmt):
            regex += re.escape(fmt[lastpos:match.start()])
            regex += r'(?P<' + match.group(1) + '>.+)'
            lastpos = match.end()
        if lastpos < len(fmt):
            regex += re.escape(fmt[lastpos:len(fmt)])
        return regex
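An illustrative round trip (the titles are my own): the generated pattern recovers the fields from a formatted string:

pattern = self.format_to_regex('%(title)s - %(artist)s')
m = re.match(pattern, 'Hello World - Some Band')
# m.group('title') == 'Hello World', m.group('artist') == 'Some Band'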
Project: Qyoutube-dl    Author: lzambella    | Project Source | File Source
def extract_object(self, objname):
        obj = {}
        obj_m = re.search(
            (r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) +
            r'\s*(?P<fields>([a-zA-Z$0-9]+\s*:\s*function\(.*?\)\s*\{.*?\}(?:,\s*)?)*)' +
            r'\}\s*;',
            self.code)
        fields = obj_m.group('fields')
        # Currently, it only supports function definitions
        fields_m = re.finditer(
            r'(?P<key>[a-zA-Z$0-9]+)\s*:\s*function'
            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
            fields)
        for f in fields_m:
            argnames = f.group('args').split(',')
            obj[f.group('key')] = self.build_function(argnames, f.group('code'))

        return obj
Project: python-tutorial    Author: Akuli    | Project Source | File Source
def find_links(file):
    """Find all markdown links in a file object.

    Yield (regexmatch, lineno) tuples.
    """
    # don't yield same link twice
    seen = set()

    # we need to loop over the file two lines at a time to support
    # multi-line (actually two-line) links, so this is kind of a mess
    firsts, seconds = itertools.tee(file)
    next(seconds)  # first line is never second line

    # we want 1-based indexing instead of 0-based and one-line links get
    # caught from linepair[1], so we need to start at two
    for lineno, linepair in enumerate(zip(firsts, seconds), start=2):
        lines = linepair[0] + linepair[1]
        for match in re.finditer(_LINK_REGEX, lines, flags=re.DOTALL):
            if match.group(0) not in seen:
                seen.add(match.group(0))
                yield match, lineno
Project: kingpin    Author: pinterest    | Project Source | File Source
def split_problematic_endpoints_line(line):
    """
    If the line of host contains more than one ":",
    for example: 10.99.184.69:900010.37.170.125:9006
    this splits the line and return a list of correct endpoints

    Args:
        ``line``: the problemtic line which contains more than one endpoint string.

    Returns:
        the splitted list of the problematic line which has correct endpoint strings.
    """

    colon_parts = line.strip().split(":")
    offset = len(colon_parts[-1])
    colon_positions = [m.start() for m in re.finditer(':', line)]
    start = 0
    split_parts = []
    for colon_position in colon_positions:
        end = colon_position + offset + 1
        split_part = line[start:end]
        split_parts.append(split_part)
        start = end
    return split_parts
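Working through the docstring's own example (my trace): the offset comes from the final port, and each colon position marks an endpoint boundary:

split_problematic_endpoints_line('10.99.184.69:900010.37.170.125:9006')
# ['10.99.184.69:9000', '10.37.170.125:9006']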
Project: gwot-physical    Author: JanVan01    | Project Source | File Source
def get_classes(self, folder, class_suffix, selected = None):
        classes = []
        for file in glob.glob(folder + "/*.py"):
            handle = open(file, "r")
            content = handle.read()
            handle.close()

            module = folder.replace('/', '.').replace('\\', '.') + '.' + os.path.basename(file).replace('.py', '')

            regexp = "\sclass\s+([\w\d]+"+class_suffix+")\s*\(([\w\d]*)\)\s*:\s"
            for m in re.finditer(regexp, content):
                parent_class = m.group(2)
                if len(parent_class) == 0 or parent_class == 'object':
                    continue
                class_name = m.group(1)
                classes.append(module + '.' + class_name)

        return classes
Project: thesis-check    Author: albalitz    | Project Source | File Source
def analyze(line, linenum, lang):
    annotations = []

    if lang is Language.en_EN or lang is None:
        weasel_words = WEASEL_WORDS_EN
    elif lang is Language.de_DE:
        weasel_words = WEASEL_WORDS_DE
    else:
        return annotations  # unsupported language: avoid a NameError below

    for weasel_pattern in weasel_words:
        for match in re.finditer(weasel_pattern, line, flags=re.IGNORECASE):
            if (not verify_match(match, line)) or (not matches_whole_words(match, line)):
                continue

            index = match.start()
            annotation = WeaselWord(linenum, line, index, word=match.group(0))
            annotations.append(annotation)

    return annotations
Project: thesis-check    Author: albalitz    | Project Source | File Source
def analyze(line, linenum, lang):
    annotations = []

    if lang is Language.en_EN or lang is None:
        contractions = CONTRACTIONS_EN
    elif lang is Language.de_DE:
        contractions = CONTRACTIONS_DE
    else:
        return annotations  # unsupported language: avoid a NameError below

    for pattern in contractions:
        for match in re.finditer(pattern, line, flags=re.IGNORECASE):
            if (not verify_match(match, line)) or (not matches_whole_words(match, line)):
                continue

            index = match.start()
            replaced_contraction = re.sub(pattern, contractions[pattern], match.group(0), flags=re.IGNORECASE)
            annotation = Contraction(linenum, line, index, word=match.group(0), contraction=replaced_contraction)
            annotations.append(annotation)

    return annotations
Project: astropy-bot    Author: astropy    | Project Source | File Source
def paged_github_json_request(url, headers=None):

    response = requests.get(url, headers=headers)
    assert response.ok, response.content
    results = response.json()

    if 'Link' in response.headers:

        links = response.headers['Link']

        # There are likely better ways to parse/extract the link information
        # but here we just find the last page number mentioned in the header
        # 'Link' section and then loop over all pages to get the comments
        last_match = list(re.finditer('page=[0-9]+', links))[-1]
        last_page = int(links[last_match.start():last_match.end()].split('=')[1])

        # If there are other pages, just loop over them and get all the
        # comments
        if last_page > 1:
            for page in range(2, last_page + 1):
                response = requests.get(url + '?page={0}'.format(page), headers=headers)
                assert response.ok, response.content
                results += response.json()

    return results
Project: Python-GoogleDrive-VideoStream    Author: ddurdle    | Project Source | File Source
def getOfflineMediaList(self, folderName=False, title=False, contentType=7):

        mediaFiles = []
        for r1 in re.finditer('\{(.*?)\"spaces\"\:' , entryS, re.DOTALL):
            entry = r1.group(1)
            media = self.getMediaPackage(entry, folderName=folderName, contentType=contentType, fanart=folderFanart, icon=folderIcon)
            if media is not None:
                mediaFiles.append(media)

        return mediaFiles

    ##
    # retrieve a list of videos, using playback type stream
    #   parameters: prompt for video quality (optional), cache type (optional)
    #   returns: list of videos
    ##
Project: sequana    Author: sequana    | Project Source | File Source
def get_occurences(self, pattern, overlap=False):
        """Return position of the input pattern in the sequence

        ::

            >>> from sequana import Sequence
            >>> s = Sequence('ACGTTTTACGT')
            >>> s.get_occurences("ACGT")
            [0, 7]

        """
        if overlap is False:
            res = [m.start() for m in re.finditer(pattern, self.sequence)]
        elif overlap is True:
            res = [m.start() for m in re.finditer('(?=%s)'%pattern, self.sequence)]
        return res

        # reverse find-all without overlaps, you can combine positive and
        # negative lookahead into an expression like this:
        #res = [m.start() for m in re.finditer('(?=%s)(?!.{1,%d}%s)' % (search,
        #    len(pattern)-1, pattern), 'ttt')]
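The lookahead form matters for overlapping hits: searching 'AA' in 'AAA' yields [0] without overlap, because the first match consumes both characters, but [0, 1] with the zero-width '(?=AA)' form.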
Project: core-python    Author: yidao620c    | Project Source | File Source
def find_cute(url):
    # fetch the page
    r = requests.get(url)
    # detect the declared charset and set r.encoding accordingly
    encoding = re.search('content="text/html;\s*charset=(.*?)"', r.text).group(1)
    r.encoding = encoding
    # print(r.text)
    finds = re.finditer(r'<p>\s*([^>]*?)\s*\n', r.text)
    i = random.randint(0, sum(1 for _ in finds) - 1)  # randint is inclusive at both ends
    start = 0
    finds = re.finditer(r'<p>\s*([^>]*?)\s*\n', r.text)
    for f in finds:
        if start == i:
            print(f.group(1))
            break
        start += 1
Project: python-    Author: secondtonone1    | Project Source | File Source
def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes))
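A minimal check of the parsing step against a hypothetical buffer (the length prefixes and values are made up):

import re
buf = b'20 GNU.sparse.offset=512\n22 GNU.sparse.numbytes=1024\n'
offsets = [int(m.group(1)) for m in re.finditer(br'\d+ GNU.sparse.offset=(\d+)\n', buf)]
numbytes = [int(m.group(1)) for m in re.finditer(br'\d+ GNU.sparse.numbytes=(\d+)\n', buf)]
# list(zip(offsets, numbytes)) == [(512, 1024)]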
Project: python-    Author: secondtonone1    | Project Source | File Source
def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes))
Project: my-first-blog    Author: AnkurBegining    | Project Source | File Source
def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes))
Project: mysql-er    Author: StefanLim0    | Project Source | File Source
def get_selects_from_text(content):
        sqls = []
        select_keyword = '@Select\s*\('
        for m in re.finditer(select_keyword, content):
            rparen_pos = MybatisInlineSqlExtractor.find_right_paren_pos(content[m.end():])
            if rparen_pos < 0:
                continue
            sqls.append(SQL('', eval(content[m.end():m.end() + rparen_pos].replace('\r', '').replace('\n', '')).strip()))
        return sqls
Project: cellranger    Author: 10XGenomics    | Project Source | File Source
def get_properties_dict(self, properties_str):
        if isinstance(properties_str, dict):
            return properties_str

        properties = collections.OrderedDict()
        pattern = re.compile('(\S+?)\s*"(.*?)"')
        for m in re.finditer(pattern, properties_str):
            key = m.group(1)
            value = m.group(2)
            properties[key] = value
        return properties
Project: cellranger    Author: 10XGenomics    | Project Source | File Source
def get_unambiguous_regions(reference_path):
    '''Calculate regions corresponding to unambiguous bases'''
    chrom_map = {}
    for chrom, seq in open_reference(reference_path).items():
        regions = [(m.start(), m.end()) for m in re.finditer('[acgtACGT]+', seq[:])]
        chrom_map[chrom] = Regions(regions=regions)
    return chrom_map
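The core of this helper is the span extraction; a standalone check of that step (my example sequence, without the project's Regions class):

import re
seq = 'NNNACGTnnACGT'
regions = [(m.start(), m.end()) for m in re.finditer('[acgtACGT]+', seq)]
# regions == [(3, 7), (9, 13)]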
Project: tokenize-uk    Author: lang-uk    | Project Source | File Source
def tokenize_sents(string):
    """
    Tokenize input text to sentences.

    :param string: Text to tokenize
    :type string: str or unicode
    :return: sentences
    :rtype: list of strings
    """
    string = six.text_type(string)

    spans = []
    for match in re.finditer('[^\s]+', string):
        spans.append(match)
    spans_count = len(spans)

    rez = []
    off = 0

    for i in range(spans_count):
        tok = string[spans[i].start():spans[i].end()]
        if i == spans_count - 1:
            rez.append(string[off:spans[i].end()])
        elif tok[-1] in ['.', '!', '?', '…', '»']:
            tok1 = tok[re.search('[.!?…»]', tok).start()-1]
            next_tok = string[spans[i + 1].start():spans[i + 1].end()]
            if (next_tok[0].isupper()
                and not tok1.isupper()
                and not (tok[-1] != '.'
                         or tok1[0] == '('
                         or tok in ABBRS)):
                rez.append(string[off:spans[i].end()])
                off = spans[i + 1].start()

    return rez
Project: whatstyle    Author: mikr    | Project Source | File Source
def register_options(self):
        # type: () -> None
        """Parse options from text like this:
        # Uncrustify 0.63
        #
        # General options
        #

        newlines                                  { Auto, LF, CR, CRLF }
          The type of line endings

        input_tab_size                            Number
          The original size of tabs in the input

        indent_align_string                       { False, True }
          Whether to indent strings broken by '\' so that they line up
        """
        exeresult = run_executable(self.exe, ['--show-config'], cache=self.cache)
        options = []
        text = unistr(exeresult.stdout)
        for m in re.finditer(r'^(\w+)\s+(.*?)\s*$', text, re.MULTILINE):
            optionname, optiondesc = m.group(1), m.group(2)
            if optiondesc.startswith('{'):
                optiontype = 'Enum'
                configs = optiondesc[1:-1].strip().split(', ')
                configs = [c.lower() for c in configs]
            else:
                optiontype = optiondesc
                configs = []
            options.append(option_make(optionname, optiontype, configs))
        self.styledefinition = styledef_make(options)
Project: plugin.video.exodus    Author: lastship    | Project Source | File Source
def __handle_tostring(self):
        for match in re.finditer('(\d+)\[t\+o\+S\+t\+r\+i\+n\+g\](\d+)', self.js):
            repl = to_base(match.group(1), match.group(2))
            self.js = self.js.replace(match.group(0), repl)
Project: plugin.video.exodus    Author: lastship    | Project Source | File Source
def __get_attribs(element):
    attribs = {}
    for match in re.finditer('''\s+(?P<key>[^=]+)=\s*(?:(?P<delim>["'])(?P<value1>.*?)(?P=delim)|(?P<value2>[^"'][^>\s]*))''', element):
        match = match.groupdict()
        value1 = match.get('value1')
        value2 = match.get('value2')
        value = value1 if value1 is not None else value2
        if value is None: continue
        attribs[match['key'].lower().strip()] = value
    return attribs
Project: Radiojavan    Author: nimasaj    | Project Source | File Source
def album(URL):
    track_list=[]
    if (URL.find('?index=')>0):
        all_track_nr=((html.count('?index='))//2)-1
        a1=URL[:URL.find('?index=')]
        current_track_no=int(URL[len(a1)+len('?index='):])
        ID=a1[a1.find('/album/')+len('/album/'):]
        track_list.append('%s'%current_track_no)
    elif (URL.find('?start')>0):
        all_track_nr=((html.count('?index='))//2)-1
        a1=URL[:URL.find('?start')]
        current_track_no=int(URL[len(a1)+len('?start'):])
        ID=a1[a1.find('/album/')+len('/album/'):]
        track_list.append('%s'%current_track_no)
    else:
        all_track_nr=(html.count('?index='))//2
        a1=URL
        current_track_no='null'
        ID=a1[a1.find('/album/')+len('/album/'):]
        track_list.append('%s'%current_track_no)
    i=0
    b=html[html.find('<span class="song_name">'):html.rfind('<span class="song_name">')]
    b_len=len('<span class="song_name">')
    iter=re.finditer(r'<span class="song_name">', b)
    indices=[m.start(0) for m in iter]
    while i<all_track_nr:
        track_list.append('%s?index=%d'%(a1,i))
        d=(b[indices[i]:].find('</span>'))
        track_name=b[indices[i]+b_len:indices[i]+d]
        track_list.append(track_name)
        i+=1
    return(track_list)
Project: Radiojavan    Author: nimasaj    | Project Source | File Source
def album(URL):
    track_list=[]
    if (URL.find('?index=')>0):
        all_track_nr=((html.count('?index='))//2)-1
        a1=URL[:URL.find('?index=')]
        current_track_no=int(URL[len(a1)+len('?index='):])
        ID=a1[a1.find('/album/')+len('/album/'):]
        track_list.append('%s'%current_track_no)
    elif (URL.find('?start')>0):
        all_track_nr=((html.count('?index='))//2)-1
        a1=URL[:URL.find('?start')]
        current_track_no=int(URL[len(a1)+len('?start'):])
        ID=a1[a1.find('/album/')+len('/album/'):]
        track_list.append('%s'%current_track_no)
    else:
        all_track_nr=(html.count('?index='))//2
        a1=URL
        current_track_no='null'
        ID=a1[a1.find('/album/')+len('/album/'):]
        track_list.append('%s'%current_track_no)
    i=0
    b=html[html.find('<span class="song_name">'):html.rfind('<span class="song_name">')]
    b_len=len('<span class="song_name">')
    iter=re.finditer(r'<span class="song_name">', b)
    indices=[m.start(0) for m in iter]
    while i<all_track_nr:
        track_list.append('%s?index=%d'%(a1,i))
        d=(b[indices[i]:].find('</span>'))
        track_name=b[indices[i]+b_len:indices[i]+d]
        track_list.append(track_name)
        i+=1
    return(track_list)
Project: googletranslate.popclipext    Author: wizyoung    | Project Source | File Source
def legacy_format_json(original):
    # save state
    states = []
    text = original

    # save position for double-quoted texts
    for i, pos in enumerate(re.finditer('"', text)):
        # pos.start() is a double-quote
        p = pos.start() + 1
        if i % 2 == 0:
            nxt = text.find('"', p)
            states.append((p, text[p:nxt]))

    # replace all weird characters in text
    while text.find(',,') > -1:
        text = text.replace(',,', ',null,')
    while text.find('[,') > -1:
        text = text.replace('[,', '[null,')

    # recover state
    for i, pos in enumerate(re.finditer('"', text)):
        p = pos.start() + 1
        if i % 2 == 0:
            j = int(i / 2)
            nxt = text.find('"', p)
            # replacing a portion of a string
            # use slicing to extract those parts of the original string to be kept
            text = text[:p] + states[j][1] + text[nxt:]

    converted = json.loads(text)
    return converted
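A quick check on a quote-free input (my example): legacy_format_json('[1,,2]') returns [1, None, 2], since ',,' is rewritten to ',null,' before json.loads runs.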
Project: BioNanoAnalyst    Author: AppliedBioinformatics    | Project Source | File Source
def make_RefCmap(fasta_file, enz=None, min_len=20, min_nsite=5, path=None):
    name = fasta_file.rsplit('.',1)[0].split('/')[-1]
    index = 0
    enzymes = {'BspQI':'GCTCTTC',
                'BbvCI':'CCTCAGC',
                'Bsml':'GAATGC',
                'BsrDI':'GCAATG',
                'bseCI':'ATCGAT',
                'BssSI':'CACGAG'}
    try:
        cmap_file='%s/%s_%s.cmap'%(path,name,enz)
        forwards = enzymes[enz]
        reverse = str(Seq(forwards).reverse_complement())
        with open (cmap_file,'a') as ref_cmap:
            ref_cmap.write('# CMAP File Version:\t0.1\n')
            ref_cmap.write('# Label Channels:\t1\n')
            ref_cmap.write('# Nickase Recognition Site 1:\t%s\n'%forwards)
            ref_cmap.write('# Enzyme1:\tNt.%s\n'%enz)
            ref_cmap.write('# Number of Consensus Nanomaps:\tN/A\n')
            ref_cmap.write('#h CMapId\tContigLength\tNumSites\tSiteID\tLabelChannel\tPosition\tStdDev\tCoverage\tOccurrence\n')
            ref_cmap.write('#f int\tfloat\tint\tint\tint\tfloat\tfloat\tint\tint\n')
            for seqs in SeqIO.parse(fasta_file,'fasta'):
                seq = str(seqs.seq.upper())
                seq_len = len(seq)
                index+=1
                if seq_len >= min_len*1000:
                    nsites = len(re.findall('%s|%s'%(forwards,reverse),seq))
                    if nsites >=min_nsite:
                        j=1
                        for o in re.finditer('%s|%s'%(forwards,reverse),seq):
                            ref_cmap.write('%s\t%.1f\t%d\t%d\t1\t%.1f\t1.0\t1\t1\n'%(index,seq_len,nsites,j,o.start()+1))
                            j+=1
                        ref_cmap.write('%s\t%.1f\t%d\t%d\t0\t%.1f\t0.0\t1\t0\n'%(index,seq_len,nsites,j,seq_len))
    except:
        pass
Project: mod_stat    Author: DadoZe    | Project Source | File Source
def formatString(self, text, stats, not_found_replacement = None):
        #try:
        values = stats['values']
        for m in re.finditer("{{([gc]:)?([^}:]*)((:d)|(:1f)|:(\d+)|:(\d+)\.(\d+)f|(:\+d)|(:\+1f))?}}", text):
            g, g1, key, g2, sg1, sg2, sg3, sg4a, sg4b, sg5, sg6 = m.group(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
            if not key in values:
                if not_found_replacement is None:
                    if d: LOG_NOTE('No key in values of %s (%s)' % (stats.get('_type', 'unknown'), key))
                else:
                    text = text.replace('%s' % g, not_found_replacement)
            elif g1 is None:
                if g2 is None:
                    text = text.replace('{{%s}}' % key, self.applyMacros(values[key]))
                elif sg1:
                    text = text.replace('{{%s:d}}' % key, self.applyMacros(values[key], 0))
                elif sg2:
                    text = text.replace('{{%s:1f}}' % key, self.applyMacros(values[key], 1))
                elif sg3:
                    xx = int(sg3)
                    text = text.replace('{{%s:%d}}' % (key, xx), self.applyMacros2(values[key], xx))
                elif sg4a:
                    xx, yy = int(sg4a), int(sg4b)
                    text = text.replace('{{%s:%d.%df}}' % (key, xx, yy), self.applyMacros2(values[key], xx, yy))
                elif sg5:
                    text = text.replace('{{%s:+d}}' % key, self.applyMacros(values[key], 0, '+'))
                elif sg6:
                    text = text.replace('{{%s:+1f}}' % key, self.applyMacros(values[key], 1, '+'))
            elif g1=="g:":
                text = text.replace('{{g:%s}}' % key, stats['gradient'][key])
            elif g1=="c:":
                text = text.replace('{{c:%s}}' % key, stats['palette'][key])
        #except:
        #  LOG_CURRENT_EXCEPTION()
        #finally:
        return text
Project: yargy    Author: natasha    | Project Source | File Source
def __call__(self, text):
        for match in re.finditer(self.regexp, text):
            name = match.lastgroup
            value = match.group(0)
            span = match.span()
            rule = self.mapping[name]
            token = rule(value, span)
            yield token
Project: aapm_thoracic_challenge    Author: xf4j    | Project Source | File Source
def load(self, model_name='main'):
        checkpoint_dir = os.path.join(self.checkpoint_dir, self.model_dir)

        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
            self.saver.restore(self.sess, os.path.join(checkpoint_dir, ckpt_name))
            counter = int(next(re.finditer("(\d+)(?!.*\d)", ckpt_name)).group(0))
            return True, counter
        else:
            print("Failed to find a checkpoint")
            return False, 0