We have extracted the following 50 code examples from open-source Python projects to illustrate how to use re.finditer().
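Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the basic pattern they all share: re.finditer() scans a string left to right and yields one match object per non-overlapping match, each exposing group(), groups(), start(), and end().

import re

text = "cat 10, dog 20, bird 30"
# finditer() returns an iterator of match objects, one per non-overlapping match
for m in re.finditer(r"(\w+) (\d+)", text):
    word, number = m.groups()              # captured groups as a tuple
    print(m.group(0), m.start(), m.end())  # full match text and its span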
def fetch():
    retval = {}
    content = retrieve_content(__url__)

    if __check__ in content:
        for match in re.finditer(r"(\d+\.\d+\.\d+\.\d+)/(\d+)", content):
            prefix, mask = match.groups()
            mask = int(mask)
            start_int = addr_to_int(prefix) & make_mask(mask)
            end_int = start_int | ((1 << 32 - mask) - 1)
            if 0 <= end_int - start_int <= 1024:
                address = start_int
                while start_int <= address <= end_int:
                    retval[int_to_addr(address)] = (__info__, __reference__)
                    address += 1

    return retval
def split_arg_string(string):
    """Given an argument string this attempts to split it into small parts."""
    rv = []
    for match in re.finditer(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                             r'|"([^"\\]*(?:\\.[^"\\]*)*)"'
                             r'|\S+)\s*', string, re.S):
        arg = match.group().strip()
        if arg[:1] == arg[-1:] and arg[:1] in '"\'':
            arg = arg[1:-1].encode('ascii', 'backslashreplace') \
                .decode('unicode-escape')
        try:
            arg = type(string)(arg)
        except UnicodeError:
            pass
        rv.append(arg)
    return rv
def _GetEndOfTableIfNotAlias(self, query: str, column_name: str) -> bool:
    """Getting the start of the column if it is not an alias column

    Args:
        query (str): the query to be searched
        column_name (str): the name to be searched for

    Returns:
        bool: 0 if no column could be found or the starting position of the column
    """
    wrong_positions = [name.start() for name in
                       re.finditer('.{0} as'.format(column_name), query)]
    found_positions = []
    for space in self._POSSIBLEQUERYSEPERATOR:
        found_positions += [name.start() for name in
                            re.finditer('.{0}{1}'.format(column_name, space), query)]
    position = set(found_positions) - set(wrong_positions)
    if position:
        return position.pop()
    else:
        return 0
def register_options(self):
    # type: () -> None
    """Parse options from text like this:

    Preferences:
      [+|-]alignArguments        Enable/disable ...
      ...
      [+|-]spacesWithinPatternBinders        Enable/disable ...
      -alignSingleLineCaseStatements.maxArrowIndent=[1-100]        Set Maximum number ...
      -indentSpaces=[1-10]        Set Number of spaces ...
    """
    exeresult = run_executable(self.exe, ['--help'], cache=self.cache)
    options = []
    text = unistr(exeresult.stdout)
    for m in re.finditer(r'^ (\[\+\|-\]|-)([a-z][a-zA-Z.]+)(?:=\[(\d+)-(\d+)\])?',
                         text, re.MULTILINE):
        optionprefix, optionname, start, end = m.groups()
        if start is None:
            optiontype = 'bool'
            configs = [True, False]  # type: List[OptionValue]
        else:
            optiontype = 'int'
            configs = list(inclusiverange(int(start), int(end)))
        options.append(option_make(optionname, optiontype, configs))
    self.styledefinition = styledef_make(options)
def filter_output(self, output, regex):
    """ Filter output from a command """
    result = {}
    result_list = []
    if isinstance(output, str):
        lines = [output]
    else:
        lines = output
    for line in lines:
        iterator = re.finditer(regex, line)
        try:
            while True:
                cur = next(iterator)
                result = cur.groupdict()
                result['hostname'] = self.host
                result_list.append(result)
        except StopIteration:
            pass
    return result_list
def split_into_sentences(text):
    potential_end_pat = re.compile(r"".join([
        r"([\w\.'’&\]\)]+[\.\?!])",  # A word that ends with punctuation
        r"([‘’“”'\"\)\]]*)",         # Followed by optional quote/parens/etc
        r"(\s+(?![a-z\-–—]))",       # Followed by whitespace + non-(lowercase or dash)
    ]), re.U)
    dot_iter = re.finditer(potential_end_pat, text)
    end_indices = [
        (x.start() + len(x.group(1)) + len(x.group(2)))
        for x in dot_iter
        if is_sentence_ender(x.group(1))
    ]
    spans = zip([None] + end_indices, end_indices + [None])
    sentences = [
        text[start:end].strip()
        for start, end in spans
    ]
    return sentences
def create_ad_hoc_field(cls, db_type):
    '''
    Give an SQL column description such as "Enum8('apple' = 1, 'banana' = 2, 'orange' = 3)"
    this method returns a matching enum field.
    '''
    import re
    try:
        Enum  # exists in Python 3.4+
    except NameError:
        from enum import Enum  # use the enum34 library instead
    members = {}
    for match in re.finditer("'(\w+)' = (\d+)", db_type):
        members[match.group(1)] = int(match.group(2))
    enum_cls = Enum('AdHocEnum', members)
    field_class = Enum8Field if db_type.startswith('Enum8') else Enum16Field
    return field_class(enum_cls)
def tokenize(token_specification, text):
    Token = collections.namedtuple('Token', ['typ', 'value', 'line', 'column', 'mo'])
    token_specification.extend((
        ('NEWLINE', r'\n'),  # Line endings
        ('SKIP', r'.'),      # Any other character
    ))
    tok_regex = '|'.join('(?P<%s>%s)' % pair for pair in token_specification)
    line_num = 1
    line_start = 0
    for mo in re.finditer(tok_regex, text):
        kind = mo.lastgroup
        value = filter(lambda x: x is not None, mo.groups())
        if kind == 'NEWLINE':
            line_start = mo.end()
            line_num += 1
        elif kind == 'SKIP':
            pass
        else:
            column = mo.start() - line_start
            yield Token(kind, value, line_num, column, mo)
def fetch():
    retval = {}
    content = retrieve_content(__url__)

    if __check__ in content:
        for match in re.finditer(r"(?m)^([\d.]+),IP used by ([^,]+) C&C", content):
            retval[match.group(1)] = ("%s (malware)" % match.group(2).lower(), __reference__)
        for row in re.finditer(r"(?s)<tr>(.+?)</tr>", content):
            if "<span>100%</span>" in row.group(1):
                domain = re.search(r"get_data_domain\('([^']+)", row.group(1))
                if domain:
                    tag = re.search(r">(trojan|spyware|adware)\.([^<]+)", row.group(1))
                    retval[domain.group(1)] = (("%s (malware)" % tag.group(2)) if tag else "malware", __reference__)

    return retval
def remove_job(self, job_guid):
    """
    Remove a job given its GUID, including decreasing the job counter of the queue
    :param job_guid: the GUID of the job to remove from queue
    :return: void
    """
    global _os_ver
    occurs = [(oc.start(), oc.end()) for oc in
              list(re.finditer('%s' % (BITSStateFile.JOB_HEADER_FOOTER_HEX[_os_ver].decode('hex')),
                               self._original_data))]
    if occurs:
        self.set_jobs_counter(self.get_jobs_counter() - 1)
        state_off = self._get_job_state_off(job_guid)
        new_data_list = list(self._new_data)
        job_start_off, job_end_off = BITSStateFile._get_job_limits_by_index_in_between(occurs, state_off)
        new_data_list = new_data_list[:job_start_off + 1] + new_data_list[job_end_off + 1:]
        self._update_new_data(0, "".join(new_data_list))
        self.commit()
def post(self, request, pk, **kwargs):
    instance = CognateClassCitation.objects.get(id=pk)
    form = EditCognateClassCitationForm(request.POST, instance=instance)
    try:
        # validate {ref foo ...}
        s = Source.objects.all().filter(deprecated=False)
        pattern = re.compile(r'(\{ref +([^\{]+?)(:[^\{]+?)? *\})')
        for m in re.finditer(pattern, form.data['comment']):
            foundSet = s.filter(shorthand=m.group(2))
            if not foundSet.count() == 1:
                raise ValidationError(
                    'In field “Comment” source shorthand “%(name)s” is unknown.',
                    params={'name': m.group(2)})
        form.save()
    except ValidationError as e:
        messages.error(
            request,
            'Sorry, the server had problems updating the cognate citation. %s' % e)
        return self.render_to_response({"form": form})
    return HttpResponseRedirect(reverse('cognate-class-citation-detail', args=[pk]))
def after_compile(self):
    if self.engine.positional:
        self.positiontup = []
        match = r'%\(([\w_]+)\)s'
        params = re.finditer(match, self.strings[self.statement])
        for p in params:
            self.positiontup.append(p.group(1))
        if self.engine.paramstyle == 'qmark':
            self.strings[self.statement] = re.sub(match, '?', self.strings[self.statement])
        elif self.engine.paramstyle == 'format':
            self.strings[self.statement] = re.sub(match, '%s', self.strings[self.statement])
        elif self.engine.paramstyle == 'numeric':
            i = [0]
            def getnum(x):
                i[0] += 1
                return str(i[0])
            self.strings[self.statement] = re.sub(match, getnum, self.strings[self.statement])
def test_patterns(text, patterns):
    """Given source text and a list of patterns, look for
    matches for each pattern within the text and print
    them to stdout.
    """
    # Look for each pattern in the text and print the results
    for pattern, desc in patterns:
        print("'{}' ({})\n".format(pattern, desc))
        print(" '{}'".format(text))
        for match in re.finditer(pattern, text):
            s = match.start()
            e = match.end()
            substr = text[s:e]
            n_backslashes = text[:s].count('\\')
            prefix = '.' * (s + n_backslashes)
            print(" {}'{}'".format(prefix, substr))
        print()
    return
async def pun(self, ctx):
    '''
    Gives a random pun from the depths of the internet
    '''
    # Read from page
    async with self.session.get('http://www.punoftheday.com/cgi-bin/randompun.pl') as r:
        page = await r.text()

    # Scrape the raw HTML
    r = r'(<div class=\"dropshadow1\">\n<p>).*(</p>\n</div>)'
    foundPun = [i for i in finditer(r, page)][0].group()

    # Filter out the pun
    r = r'(>).*(<)'
    filteredPun = [i for i in finditer(r, foundPun)][0].group()

    # Boop it out
    fullPun = filteredPun[1:-1]
    await self.sparcli.say(fullPun)
async def steamid(self, ctx, *, gameURL: str):
    '''
    Gets the information of a game from Steam URL
    '''
    await self.sparcli.send_typing(ctx.message.channel)

    # Grab the game ID from the user input
    regexMatches = finditer(r'\d+', gameURL)
    regexList = [i for i in regexMatches]

    # Parse it as a group
    if len(regexList) == 0:
        await self.sparcli.say('I was unable to find the ID of that game on the Steam API.')
        return
    else:
        await self.getSteamGameInfo(regexList[0].group())
def messageToEmbed(message):
    # Get some default values that'll be in the embed
    author = message.author
    description = message.content
    image = False

    # Check to see if any images were added
    regexMatch = r'.+(.png)|.+(.jpg)|.+(.jpeg)|.+(.gif)'
    if len(message.attachments) > 0:
        attachment = message.attachments[0]
        matchList = [i for i in finditer(regexMatch, attachment['filename'])]
        if len(matchList) > 0:
            image = attachment['url']

    # Get the time the message was created
    createdTime = '.'.join(str(message.timestamp).split('.')[:-1])

    # Make and return the embed
    return makeEmbed(user=author, description=description, image=image, footer=createdTime)
def match_and_replace(text=None, rule=None, phoneme=None):
    """
    Replace found text from a single rule.
    """
    # Find all rule matches.
    matches = [(match.start(), match.end()) for
               match in re.finditer(rule, text)]
    # Start from behind, so replace in-place.
    matches.reverse()
    # Convert to characters because strings are immutable.
    characters = list(text)
    for start, end in matches:
        characters[start:end] = phoneme
    # Convert back to string.
    return "".join(characters)
def unpack_dword(line):
    outs = ''
    i = 0
    for m in re.finditer(r'((?:0x[0-9a-f]{8},?\s*))', line):
        l = m.group(0)
        l = l.replace(',', '')
        l = l.replace(' ', '')
        dword = int(l, 16)
        unpack = reversed([
            (dword & 0xff000000) >> 24,
            (dword & 0x00ff0000) >> 16,
            (dword & 0x0000ff00) >> 8,
            (dword & 0x000000ff)
        ])
        i += 4
        for b in unpack:
            outs += '%02x' % b

    out(dbg("After callback ('%s')" % outs))
    return BytesParser.formats_compiled['hexstring'].match(outs)
def extract_videos_from_page(self, page):
    ids_in_page = []
    titles_in_page = []
    for mobj in re.finditer(self._VIDEO_RE, page):
        # The link with index 0 is not the first video of the playlist (not sure if still actual)
        if 'index' in mobj.groupdict() and mobj.group('id') == '0':
            continue
        video_id = mobj.group('id')
        video_title = unescapeHTML(mobj.group('title'))
        if video_title:
            video_title = video_title.strip()
        try:
            idx = ids_in_page.index(video_id)
            if video_title and not titles_in_page[idx]:
                titles_in_page[idx] = video_title
        except ValueError:
            ids_in_page.append(video_id)
            titles_in_page.append(video_title)
    return zip(ids_in_page, titles_in_page)
def format_to_regex(self, fmt):
    """
    Converts a string like
       '%(title)s - %(artist)s'
    to a regex like
       '(?P<title>.+)\ \-\ (?P<artist>.+)'
    """
    lastpos = 0
    regex = ""
    # replace %(..)s with regex group and escape other string parts
    for match in re.finditer(r'%\((\w+)\)s', fmt):
        regex += re.escape(fmt[lastpos:match.start()])
        regex += r'(?P<' + match.group(1) + '>.+)'
        lastpos = match.end()
    if lastpos < len(fmt):
        regex += re.escape(fmt[lastpos:len(fmt)])
    return regex
def extract_object(self, objname):
    obj = {}
    obj_m = re.search(
        (r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) +
        r'\s*(?P<fields>([a-zA-Z$0-9]+\s*:\s*function\(.*?\)\s*\{.*?\}(?:,\s*)?)*)' +
        r'\}\s*;',
        self.code)
    fields = obj_m.group('fields')
    # Currently, it only supports function definitions
    fields_m = re.finditer(
        r'(?P<key>[a-zA-Z$0-9]+)\s*:\s*function'
        r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
        fields)
    for f in fields_m:
        argnames = f.group('args').split(',')
        obj[f.group('key')] = self.build_function(argnames, f.group('code'))

    return obj
def find_links(file):
    """Find all markdown links in a file object.

    Yield (lineno, regexmatch) tuples.
    """
    # don't yield same link twice
    seen = set()

    # we need to loop over the file two lines at a time to support
    # multi-line (actually two-line) links, so this is kind of a mess
    firsts, seconds = itertools.tee(file)
    next(seconds)  # first line is never second line

    # we want 1-based indexing instead of 0-based and one-line links get
    # caught from linepair[1], so we need to start at two
    for lineno, linepair in enumerate(zip(firsts, seconds), start=2):
        lines = linepair[0] + linepair[1]
        for match in re.finditer(_LINK_REGEX, lines, flags=re.DOTALL):
            if match.group(0) not in seen:
                seen.add(match.group(0))
                yield match, lineno
def split_problematic_endpoints_line(line):
    """
    If the line of host contains more than one ":", for example:
    10.99.184.69:900010.37.170.125:9006
    this splits the line and returns a list of correct endpoints

    Args:
        ``line``: the problematic line which contains more than one endpoint string.

    Returns:
        the split list of the problematic line which has correct endpoint strings.
    """
    colon_parts = line.strip().split(":")
    offset = len(colon_parts[-1])
    colon_positions = [m.start() for m in re.finditer(':', line)]
    start = 0
    split_parts = []
    for colon_position in colon_positions:
        end = colon_position + offset + 1
        split_part = line[start:end]
        split_parts.append(split_part)
        start = end
    return split_parts
def get_classes(self, folder, class_suffix, selected=None):
    classes = []
    for file in glob.glob(folder + "/*.py"):
        handle = open(file, "r")
        content = handle.read()
        handle.close()

        module = folder.replace('/', '.').replace('\\', '.') + '.' + os.path.basename(file).replace('.py', '')
        regexp = "\sclass\s+([\w\d]+" + class_suffix + ")\s*\(([\w\d]*)\)\s*:\s"
        for m in re.finditer(regexp, content):
            parent_class = m.group(2)
            if len(parent_class) == 0 or parent_class == 'object':
                continue
            class_name = m.group(1)
            classes.append(module + '.' + class_name)
    return classes
def analyze(line, linenum, lang):
    annotations = []
    if lang is Language.en_EN or lang is None:
        weasel_words = WEASEL_WORDS_EN
    elif lang is Language.de_DE:
        weasel_words = WEASEL_WORDS_DE

    for weasel_pattern in weasel_words:
        for match in re.finditer(weasel_pattern, line, flags=re.IGNORECASE):
            if (not verify_match(match, line)) or (not matches_whole_words(match, line)):
                continue
            index = match.start()
            annotation = WeaselWord(linenum, line, index, word=match.group(0))
            annotations.append(annotation)
    return annotations
def analyze(line, linenum, lang):
    annotations = []
    if lang is Language.en_EN or lang is None:
        contractions = CONTRACTIONS_EN
    elif lang is Language.de_DE:
        contractions = CONTRACTIONS_DE

    for pattern in contractions:
        for match in re.finditer(pattern, line, flags=re.IGNORECASE):
            if (not verify_match(match, line)) or (not matches_whole_words(match, line)):
                continue
            index = match.start()
            replaced_contraction = re.sub(pattern, contractions[pattern], match.group(0),
                                          flags=re.IGNORECASE)
            annotation = Contraction(linenum, line, index, word=match.group(0),
                                     contraction=replaced_contraction)
            annotations.append(annotation)
    return annotations
def paged_github_json_request(url, headers=None):

    response = requests.get(url, headers=headers)
    assert response.ok, response.content
    results = response.json()

    if 'Link' in response.headers:
        links = response.headers['Link']

        # There are likely better ways to parse/extract the link information
        # but here we just find the last page number mentioned in the header
        # 'Link' section and then loop over all pages to get the comments
        last_match = list(re.finditer('page=[0-9]+', links))[-1]
        last_page = int(links[last_match.start():last_match.end()].split('=')[1])

        # If there are other pages, just loop over them and get all the
        # comments
        if last_page > 1:
            for page in range(2, last_page + 1):
                response = requests.get(url + '?page={0}'.format(page), headers=headers)
                assert response.ok, response.content
                results += response.json()

    return results
def getOfflineMediaList(self, folderName=False, title=False, contentType=7):
    mediaFiles = []
    for r1 in re.finditer('\{(.*?)\"spaces\"\:', entryS, re.DOTALL):
        entry = r1.group(1)
        media = self.getMediaPackage(entry, folderName=folderName, contentType=contentType,
                                     fanart=folderFanart, icon=folderIcon)
        if media is not None:
            mediaFiles.append(media)
    return mediaFiles

##
# retrieve a list of videos, using playback type stream
#   parameters: prompt for video quality (optional), cache type (optional)
#   returns: list of videos
##
def get_occurences(self, pattern, overlap=False):
    """Return position of the input pattern in the sequence

    ::

        >>> from sequana import Sequence
        >>> s = Sequence('ACGTTTTACGT')
        >>> s.get_occurences("ACGT")
        [0, 7]

    """
    if overlap is False:
        res = [m.start() for m in re.finditer(pattern, self.sequence)]
    elif overlap is True:
        res = [m.start() for m in re.finditer('(?=%s)' % pattern, self.sequence)]
    return res

    # reverse find-all without overlaps, you can combine positive and
    # negative lookahead into an expression like this:
    # res = [m.start() for m in re.finditer('(?=%s)(?!.{1,%d}%s)' % (search,
    #     len(pattern)-1, pattern), 'ttt')]
def find_cute(url):
    # fetch the page
    r = requests.get(url)
    # detect the charset and set r.encoding
    encoding = re.search('content="text/html;\s*charset=(.*?)"', r.text).group(1)
    r.encoding = encoding
    # print(r.text)
    finds = re.finditer(r'<p>\s*([^>]*?)\s*\n', r.text)
    i = random.randint(0, sum(1 for _ in finds))
    start = 0
    finds = re.finditer(r'<p>\s*([^>]*?)\s*\n', r.text)
    for f in finds:
        if start == i:
            print(f.group(1))
            break
        start += 1
def _proc_gnusparse_00(self, next, pax_headers, buf):
    """Process a GNU tar extended sparse header, version 0.0.
    """
    offsets = []
    for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
        offsets.append(int(match.group(1)))
    numbytes = []
    for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
        numbytes.append(int(match.group(1)))
    next.sparse = list(zip(offsets, numbytes))
def get_selects_from_text(content):
    sqls = []
    select_keyword = '@Select\s*\('
    for m in re.finditer(select_keyword, content):
        rparen_pos = MybatisInlineSqlExtractor.find_right_paren_pos(content[m.end():])
        if rparen_pos < 0:
            continue
        sqls.append(SQL('', eval(content[m.end():m.end() + rparen_pos]
                                 .replace('\r', '').replace('\n', '')).strip()))
    return sqls
def get_properties_dict(self, properties_str):
    if isinstance(properties_str, dict):
        return properties_str
    properties = collections.OrderedDict()
    pattern = re.compile('(\S+?)\s*"(.*?)"')
    for m in re.finditer(pattern, properties_str):
        key = m.group(1)
        value = m.group(2)
        properties[key] = value
    return properties
def get_unambiguous_regions(reference_path):
    '''Calculate regions corresponding to unambiguous bases'''
    chrom_map = {}
    for chrom, seq in open_reference(reference_path).items():
        regions = [(m.start(), m.end()) for m in re.finditer('[acgtACGT]+', seq[:])]
        chrom_map[chrom] = Regions(regions=regions)
    return chrom_map
def tokenize_sents(string):
    """
    Tokenize input text to sentences.

    :param string: Text to tokenize
    :type string: str or unicode
    :return: sentences
    :rtype: list of strings
    """
    string = six.text_type(string)

    spans = []
    for match in re.finditer('[^\s]+', string):
        spans.append(match)
    spans_count = len(spans)

    rez = []
    off = 0

    for i in range(spans_count):
        tok = string[spans[i].start():spans[i].end()]
        if i == spans_count - 1:
            rez.append(string[off:spans[i].end()])
        elif tok[-1] in ['.', '!', '?', '…', '»']:
            tok1 = tok[re.search('[.!?…»]', tok).start() - 1]
            next_tok = string[spans[i + 1].start():spans[i + 1].end()]
            if (next_tok[0].isupper()
                    and not tok1.isupper()
                    and not (tok[-1] != '.'
                             or tok1[0] == '('
                             or tok in ABBRS)):
                rez.append(string[off:spans[i].end()])
                off = spans[i + 1].start()

    return rez
def register_options(self):
    # type: () -> None
    """Parse options from text like this:

    # Uncrustify 0.63
    #
    # General options
    #

    newlines                      { Auto, LF, CR, CRLF }
      The type of line endings

    input_tab_size                Number
      The original size of tabs in the input

    indent_align_string           { False, True }
      Whether to indent strings broken by '\' so that they line up
    """
    exeresult = run_executable(self.exe, ['--show-config'], cache=self.cache)
    options = []
    text = unistr(exeresult.stdout)
    for m in re.finditer(r'^(\w+)\s+(.*?)\s*$', text, re.MULTILINE):
        optionname, optiondesc = m.group(1), m.group(2)
        if optiondesc.startswith('{'):
            optiontype = 'Enum'
            configs = optiondesc[1:-1].strip().split(', ')
            configs = [c.lower() for c in configs]
        else:
            optiontype = optiondesc
            configs = []
        options.append(option_make(optionname, optiontype, configs))
    self.styledefinition = styledef_make(options)
def __handle_tostring(self):
    for match in re.finditer('(\d+)\[t\+o\+S\+t\+r\+i\+n\+g\](\d+)', self.js):
        repl = to_base(match.group(1), match.group(2))
        self.js = self.js.replace(match.group(0), repl)
def __get_attribs(element):
    attribs = {}
    for match in re.finditer('''\s+(?P<key>[^=]+)=\s*(?:(?P<delim>["'])(?P<value1>.*?)(?P=delim)|(?P<value2>[^"'][^>\s]*))''', element):
        match = match.groupdict()
        value1 = match.get('value1')
        value2 = match.get('value2')
        value = value1 if value1 is not None else value2
        if value is None:
            continue
        attribs[match['key'].lower().strip()] = value
    return attribs
def album(URL):
    track_list = []
    if (URL.find('?index=') > 0):
        all_track_nr = ((html.count('?index=')) // 2) - 1
        a1 = URL[:URL.find('?index=')]
        current_track_no = int(URL[len(a1) + len('?index='):])
        ID = a1[a1.find('/album/') + len('/album/'):]
        track_list.append('%s' % current_track_no)
    elif (URL.find('?start') > 0):
        all_track_nr = ((html.count('?index=')) // 2) - 1
        a1 = URL[:URL.find('?start')]
        current_track_no = int(URL[len(a1) + len('?start'):])
        ID = a1[a1.find('/album/') + len('/album/'):]
        track_list.append('%s' % current_track_no)
    else:
        all_track_nr = (html.count('?index=')) // 2
        a1 = URL
        current_track_no = 'null'
        ID = a1[a1.find('/album/') + len('/album/'):]
        track_list.append('%s' % current_track_no)
    i = 0
    b = html[html.find('<span class="song_name">'):html.rfind('<span class="song_name">')]
    b_len = len('<span class="song_name">')
    iter = re.finditer(r'<span class="song_name">', b)
    indices = [m.start(0) for m in iter]
    while i < all_track_nr:
        track_list.append('%s?index=%d' % (a1, i))
        d = (b[indices[i]:].find('</span>'))
        track_name = b[indices[i] + b_len:indices[i] + d]
        track_list.append(track_name)
        i += 1
    return (track_list)
def legacy_format_json(original):
    # save state
    states = []
    text = original

    # save position for double-quoted texts
    for i, pos in enumerate(re.finditer('"', text)):
        # pos.start() is a double-quote
        p = pos.start() + 1
        if i % 2 == 0:
            nxt = text.find('"', p)
            states.append((p, text[p:nxt]))

    # replace all weird characters in text
    while text.find(',,') > -1:
        text = text.replace(',,', ',null,')
    while text.find('[,') > -1:
        text = text.replace('[,', '[null,')

    # recover state
    for i, pos in enumerate(re.finditer('"', text)):
        p = pos.start() + 1
        if i % 2 == 0:
            j = int(i / 2)
            nxt = text.find('"', p)
            # replacing a portion of a string
            # use slicing to extract those parts of the original string to be kept
            text = text[:p] + states[j][1] + text[nxt:]

    converted = json.loads(text)
    return converted
def make_RefCmap(fasta_file, enz=None, min_len=20, min_nsite=5, path=None):
    name = fasta_file.rsplit('.', 1)[0].split('/')[-1]
    index = 0
    enzymes = {'BspQI': 'GCTCTTC',
               'BbvCI': 'CCTCAGC',
               'Bsml': 'GAATGC',
               'BsrDI': 'GCAATG',
               'bseCI': 'ATCGAT',
               'BssSI': 'CACGAG'}
    try:
        cmap_file = '%s/%s_%s.cmap' % (path, name, enz)
        forwards = enzymes[enz]
        reverse = str(Seq(forwards).reverse_complement())
        with open(cmap_file, 'a') as ref_cmap:
            ref_cmap.write('# CMAP File Version:\t0.1\n')
            ref_cmap.write('# Label Channels:\t1\n')
            ref_cmap.write('# Nickase Recognition Site 1:\t%s\n' % forwards)
            ref_cmap.write('# Enzyme1:\tNt.%s\n' % enz)
            ref_cmap.write('# Number of Consensus Nanomaps:\tN/A\n')
            ref_cmap.write('#h CMapId\tContigLength\tNumSites\tSiteID\tLabelChannel\tPosition\tStdDev\tCoverage\tOccurrence\n')
            ref_cmap.write('#f int\tfloat\tint\tint\tint\tfloat\tfloat\tint\tint\n')
            for seqs in SeqIO.parse(fasta_file, 'fasta'):
                seq = str(seqs.seq.upper())
                seq_len = len(seq)
                index += 1
                if seq_len >= min_len * 1000:
                    nsites = len(re.findall('%s|%s' % (forwards, reverse), seq))
                    if nsites >= min_nsite:
                        j = 1
                        for o in re.finditer('%s|%s' % (forwards, reverse), seq):
                            ref_cmap.write('%s\t%.1f\t%d\t%d\t1\t%.1f\t1.0\t1\t1\n' % (index, seq_len, nsites, j, o.start() + 1))
                            j += 1
                        ref_cmap.write('%s\t%.1f\t%d\t%d\t0\t%.1f\t0.0\t1\t0\n' % (index, seq_len, nsites, j, seq_len))
    except:
        pass
def formatString(self, text, stats, not_found_replacement=None):
    #try:
    values = stats['values']
    for m in re.finditer("{{([gc]:)?([^}:]*)((:d)|(:1f)|:(\d+)|:(\d+)\.(\d+)f|(:\+d)|(:\+1f))?}}", text):
        g, g1, key, g2, sg1, sg2, sg3, sg4a, sg4b, sg5, sg6 = m.group(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
        if not key in values:
            if not_found_replacement is None:
                if d:
                    LOG_NOTE('No key in values of %s (%s)' % (stats.get('_type', 'unknown'), key))
            else:
                text = text.replace('%s' % g, not_found_replacement)
        elif g1 is None:
            if g2 is None:
                text = text.replace('{{%s}}' % key, self.applyMacros(values[key]))
            elif sg1:
                text = text.replace('{{%s:d}}' % key, self.applyMacros(values[key], 0))
            elif sg2:
                text = text.replace('{{%s:1f}}' % key, self.applyMacros(values[key], 1))
            elif sg3:
                xx = int(sg3)
                text = text.replace('{{%s:%d}}' % (key, xx), self.applyMacros2(values[key], xx))
            elif sg4a:
                xx, yy = int(sg4a), int(sg4b)
                text = text.replace('{{%s:%d.%df}}' % (key, xx, yy), self.applyMacros2(values[key], xx, yy))
            elif sg5:
                text = text.replace('{{%s:+d}}' % key, self.applyMacros(values[key], 0, '+'))
            elif sg6:
                text = text.replace('{{%s:+1f}}' % key, self.applyMacros(values[key], 1, '+'))
        elif g1 == "g:":
            text = text.replace('{{g:%s}}' % key, stats['gradient'][key])
        elif g1 == "c:":
            text = text.replace('{{c:%s}}' % key, stats['palette'][key])
    #except:
    #    LOG_CURRENT_EXCEPTION()
    #finally:
    return text
def __call__(self, text):
    for match in re.finditer(self.regexp, text):
        name = match.lastgroup
        value = match.group(0)
        span = match.span()
        rule = self.mapping[name]
        token = rule(value, span)
        yield token
def load(self, model_name='main'):
    checkpoint_dir = os.path.join(self.checkpoint_dir, self.model_dir)

    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
        ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
        self.saver.restore(self.sess, os.path.join(checkpoint_dir, ckpt_name))
        counter = int(next(re.finditer("(\d+)(?!.*\d)", ckpt_name)).group(0))
        return True, counter
    else:
        print("Failed to find a checkpoint")
        return False, 0