我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用re.S。
def getDetailList(self, content):
    """Extract every (href, title) pair from *content* and hand each pair
    to a worker thread running ``workthread``; blocks until all workers
    finish.  The raw page is also dumped to ``file.txt`` (gbk) for
    offline inspection.
    """
    link_pattern = re.compile(
        r'<h2><a target="_blank" href="(.*?)" title="(.*?)"', re.S)
    matches = re.findall(link_pattern, content)
    with open('file.txt', 'w', encoding='gbk') as dump:
        dump.write(content)
    if not matches:
        print('???????..............')
    workers = []
    for pair in matches:
        worker = threading.Thread(
            target=workthread, args=(pair, self.user_agent, self.path))
        workers.append(worker)
        worker.start()
    for worker in workers:
        worker.join()
def split_arg_string(string):
    """Given an argument string this attempts to split it into small parts."""
    token_re = (r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                r'|"([^"\\]*(?:\\.[^"\\]*)*)"'
                r'|\S+)\s*')
    tokens = []
    for found in re.finditer(token_re, string, re.S):
        token = found.group().strip()
        if token[:1] == token[-1:] and token[:1] in '"\'':
            # Quoted token: drop the quotes and decode backslash escapes.
            token = (token[1:-1]
                     .encode('ascii', 'backslashreplace')
                     .decode('unicode-escape'))
        try:
            # Coerce back to the input's own string type.
            token = type(string)(token)
        except UnicodeError:
            pass
        tokens.append(token)
    return tokens
def getDetailList(self, content):
    """Find every ``(href, title)`` pair in *content* and hand each pair
    to ``self.getDetailPic``.  The raw page is also written to
    ``file.txt`` (gbk) for debugging.
    """
    # BUG FIX: the original pattern concatenated the two raw strings
    # without the space that separates the href="..." and title="..."
    # attributes, so it could never match real markup (compare the
    # sibling getDetailList in this file, whose pattern has the space).
    pattern = re.compile(
        r'<h2><a target="_blank" href="(.*?)"'
        r' title="(.*?)">', re.S)
    # Context manager instead of open/close so the file is closed even
    # if write() raises.
    with open('file.txt', 'w', encoding='gbk') as f:
        f.write(content)
    result = re.findall(pattern, content)
    if not result:
        print('???????..............')
    for item in result:
        self.getDetailPic(item)
def get_module_source_metadata(cls, module_source, full_line_map=False):
    """Extract the JSON metadata block embedded in a generated module's
    source and return it as a dict.

    The metadata lives between the ``__M_BEGIN_METADATA`` /
    ``__M_END_METADATA`` markers.  ``line_map`` keys and values are
    coerced back to ints (JSON object keys arrive as strings).  When
    *full_line_map* is true, a flattened ``full_line_map`` list is
    built mapping every module line to its originating template line.
    """
    source_map = re.search(
        r"__M_BEGIN_METADATA(.+?)__M_END_METADATA",
        module_source,
        re.S).group(1)
    source_map = compat.json.loads(source_map)
    source_map['line_map'] = dict(
        (int(k), int(v))
        for k, v in source_map['line_map'].items())
    if full_line_map:
        f_line_map = source_map['full_line_map'] = []
        line_map = source_map['line_map']
        curr_templ_line = 1
        # Walk module lines in order; module lines absent from the sparse
        # line_map inherit the last seen template line.
        for mod_line in range(1, max(line_map)):
            if mod_line in line_map:
                curr_templ_line = line_map[mod_line]
            f_line_map.append(curr_templ_line)
    return source_map
def __init__(self, code, **exception_kwargs):
    """Parse *code* -- either a Python expression string or an already
    parsed AST node -- and collect the identifiers/tuple arguments it
    declares and references via ``pyparser.FindTuple``.
    """
    self.codeargs = []
    self.args = []
    self.declared_identifiers = set()
    self.undeclared_identifiers = set()
    if isinstance(code, compat.string_types):
        if re.match(r"\S", code) and not re.match(r",\s*$", code):
            # if theres text and no trailing comma, insure its parsed
            # as a tuple by adding a trailing comma
            code += ","
        expr = pyparser.parse(code, "exec", **exception_kwargs)
    else:
        # assumes *code* is already a parsed AST node -- TODO confirm
        # the caller contract
        expr = code
    f = pyparser.FindTuple(self, PythonCode, **exception_kwargs)
    f.visit(expr)
def __init__(self, code, **exception_kwargs):
    """Compile a *partial* control-flow fragment (e.g. ``"for x in y:"``)
    by padding it into a syntactically complete statement before handing
    it to the superclass parser.

    Raises ``exceptions.CompileException`` when the fragment is not a
    recognized control statement or uses an unsupported keyword.
    """
    # keyword, optional expression, then ':' followed by a comment or EOL
    m = re.match(r'^(\w+)(?:\s+(.*?))?:\s*(#|$)', code.strip(), re.S)
    if not m:
        raise exceptions.CompileException(
            "Fragment '%s' is not a partial control statement" %
            code, **exception_kwargs)
    if m.group(3):
        # trailing comment present -- drop it before padding
        code = code[:m.start(3)]
    (keyword, expr) = m.group(1, 2)
    if keyword in ['for', 'if', 'while']:
        code = code + "pass"
    elif keyword == 'try':
        code = code + "pass\nexcept:pass"
    elif keyword == 'elif' or keyword == 'else':
        # an elif/else only parses after a preceding if
        code = "if False:pass\n" + code + "pass"
    elif keyword == 'except':
        # an except only parses after a preceding try
        code = "try:pass\n" + code + "pass"
    elif keyword == 'with':
        code = code + "pass"
    else:
        raise exceptions.CompileException(
            "Unsupported control keyword: '%s'" %
            keyword, **exception_kwargs)
    super(PythonFragment, self).__init__(code, **exception_kwargs)
def compile_rules(environment):
    """Compiles all the rules from the environment into a list of rules."""
    esc = re.escape
    rules = [
        (len(environment.comment_start_string), 'comment',
         esc(environment.comment_start_string)),
        (len(environment.block_start_string), 'block',
         esc(environment.block_start_string)),
        (len(environment.variable_start_string), 'variable',
         esc(environment.variable_start_string)),
    ]
    if environment.line_statement_prefix is not None:
        prefix = environment.line_statement_prefix
        rules.append(
            (len(prefix), 'linestatement', r'^[ \t\v]*' + esc(prefix)))
    if environment.line_comment_prefix is not None:
        prefix = environment.line_comment_prefix
        rules.append(
            (len(prefix), 'linecomment',
             r'(?:^|(?<=\S))[^\S\r\n]*' + esc(prefix)))
    # Longest delimiters must be tried first, hence the descending sort;
    # the length component is then dropped from each rule.
    return [rule[1:] for rule in sorted(rules, reverse=True)]
def parse_page(self, response):
    """Pull each ``gp.insertPrx(...)`` JSON blob out of *response* and
    register one Proxy per blob; the port arrives as a hex string."""
    blob_re = re.compile('gp.insertPrx\((.*?)\)', re.S)
    for blob in re.findall(blob_re, response.body):
        data = json.loads(blob)
        # the port field is hex-encoded; convert to decimal text
        port = str(int(data.get('PROXY_PORT'), 16))
        proxy = Proxy()
        proxy.set_value(
            ip=data.get('PROXY_IP'),
            port=port,
            country=data.get('PROXY_COUNTRY'),
            anonymity=data.get('PROXY_TYPE'),
            source=self.name,
        )
        self.add_proxy(proxy=proxy)
def parse_page(self, response):
    """Parse an 8-column HTML proxy table and register a Proxy per row.

    Columns used: 0=ip, 1=port, 3=country, 4=anonymity (2, 5-7 unused).
    """
    pattern = re.compile(
        '<tr><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>('
        '.*?)</td><td>(.*?)</td></tr>', re.S)
    items = re.findall(pattern, response.body)
    # re.findall always returns a list (possibly empty), never None, so
    # the original "if items is not None" guard was a no-op; dropped.
    for item in items:
        proxy = Proxy()
        proxy.set_value(
            ip=item[0],
            port=item[1],
            country=item[3],
            anonymity=item[4],
            source=self.name,
        )
        self.add_proxy(proxy)
def parse_page(self, response):
    """Parse a 5-column HTML proxy table, skip the header row, and
    register a Proxy per data row."""
    row_re = re.compile(
        '<tr><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td></tr>',
        re.S)
    rows = re.findall(row_re, response.body)
    # row 0 is the table header -- skip it
    for row in rows[1:]:
        proxy = Proxy()
        proxy.set_value(
            ip=row[0],
            port=row[1],
            country=row[2],
            anonymity=row[3],
            source=self.name
        )
        self.add_proxy(proxy=proxy)
def parse_page(self, response):
    """Parse a whitespace-padded 7-column proxy table and register a
    Proxy per row (columns: 0=ip, 1=port, 2=anonymity, 4=country)."""
    row_re = re.compile(
        '<tr>\s.*?<td.*?>(.*?)</td>\s.*?<td.*?>(.*?)</td>\s.*?<td.*?>(.*?)</td>\s.*?<td.*?>('
        '.*?)</td>\s.*?<td.*?>(.*?)</td>\s.*?<td.*?>(.*?)</td>\s.*?<td.*?>(.*?)</td>\s.*?</tr>',
        re.S)
    for row in re.findall(row_re, response.body):
        proxy = Proxy()
        proxy.set_value(
            ip=row[0],
            port=row[1],
            country=row[4],
            anonymity=row[2],
            source=self.name,
        )
        self.add_proxy(proxy)
def update_consts(filename, constname, content):
    """Replace the multi-line ``constname = (...)`` block inside
    *filename* with a freshly formatted one built from *content*."""
    with open(filename) as fh:
        text = fh.read()
    # Match from "<constname> = (" through the closing ")" on its own line.
    block_re = re.compile(
        r'^%s\s*=\s*\($.*?^\s*\)$' % constname, re.M | re.S)
    found = block_re.search(text)
    if not found:
        raise ValueError('Could not find existing definition for %s'
                         % (constname,))
    rewritten = (text[:found.start()]
                 + format_lines(constname, content)
                 + text[found.end():])
    with open(filename, 'w') as fh:
        fh.write(rewritten)
def getMovieSaveFile(self, moviename):
    """Derive a save-file base name from a video file name.

    For TV episodes (an SxxEyy marker is present) the show title -- or,
    for files named "SxxEyy.Title.ext", the trailing title -- is
    returned; for plain movies the title before the extension is
    returned.  Spaces and dots become underscores.  Returns None when
    nothing matches.
    """
    # BUG FIX: the original patterns used a character class
    # [ts|avi|mkv|...], which matches a SINGLE character from that set
    # rather than one of the extensions, and [S|s]/[E|e] also matched
    # '|'.  They are replaced with a proper (?:...) alternation and
    # [Ss]/[Ee] classes.  Bare `compile` (the 3-argument builtin) is
    # also qualified as re.compile.
    ext = (r'(?:ts|avi|mkv|divx|f4v|flv|img|iso|m2ts|m4v|mov|mp4'
           r'|mpeg|mpg|mts|vob|wmv)')
    if re.search('[Ss][0-9]+[Ee][0-9]+', moviename) is not None:
        tvseries = re.compile(
            r'(.*\w)[\s\.|-]+[Ss][0-9]+[Ee][0-9]+[\s\.|-].*?\.' + ext)
        tvseriesalt = re.compile(
            r'^[Ss][0-9]+[Ee][0-9]+[\s\.\-](.*\w)\.' + ext)
        if tvseries.match(moviename) is not None:
            return tvseries.match(moviename).groups()[0] \
                .replace(" ", "_").replace(".", "_")
        elif tvseriesalt.match(moviename) is not None:
            return tvseriesalt.match(moviename).groups()[0] \
                .replace(" ", "_").replace(".", "_")
        else:
            return None
    else:
        movietitle = re.compile(r'(.*\w)\.' + ext)
        if movietitle.match(moviename) is not None:
            return movietitle.match(moviename).groups()[0] \
                .replace(" ", "_").replace(".", "_")
        else:
            return None
def getInfos(self, data, id, type, filename):
    """Extract a plot/overview description from *data* (JSON-style for
    movies, <Overview> XML for series), write it to *filename*, then
    close the dialog."""
    if type == "movie":
        found = re.findall('"genres":\[(.*?)\].*?"overview":"(.*?)"', data, re.S)
        if found:
            genres, desc = found[0]
            genre = re.findall('"name":"(.*?)"', genres, re.S)
            genre = str(genre).replace('\'', '').replace('[', '').replace(']', '')
            self.writeTofile(decodeHtml(desc), filename)
    elif type == "serie":
        found = re.findall('<Overview>(.*?)</Overview>', data, re.S)
        if found:
            desc = found[0]
            self.writeTofile(decodeHtml(desc), filename)
    self.close(False)
def __init__(self, text = "", engine=None, bindparams=None, typemap=None, escape=True):
    """Represent a literal SQL text fragment, collecting ':name' bind
    parameters found in the text.

    When *escape* is true, each ':name' token in *text* is rewritten
    through the engine's bind template and a bindparam of the same name
    is recorded in self.bindparams.
    """
    self.parens = False
    self._engine = engine
    self.id = id(self)
    self.bindparams = {}
    self.typemap = typemap
    if typemap is not None:
        # normalize raw types into engine type descriptors, in place
        for key in typemap.keys():
            typemap[key] = engine.type_descriptor(typemap[key])

    def repl(m):
        # record the bind parameter and emit the engine's placeholder.
        # NOTE(review): this reads self.engine while the constructor
        # stores self._engine -- presumably an `engine` property exists
        # elsewhere on the class; confirm, otherwise this raises
        # AttributeError whenever escape=True.
        self.bindparams[m.group(1)] = bindparam(m.group(1))
        return self.engine.bindtemplate % m.group(1)

    if escape:
        self.text = re.compile(r':([\w_]+)', re.S).sub(repl, text)
    else:
        self.text = text
    if bindparams is not None:
        for b in bindparams:
            self.bindparams[b.key] = b
def fetch_xml(url):
    """Fetch a Yahoo-Weather-style XML feed from *url* and return a dict
    holding city/country/region plus one sub-dict per forecast entry.

    Side effects: prints the HTTP status and all response headers.
    """
    with request.urlopen(url) as f:
        print('Status:', f.status, f.reason)
        for k, v in f.getheaders():
            print('%s: %s' % (k, v))
        html = f.read().decode('utf-8')
    # location attributes: city, country, region
    pattern_one = re.compile(r'<yweather:location.*?city="(.*?)".*?country="(.*?)".*?region="(.*?)".*?/>', re.S)
    # forecast attributes: date, day, high, low, text
    pattern_two = re.compile(r'<yweather:forecast.*?date="(.*?)".*?day="(.*?)".*?high="(.*?)".*?low="(.*?)".*?text="(.*?)".*?/>', re.S)
    location_info = re.findall(pattern_one, html)
    items = re.findall(pattern_two, html)
    weather = {}
    weather['city'] = location_info[0][0]
    weather['country'] = location_info[0][1]
    weather['region'] = location_info[0][2]
    # one sub-dict per forecast, keyed by the day name (item[1])
    for item in items:
        weather[item[1]] = {}
        weather[item[1]]['data'] = item[0]  # NOTE(review): key 'data' likely meant 'date'; confirm consumers
        weather[item[1]]['high'] = item[2]
        weather[item[1]]['low'] = item[3]
        weather[item[1]]['text'] = item[4]
    return weather
def unhighlight(text):
    """Strip the pygments highlight wrapper from 'note' blocks (those
    containing no <span> markup) in *text*; genuinely highlighted code
    blocks are left untouched."""
    snippets = re.findall(
        '<div class="highlight"><pre><span></span>(?P<text>.+?)</pre></div>',
        text, re.M | re.S)
    for snippet in snippets:
        if not snippet.strip():
            continue
        # Re-locate the snippet together with its wrapper in the current
        # (possibly already-modified) text.
        wrapped = re.findall(
            r'<div class="highlight"><pre><span></span>'
            + re.escape(snippet) + '</pre></div>',
            text, re.M | re.S)
        if snippet.find('<span') == -1:
            # no <span> inside -> a plain note, not highlighted code
            if wrapped:
                unwrapped = wrapped[0].replace(
                    '<div class="highlight">', '').replace(
                    '</pre></div>', '</pre>')
                text = text.replace(wrapped[0], unwrapped)
    return text
def fetch(remote_file, local_tmp_file, local_file, diff_tool):
    """scp *remote_file* down to *local_tmp_file*, neutralize its
    generated table of contents, then launch *diff_tool* on it against
    *local_file* if the two differ.
    """
    cmd = 'scp %s %s' % (remote_file, local_tmp_file)
    print(cmd)
    os.system(cmd)
    with open(local_tmp_file) as f:
        txt = f.read()
    # Collapse the auto-generated TOC block so it never shows as a diff.
    ntxt = re.sub('Table of Contents.+markdown-toc.go\)', '[tableofcontent]',
                  txt, flags=re.S)
    with open(local_tmp_file, 'w') as f:
        f.write(ntxt)
    cmd = 'diff ' + local_tmp_file + ' ' + local_file
    o = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    if o.stdout.read().strip():  # if diff empty, file are the same
        # non-empty diff output -> hand off to the interactive tool
        cmd = diff_tool + ' ' + local_tmp_file + ' ' + local_file
        o = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        o.communicate()
        o.wait()
    else:
        print('The files are the same')

# main
def _do_code_blocks(self, text):
    """Process Markdown `<pre><code>` blocks."""
    # Verbose (re.X) pattern: matches runs of lines indented by one tab
    # or self.tab_width spaces and hands each run to
    # self._code_block_sub for rendering.  (re.X ignores the pattern's
    # internal whitespace, so this layout is behavior-neutral.)
    code_block_re = re.compile(r'''
        (?:\n\n|\A\n?)
        (               # $1 = the code block -- one or more lines, starting with a space/tab
          (?:
            (?:[ ]{%d} | \t)  # Lines must start with a tab or a tab-width of spaces
            .*\n+
          )+
        )
        ((?=^[ ]{0,%d}\S)|\Z)  # Lookahead for non-space at line-start, or end of doc
        # Lookahead to make sure this block isn't already in a code block.
        # Needed when syntax highlighting is being used.
        (?![^<]*\</code\>)
        ''' % (self.tab_width, self.tab_width), re.M | re.X)
    return code_block_re.sub(self._code_block_sub, text)
def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) # TODO store the loader or env on the tool factory for faster partial builds # (this would need to cope with new files) self._loader = FrontMatterFileSystemLoader(self._config.jinja_directories) self._env = Environment(loader=self._loader) self._env.filters['S'] = self.static_file_filter self._file_ctxs = {} self._initialise_templates() self._ctx = self._config.context self._library = self._config.find_library() self._library_files = walk(self._library) if self._library else [] self._extra_files = []
def next(self):
    """Play the current entry from self.content, then advance the
    playlist cursor (wrapping back to 0)."""
    # log('video ' + str(episodes[self.current][CONSTANTS.D_SOURCE]) + ',' + str(episodes[self.current][CONSTANTS.D_SHOW]))
    # addVideo('plugin://plugin.video.gdrive?mode=playvideo&title='+episodes[video][0],
    # { 'title' : str(episodes[video][CONSTANTS.D_SHOW]) + ' - S' + str(episodes[video][CONSTANTS.D_SEASON]) + 'xE' + str(episodes[video][CONSTANTS.D_EPISODE]) + ' ' + str(episodes[video][CONSTANTS.D_PART]) , 'plot' : episodes[video][CONSTANTS.D_SHOW] },
    # img='None')
    # play video
    # if self.isExit == 0:
    self.play('plugin://plugin.video.gdrive-testing/?mode=video&instance='+str(self.service.instanceName)+'&title='+self.content[self.current][0])
    #self.play('plugin://plugin.video.gdrive/?mode=video&instance='+str(self.service.instanceName)+'&title='+self.content[self.current][0])
    # self.play(self.content[self.current][0])
    # self.tvScheduler.setVideoWatched(self.worksheet, self.content[self.current][0])
    # self.tvScheduler.createRow(self.worksheet, '','','','')
    # NOTE(review): this guard lets self.current reach len(self.content)
    # before wrapping, so the next call can index one past the last item;
    # probably should be `self.current < len(self.content) - 1` -- confirm.
    if self.current < len(self.content):
        self.current += 1
    else:
        self.current = 0
def detect_test_from_requests(self, stream_mode=False):
    "Check in raw first line of requests for an httpwookiee test marker."
    # marker format embedded in the request: ...httpw=--<test id>--...
    detected = False
    if stream_mode:
        import six
        # whole raw stream as text
        block = six.text_type(self.stream)
    else:
        # only the first request's first line
        block = self.requests[0].first_line.raw.decode('utf8')
    matches = re.match(r'.*httpw=--(.*)--.*', block, re.S)
    if matches:
        request_test_id = matches.group(1)
        # only our own test id counts as a detection
        if self.test_id == request_test_id:
            detected = True
    return detected
def parse_one_page(html):
    """Yield one dict per article entry scraped from a jianshu.com
    listing page (author, link, title, abstract and the counters)."""
    entry_re = re.compile(
        '<li.*?blue-link".*?>(.*?)</a>.*?title".*?href="(.*?)">(.*?)</a>.*?abstract">(.*?)</p>.*?ic-list-read">.*?'
        + '</i>(.*?)</a>.*?ic-list-comments.*?</i>(.*?)</a>.*?ic-list-like.*?</i>(.*?)</span>.*?ic-list-money.*?</i>(.*?)</span>.*?</li>',
        re.S)
    for entry in re.findall(entry_re, html):
        yield {
            'author': entry[0],
            'link': "http://www.jianshu.com" + entry[1],
            'title': entry[2],
            'abstract': entry[3].strip(),
            'read-num': entry[4].strip(),
            'comment-num': entry[5].strip(),
            'like-num': entry[6],
            'money-num': entry[7],
        }
def test_blankpage(self) -> None:
    """The built blank document must render the expected minimal HTML
    skeleton (doctype, charset meta, W-DOM title, script hooks)."""
    # \s* between tags tolerates arbitrary pretty-printing whitespace;
    # wdom node ids are stripped before matching.
    _re = re.compile(
        '\s*<!DOCTYPE html>'
        '\s*<html>'
        '\s*<head>'
        '\s*<meta charset="utf-8">'
        '\s*<title>'
        '\s*W-DOM'
        '\s*</title>'
        '(\s*<script type="text/javascript">.*?</script>)?'
        '\s*</head>'
        '\s*<body>'
        '\s*<script type="text/javascript">'
        '.*?</script>'
        '\s*</body>'
        '.*</html>',
        re.S
    )
    html = self.doc.build()
    self.assertIsNotNone(_re.match(remove_wdom_id(html)))
def detach_signature(txt):
    """Split a clearsigned PGP message into (payload, signature).

    See RFC 4880, section 7.  If *txt* is not a clearsigned message, the
    original text is returned with a None signature.
    cf. debian.deb822.Deb822.split_gpg_and_payload (which doesn't handle
    dash escaping and doesn't verify that the input is well-formed)
    """
    # Normalize CRLF / lone CR line endings before matching.
    normalized = re.sub(r'\r\n?', '\n', txt)
    clearsigned = re.match(r'^\s*-----BEGIN PGP SIGNED MESSAGE-----\n'
                           r'(?:[^\n]+\n)*'
                           r'\n'
                           r'(.*)\n'
                           r'-----BEGIN PGP SIGNATURE-----\n'
                           r'(.*)\n'
                           r'-----END PGP SIGNATURE-----\s*$',
                           normalized, flags=re.S)
    if clearsigned is None:
        return (txt, None)
    ### TODO: Also return the armor headers?
    # Undo dash escaping, then restore CRLF endings in the payload.
    payload = re.sub('^- ', '', clearsigned.group(1), flags=re.M)
    return (payload.replace('\n', '\r\n'), clearsigned.group(2))
def handlehtml(url):
    """Fetch a yinyuetai page and return the best available video URL,
    preferring index 2 (HE/720p), then 1 (HD/540p), then 0 (HC/432p).

    NOTE(review): Python 2 code (urllib2, print statement).  The bare
    `except:` swallows every error -- including the IndexError raised by
    findlist[0] when nothing matched -- and falls through returning
    None; narrow it before reuse.
    """
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
    try:
        request = urllib2.Request(url, None, headers)
        html = urllib2.urlopen(request)
        data = html.read()
        #----------------------------
        # video URLs end right before the "&br" query parameter
        reg = re.compile(r"http://\w*?\.yinyuetai\.com/uploads/videos/common/.*?(?=&br)", re.S)
        findlist = re.findall(reg, data)
        #HC(432p) HD(540p) HE(720p)
        #
        if len(findlist) >= 3:
            return findlist[2]
        elif len(findlist) >= 2:
            return findlist[1]
        else:
            return findlist[0]
    except:
        print 'Reading vodeolist failed!'
def splitpasswd(user):
    '''urllib.splitpasswd(), but six's support of this is missing'''
    # 'user[:password]' -> (user, password-or-None); re.S lets the
    # password part contain newlines.
    credential_re = re.compile('^([^:]*):(.*)$', re.S)
    parsed = credential_re.match(user)
    if parsed is None:
        return user, None
    return parsed.group(1, 2)
def now_date(self):
    """Return the current local time as a 14-digit YYYYMMDDHHMMSS
    timestamp string."""
    return datetime.datetime.now().strftime("%Y%m%d%H%M%S")
def parse_attr(self):
    """Parse film attributes (type, nation, language, duration, year,
    douban rating, actor, director) out of the page's attribute <p>
    blocks via self.html (a pyquery-like callable).

    NOTE(review): every pattern below shows '?' runs where the original
    (presumably Chinese) label text was lost to mojibake; patterns
    beginning with '?' are invalid regexes and raise re.error as
    written -- restore the literals from the upstream project.
    """
    attr_p1 = self.html("p[class='col-xs-6']")
    text1 = attr_p1.text()
    type_result = re.search('??? (.+) ??', text1)
    if type_result:
        self.type_ = type_result.group(1)
    nation_result = re.search('??? (.+) ??', text1)
    if nation_result:
        self.nation = nation_result.group(1)
    language_result = re.search('??? (.+) ??', text1)
    if language_result:
        self.language = language_result.group(1)
    duration_result = re.search('??? (.+) ????', text1)
    if duration_result:
        self.duration = duration_result.group(1)
    year_result = re.search('????? (.+) ????', text1)
    if year_result:
        self.date = year_result.group(1)
    douban_result = re.search('????? (.+) ??', text1)
    if douban_result:
        self.douban = douban_result.group(1)
    attr_p2 = self.html("p[class='col-lg-7 col-md-7 col-sm-7 col-xs-12']")
    text2 = attr_p2.text()
    # re.S: the actor list may span multiple lines
    actor_result = re.search('??? (.+) ??', text2, flags=re.S)
    if actor_result:
        self.actor = actor_result.group(1)
    director_result = re.search('??? (.+)$', text2)
    if director_result:
        self.director = director_result.group(1)
def _parse_name(self, information):
    """Extract the film title from *information*, trying several label
    patterns in turn.

    NOTE(review): the '?' runs are mojibake for the original Chinese
    labels; patterns starting with '?' are invalid regexes as written --
    restore from upstream before use.
    """
    search_result = re.search('?? ???(.+?)?', information)
    if not search_result:
        search_result = re.search('?????(.+?)?', information, flags=re.S)
    if not search_result:
        search_result = re.search('?? ????(.+?)?', information)
    if search_result:
        self.name = search_result.group(1).strip()
def _parse_original_name(self, information):
    """Extract the film's original-language title from *information*,
    trying several label patterns in turn.

    NOTE(review): the '?' runs are mojibake for the original Chinese
    labels; patterns starting with '?' are invalid regexes as written --
    restore from upstream before use.
    """
    search_result = re.search('?? ???(.+?)?', information)
    if not search_result:
        search_result = re.search('?????(.+?)?', information, flags=re.S)
    if not search_result:
        search_result = re.search('?? ????(.+?)?', information, flags=re.S)
    if search_result:
        self.original_name = search_result.group(1).strip()
def _parse_nation(self, information):
    """Extract the production nation from *information*, trying several
    label patterns in turn.

    NOTE(review): the '?' runs are mojibake for the original Chinese
    labels; patterns starting with '?' are invalid regexes as written --
    restore from upstream before use.
    """
    search_result = re.search('?? ???(.+?)?', information)
    if not search_result:
        search_result = re.search('?????(.+?)?', information, flags=re.S)
    if not search_result:
        search_result = re.search('?????(.+?)?', information, flags=re.S)
    if not search_result:
        search_result = re.search('?? ????(.+?)?', information, flags=re.S)
    if search_result:
        self.nation = search_result.group(1).strip()
def _parse_type(self, information):
    """Extract the film genre/type from *information*, trying several
    label patterns in turn.

    NOTE(review): the '?' runs are mojibake for the original Chinese
    labels; patterns starting with '?' are invalid regexes as written --
    restore from upstream before use.
    """
    search_result = re.search('?? ???(.+?)?', information)
    if not search_result:
        search_result = re.search('?????(.+?)?', information, flags=re.S)
    if not search_result:
        search_result = re.search('?? ????(.+?)?', information, flags=re.S)
    if search_result:
        self.type_ = search_result.group(1).strip()
def _parse_imdb(self, information):
    """Extract the IMDB rating/id from *information*, trying several
    label spellings in turn.

    NOTE(review): the '?' runs are mojibake for the original Chinese
    label text around 'IMDB'; patterns starting with '?' are invalid
    regexes as written -- restore from upstream before use.
    """
    search_result = re.search('?IMDB ? ??(.+?)?', information)
    if not search_result:
        search_result = re.search('?IMDB??(.+?)?', information, flags=re.S)
    if not search_result:
        search_result = re.search('?IMDb??(.+?)?', information, flags=re.S)
    if not search_result:
        search_result = re.search('?IMDB????(.+?)?', information, flags=re.S)
    if search_result:
        self.imdb = search_result.group(1).strip()
def _parse_douban(self, information):
    """Extract the douban rating from *information*.

    NOTE(review): the '?' run is mojibake for the original Chinese
    label; a pattern starting with '?' is an invalid regex as written --
    restore from upstream before use.
    """
    search_result = re.search('?????(.+?)?', information, flags=re.S)
    if search_result:
        self.douban = search_result.group(1).strip()