Python re module: findall() example source code

We extracted the following 49 code examples from open-source Python projects to illustrate how to use re.findall().
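
Before the project samples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the re.findall() behaviours those samples rely on: with no capture group it returns the whole matches, with one group it returns just that group, and with several groups it returns a tuple per match.

import re

text = 'CREATE TABLE `users` (`id` INT, `name` VARCHAR)'

# No capture group: the full matched substrings are returned.
print(re.findall(r'`\w+`', text))           # ['`users`', '`id`', '`name`']

# One capture group: only the captured text is returned.
print(re.findall(r'`(\w+)`', text))         # ['users', 'id', 'name']

# Several capture groups: a tuple per match is returned.
print(re.findall(r'`(\w+)` (\w+)', text))   # [('id', 'INT'), ('name', 'VARCHAR')]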

Project: ln2sql    Author: FerreroJeremy    | project source code | file source code
def create_table(self, table_string):
        lines = table_string.split("\n")
        table = Table()
        for line in lines:
            if 'TABLE' in line:
                table_name = re.search("`(\w+)`", line)
                table.name = table_name.group(1)
                if self.thesaurus_object is not None:
                    table.equivalences = self.thesaurus_object.get_synonyms_of_a_word(table.name)
            elif 'PRIMARY KEY' in line:
                primary_key_columns = re.findall("`(\w+)`", line)
                for primary_key_column in primary_key_columns:
                    table.add_primary_key(primary_key_column)
            else:
                column_name = re.search("`(\w+)`", line)
                if column_name is not None:
                    column_type = self.predict_type(line)
                    if self.thesaurus_object is not None:
                        equivalences = self.thesaurus_object.get_synonyms_of_a_word(column_name.group(1))
                    else:
                        equivalences = []
                    table.add_column(column_name.group(1), column_type, equivalences)
        return table
Project: python-    Author: secondtonone1    | project source code | file source code
def getDetailList(self,content):
        s2 = r'<h2><a target="_blank" href="(.*?)" title="(.*?)"'
        pattern = re.compile(s2, re.S)
        result = re.findall(pattern, content)
        with open('file.txt','w',encoding='gbk') as f:
            f.write(content)

        if not result:
            print('no results found..............')


        threadsList=[] 
        for item in result:
            t = threading.Thread(target = workthread, args=(item, self.user_agent, self.path))
            threadsList.append(t)
            t.start()

        for threadid in threadsList:
            threadid.join()
Project: Python    Author: Guzi219    | project source code | file source code
def ParseHtml(self, html):
        soup = BeautifulSoup(html)
        links = soup.findAll('a', attrs={'class': 'ulink'})
        #print len(links)
        if len(links) == 0: #the js return
            # tmp_js = soup.find(name='script', attrs={'language': 'javascript'})
            js_str = soup.script.string #two ways to get the <script></script>
            new_url = js_str[16:-1] #get the new url
            new_url = eval(new_url) # eval() the quoted JS string literal to get the plain url string
            self.ParseHtml(self.LoadPage(new_url))
        else:
            # print type(links)
            for link in links:
                # print type(link)
                # print type(link.string)
                # print unicode(link.string)
                titles = re.findall(r'?(.+?)?', str(link.string)) #unicode(link.string))
                if len(titles) != 0:
                    print titles[0]
                # print 'url is %s, title is %s.' %(link['href'], titles[0])
Project: Stitch    Author: nathanlopez    | project source code | file source code
def get_profiles():
    passwd=''
    netsh_output = run_command("netsh wlan show profiles")
    if "not running" in netsh_output:
        net_wlan = run_command("net start wlansvc")
        if "started successfully" in net_wlan:
            netsh_output = run_command("netsh wlan show profiles")
        else:
            return net_wlan
    if "no wireless interface" in netsh_output:
        return netsh_output
    else:
        profiles=re.findall(': (.*)\r',netsh_output)
        for x in profiles:
            output= run_command('netsh wlan show profiles "{}" key=clear'.format(x))
            #output=re.findall('(Key Content.*)\r',proc)
            if output:
                passwd += "\n{}\n{}\n\n".format(x,output)
        return passwd
Project: python-    Author: secondtonone1    | project source code | file source code
def getDetailList(self,content):
        s2 = r'<h2><a target="_blank" href="(.*?)" title="(.*?)"'
        pattern = re.compile(s2, re.S)
        result = re.findall(pattern, content)
        with open('file.txt','w',encoding='gbk') as f:
            f.write(content)

        if not result:
            print('no results found..............')


        threadsList=[] 
        for item in result:
            t = threading.Thread(target = workthread, args=(item, self.user_agent, self.path))
            threadsList.append(t)
            t.start()

        for threadid in threadsList:
            threadid.join()
Project: ochem_predict_nn    Author: connorcoley    | project source code | file source code
def reassign_atom_mapping(transform):
    '''This function takes an atom-mapped reaction and reassigns 
    the atom-mapping labels (numbers) from left to right, once 
    that transform has been canonicalized.'''

    all_labels = re.findall('\:([0-9]+)\]', transform)

    # Define list of replacements which matches all_labels *IN ORDER*
    replacements = []
    replacement_dict = {}
    counter = 1
    for label in all_labels: # keep in order! this is important
        if label not in replacement_dict:
            replacement_dict[label] = str(counter)
            counter += 1
        replacements.append(replacement_dict[label])

    # Perform replacements in order
    transform_newmaps = re.sub('\:[0-9]+\]', 
        lambda match: (':' + replacements.pop(0) + ']'),
        transform)

    return transform_newmaps
Project: spyking-circus    Author: spyking-circus    | project source code | file source code
def _read_from_header(self):

        a, b, c                = self._get_header()
        header                 = a
        header['data_offset']  = b
        header['nb_channels']  = c
        #header['dtype_offset'] = int(header['ADC zero'])
        header['gain']         = float(re.findall("\d+\.\d+", header['El'])[0])
        header['data_dtype']   = self.params['data_dtype']

        self.data   = numpy.memmap(self.file_name, offset=header['data_offset'], dtype=header['data_dtype'], mode='r')
        self.size   = len(self.data)
        self._shape = (self.size//header['nb_channels'], header['nb_channels'])
        del self.data

        return header
Project: sopel-modules    Author: phixion    | project source code | file source code
def title_command(bot, trigger):
    """
    Show the title or URL information for the given URL, or the last URL seen
    in this channel.
    """
    if not trigger.group(2):
        if trigger.sender not in bot.memory['last_seen_url']:
            return
        matched = check_callbacks(bot, trigger,
                                  bot.memory['last_seen_url'][trigger.sender],
                                  True)
        if matched:
            return
        else:
            urls = [bot.memory['last_seen_url'][trigger.sender]]
    else:
        urls = re.findall(url_finder, trigger)

    results = process_urls(bot, trigger, urls)
    for title, domain in results[:4]:
        bot.reply('[ %s ] - %s' % (title, domain))
Project: pyselenium-js    Author: neetjn    | project source code | file source code
def test_trigger_single_event(self):
        """Test: Trigger click event on button, validate dispatched"""
        regex = '([0-9]{1,3})'
        original = eval(re.findall(regex, self.page.counter_label.text)[0])
        self.page.js.trigger_event(
            element=self.page.add_counter_button,
            event='click'
        )
        for i in range(10):
            if (original == eval(re.findall(regex, self.page.counter_label.text)[0])):
                time.sleep(1)
            else:
                break
        modified = eval(re.findall(regex, self.page.counter_label.text)[0])
        self.assertEqual(
            modified, original+1,
            'Counter label was not modified as expected; %s clicks' % modified
        )
Project: pyselenium-js    Author: neetjn    | project source code | file source code
def test_trigger_multiple_events(self):
        """Test: Trigger click event on button twice, validate dispatched"""
        regex = '([0-9]{1,3})'
        original = eval(re.findall(regex, self.page.counter_label.text)[0])
        self.page.js.trigger_event(
            element=self.page.add_counter_button,
            event=('click', 'click')
        )
        for i in range(10):
            if (original == eval(re.findall(regex, self.page.counter_label.text)[0])):
                time.sleep(1)
            else:
                break
        modified = eval(re.findall(regex, self.page.counter_label.text)[0])
        self.assertEqual(
            modified, original+2,
            'Counter label was not modified as expected; %s clicks' % modified
        )
Project: pyselenium-js    Author: neetjn    | project source code | file source code
def test_trigger_multiple_events_multiple_elements(self):
        """Test: Trigger click event on two buttons twice, validate dispatched"""
        regex = '([0-9]{1,3})'
        num_counter_original = eval(re.findall(regex, self.page.counter_label.text)[0])
        num_users_original = len(self.page.user_cards)
        self.page.js.trigger_event(
            element=(self.page.add_counter_button, self.page.add_user_button),
            event=('click', 'click')
        )
        for i in range(10):
            if (num_counter_original == eval(re.findall(regex, self.page.counter_label.text)[0])):
                time.sleep(1)
            else:
                break
        num_counter_modified = eval(re.findall(regex, self.page.counter_label.text)[0])
        self.assertEqual(
            num_counter_modified, num_counter_original+2,
            'Counter label was not modified as expected; %s clicks' % num_counter_modified
        )
        self.assertEqual(
            len(self.page.user_cards), num_users_original+2,
            'Expected %s user cards found %s' % (
                num_users_original+2, len(self.page.user_cards)
            )
        )
Project: trf    Author: aistairc    | project source code | file source code
def calc_rs_modality(self) -> Dict[str, float]:

        modality_counter = Counter()
        for i, s in enumerate(self.sentences):
            chunks = []
            for bnst in self.knp.parse(s).bnst_list():
                chunk = Chunk(chunk_id=bnst.bnst_id,
                              link=bnst.parent,
                              description=bnst.fstring)
                chunks.append(chunk)

            s = "".join([chunk.description for chunk in chunks])
            ms = set(re.findall("<モダリティ-(.+?)>", s))  # KNP marks modality in the bunsetsu feature string as <モダリティ-...>
            modality_counter += Counter(ms)

            n = len(self.sentences)

        return dict([(k, float(c) / n)
                     for k, c in modality_counter.items()])
Project: cellranger    Author: 10XGenomics    | project source code | file source code
def get_transcript_gc_content(self, transcript_obj):
        pattern = re.compile('[cCgG]')

        gc, length = 0, 0
        for interval in transcript_obj.intervals:
            if interval.chrom not in self.chroms:
                continue

            seq = self.chroms[interval.chrom][interval.start:interval.end]
            gc += len(re.findall(pattern, seq))
            length += interval.length

        if length > 0:
            return float(gc) / float(length)
        else:
            return 0

# NOTE: these stub classes are necessary to maintain backwards compatibility with old refdata (1.2 or older)
Project: ComicSpider    Author: QuantumLiu    | project source code | file source code
def get_info(self):
        '''
        Get information about the comic.
        return:
            comic title, description, cover url, chapters' urls
        '''
        headers={'User-Agent':"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36",'Referer':'http://manhua.dmzj.com/tags/s.shtml'}
        root='http://manhua.dmzj.com'
        r_title=r'<span class="anim_title_text"><a href=".*?"><h1>(.*?)</h1></a></span>'
        r_des=r'<meta name=\'description\' content="(.*?)"/>'# description (the CJK anchor text of this pattern was lost to the page encoding)
        r_cover=r'src="(.*?)" id="cover_pic"/></a>'# cover image url
        r_cb=r'<div class="cartoon_online_border" >([\s\S]*?)<div class="clearfix"></div>'# chapter list block
        r_cs=r'<li><a title="(.*?)" href="(.*?)" .*?>.*?</a>'# chapter title and url
        try:
            text=requests.get(self.comic_url,headers=headers).text
        except ConnectionError:
            traceback.print_exc()
            raise ConnectionError
        title=re.findall(r_title,text)[0]
        cb=re.findall(r_cb,text)[0]
        chapter_urls=[(c[0],root+c[1]+'#@page=1') for c in re.findall(r_cs,cb)]
        cover_url=re.findall(r_cover,text)[0]
        des=re.findall(r_des,text)
        return title,des,cover_url,chapter_urls
Project: Instagram    Author: Fastcampus-WPS-5th    | project source code | file source code
def make_html_content_and_add_tags(self):
        # compile a pattern that matches hashtags ('#word') in the content
        p = re.compile(r'(#\w+)')
        # findall() returns every hashtag string found in the content
        tag_name_list = re.findall(p, self.content)
        # keep the original content before substitution
        ori_content = self.content
        # process each extracted hashtag
        for tag_name in tag_name_list:
            # get or create the Tag object; the 'created' flag is discarded as _
            tag, _ = Tag.objects.get_or_create(name=tag_name.replace('#', ''))
            # build the anchor tag that will replace the hashtag in the content
            change_tag = '<a href="{url}" class="hash-tag">{tag_name}</a>'.format(
                # url=reverse('post:hashtag_post_list', args=[tag_name.replace('#', '')]),
                url=reverse('post:hashtag_post_list',
                            kwargs={'tag_name': tag_name.replace('#', '')}),
                tag_name=tag_name
            )
            ori_content = re.sub(r'{}(?![<\w])'.format(tag_name), change_tag, ori_content, count=1)
            # add the Tag to this object's tags if it is not there yet
            if not self.tags.filter(pk=tag.pk).exists():
                self.tags.add(tag)
        # store the rewritten content in html_content and save it
        self.html_content = ori_content
        super().save(update_fields=['html_content'])
Project: django-powerpages    Author: Open-E-WEB    | project source code | file source code
def parse_sitemap(content):
    if not isinstance(content, six.text_type):
        content = content.decode('utf-8')
    urlset_match = re.search(
        r'<urlset[^>]*>(?P<urls>[\s\S]*)</urlset>', content
    )
    if urlset_match:
        results = []
        urlset_content = urlset_match.groupdict()['urls']
        for url_content in re.findall(r'<url>([\s\S]+)</url>', urlset_content):
            results.append(
                dict(
                    re.findall(r'<([^>]+)>([^<]*)</[^>]+>', url_content)
                )
            )
    else:
        results = None
    return results
Project: fantasy-dota-heroes    Author: ThePianoDentist    | project source code | file source code
def get_players():
    with open("../../junk/iemoakland.html") as f:
        html = f.read()
    output = []
    teams = re.findall('(?s)<div class="influencer-card">(.*?)<!-- Card End  -->', html)
    counter = 1
    for t in teams:
        team_name = re.search('<h1 class="influencer-name">([^<]+)</h1>', t).group(1)
        player_section = re.search('(?s)<p class="influencer-description">(.*?)</p>', t).group(1)
        players = re.findall('(?:<a[^>]+>)?\s*(.*?)(?:</a>)?<br />', player_section)
        if len(players) < 4:
            print(team_name)
            print(players)
        for player in players:
            if '<a hre' in player:
                player = re.search('<a[^>]+>([^<]+)', player).group(1)
            output.append({"id": counter, "name": player, "team": team_name, "value": 10.0})
            counter += 1

    with open("../../lib/pubg_players.py", "w+") as f:
        f.write("pubg_init = " + repr(output))
    return
Project: course-crawler    Author: Foair    | project source code | file source code
def get_summary(url):
    """ ?????????? """

    res = CONNECTION.get(url).text

    # term id of the course
    term_id = re.search(r'termId : "(\d+)"', res).group(1)

    names = re.findall(r'name:"(.+)"', res)
    # course name
    course_name = names[0]
    # institution name
    institution = names[1]

    # directory name for the download folder
    dir_name = REG_FILE.sub('', course_name + ' - ' + institution)
    print(dir_name)

    return term_id, dir_name
Project: course-crawler    Author: Foair    | project source code | file source code
def get_summary(url):
    """ ?????????? """

    res = CONNECTION.get(url).text

    # term id of the course
    term_id = re.search(r'termId : "(\d+)"', res).group(1)

    names = re.findall(r'name:"(.+)"', res)
    # course name
    course_name = names[0]
    # institution name
    institution = names[1]

    # directory name for the download folder
    dir_name = REG_FILE.sub('', course_name + ' - ' + institution)
    print(dir_name)

    return term_id, dir_name
Project: course-crawler    Author: Foair    | project source code | file source code
def get_announce(term_id):
    """ ??????? """

    # batchId is just the current timestamp in milliseconds: str(int(time.time() * 1000))
    post_data = {'callCount': '1', 'scriptSessionId': '${scriptSessionId}190', 'httpSessionId': 'dba4977be78d42a78a6e2c2dd2b9bb42', 'c0-scriptName': 'CourseBean', 'c0-methodName': 'getAllAnnouncementByTerm', 'c0-id': '0', 'c0-param0': 'number:' + term_id, 'c0-param1': 'number:1', 'batchId': str(int(time.time() * 1000))}
    res = CONNECTION.post('http://mooc.study.163.com/dwr/call/plaincall/CourseBean.getAllAnnouncementByTerm.dwr', data=post_data).text

    announcements = re.findall(r'content="(.*?[^\\])".*title="(.*?[^\\])"', res)

    with open(os.path.join(BASE_DIR, 'Announcements.html'), 'w', encoding='utf-8') as announce_file:
        for announcement in announcements:
            # announcement content
            announce_content = announcement[0].encode('utf-8').decode('unicode_escape')
            # announcement title
            announce_title = announcement[1].encode('utf-8').decode('unicode_escape')
            announce_file.write('<h1>' + announce_title + '</h1>\n' + announce_content + '\n')
Project: segno    Author: heuer    | project source code | file source code
def pdf_as_matrix(buff, border):
    """\
    Reads the path in the PDF and returns it as list of 0, 1 lists.

    :param io.BytesIO buff: Buffer to read the matrix from.
    """
    pdf = buff.getvalue()
    h, w = re.search(br'/MediaBox \[0 0 ([0-9]+) ([0-9]+)\]', pdf,
                     flags=re.MULTILINE).groups()
    if h != w:
        raise ValueError('Expected equal height/width, got height="{}" width="{}"'.format(h, w))
    size = int(w) - 2 * border

    graphic = _find_graphic(buff)
    res = [[0] * size for i in range(size)]
    for x1, y1, x2, y2 in re.findall(r'\s*(\-?\d+)\s+(\-?\d+)\s+m\s+'
                                        r'(\-?\d+)\s+(\-?\d+)\s+l', graphic):
        x1, y1, x2, y2 = [int(i) for i in (x1, y1, x2, y2)]
        y = abs(y1)
        res[y][x1:x2] = [1] * (x2 - x1)
    return res
Project: plugin.video.exodus    Author: lastship    | project source code | file source code
def get(self, netloc, ua, timeout):
        try:
            headers = {'User-Agent': ua, 'Referer': netloc}
            result = _basic_request(netloc, headers=headers, timeout=timeout)

            match = re.findall('xhr\.open\("GET","([^,]+),', result)
            if not match:
                return False

            url_Parts = match[0].split('"')
            url_Parts[1] = '1680'
            url = urlparse.urljoin(netloc, ''.join(url_Parts))

            match = re.findall('rid=([0-9a-zA-Z]+)', url_Parts[0])
            if not match:
                return False

            headers['Cookie'] = 'rcksid=%s' % match[0]
            result = _basic_request(url, headers=headers, timeout=timeout)
            return self.getCookieString(result, headers['Cookie'])
        except:
            return

    # not very robust but laziness...
Project: plugin.video.exodus    Author: lastship    | project source code | file source code
def get(self, result):
        try:
            s = re.compile("S\s*=\s*'([^']+)").findall(result)[0]
            s = base64.b64decode(s)
            s = s.replace(' ', '')
            s = re.sub('String\.fromCharCode\(([^)]+)\)', r'chr(\1)', s)
            s = re.sub('\.slice\((\d+),(\d+)\)', r'[\1:\2]', s)
            s = re.sub('\.charAt\(([^)]+)\)', r'[\1]', s)
            s = re.sub('\.substr\((\d+),(\d+)\)', r'[\1:\1+\2]', s)
            s = re.sub(';location.reload\(\);', '', s)
            s = re.sub(r'\n', '', s)
            s = re.sub(r'document\.cookie', 'cookie', s)

            cookie = '' ; exec(s)
            self.cookie = re.compile('([^=]+)=(.*)').findall(cookie)[0]
            self.cookie = '%s=%s' % (self.cookie[0], self.cookie[1])

            return self.cookie
        except:
            pass
Project: plugin.video.exodus    Author: lastship    | project source code | file source code
def odnoklassniki(url):
    try:
        media_id = re.compile('//.+?/.+?/([\w]+)').findall(url)[0]

        result = client.request('http://ok.ru/dk', post={'cmd': 'videoPlayerMetadata', 'mid': media_id})
        result = re.sub(r'[^\x00-\x7F]+', ' ', result)
        result = json.loads(result).get('videos', [])

        hd = []
        for name, quali in {'ultra': '4K', 'quad': '1440p', 'full': '1080p', 'hd': 'HD'}.items():
            hd += [{'quality': quali, 'url': i.get('url')} for i in result if i.get('name').lower() == name]

        sd = []
        for name, quali in {'sd': 'SD', 'low': 'SD', 'lowest': 'SD', 'mobile': 'SD'}.items():
            sd += [{'quality': quali, 'url': i.get('url')} for i in result if i.get('name').lower() == name]

        url = hd + sd[:1]
        if not url == []: return url
    except:
        return
Project: plugin.video.exodus    Author: lastship    | project source code | file source code
def cldmailru(url):
    try:
        v = url.split('public')[-1]

        r = client.request(url)
        r = re.sub(r'[^\x00-\x7F]+', ' ', r)

        tok = re.findall('"tokens"\s*:\s*{\s*"download"\s*:\s*"([^"]+)', r)[0]

        url = re.findall('"weblink_get"\s*:\s*\[.+?"url"\s*:\s*"([^"]+)', r)[0]

        url = '%s%s?key=%s' % (url, v, tok)

        return url
    except:
        return
Project: plugin.video.exodus    Author: lastship    | project source code | file source code
def yandex(url):
    try:
        cookie = client.request(url, output='cookie')

        r = client.request(url, cookie=cookie)
        r = re.sub(r'[^\x00-\x7F]+', ' ', r)

        sk = re.findall('"sk"\s*:\s*"([^"]+)', r)[0]

        idstring = re.findall('"id"\s*:\s*"([^"]+)', r)[0]

        idclient = binascii.b2a_hex(os.urandom(16))

        post = {'idClient': idclient, 'version': '3.9.2', 'sk': sk, '_model.0': 'do-get-resource-url', 'id.0': idstring}
        post = urllib.urlencode(post)

        r = client.request('https://yadi.sk/models/?_m=do-get-resource-url', post=post, cookie=cookie)
        r = json.loads(r)

        url = r['models'][0]['data']['file']

        return url
    except:
        return
Project: plugin.video.exodus    Author: lastship    | project source code | file source code
def geturl(url):
    try:
        r = client.request(url, output='geturl')
        if r == None: return r

        host1 = re.findall('([\w]+)[.][\w]+$', urlparse.urlparse(url.strip().lower()).netloc)[0]
        host2 = re.findall('([\w]+)[.][\w]+$', urlparse.urlparse(r.strip().lower()).netloc)[0]
        if host1 == host2: return r

        proxies = sorted(get(), key=lambda x: random.random())
        proxies = sorted(proxies, key=lambda x: random.random())
        proxies = proxies[:3]

        for p in proxies:
            p += urllib.quote_plus(url)
            r = client.request(p, output='geturl')
            if not r == None: return parse(r)

    except:
        pass
Project: plugin.video.exodus    Author: lastship    | project source code | file source code
def movie(self, imdb, title, localtitle, aliases, year):
        try:
            t = 'http://www.imdb.com/title/%s' % imdb
            t = client.request(t, headers={'Accept-Language': 'es-AR'})
            t = client.parseDOM(t, 'title')[0]
            t = re.sub('(?:\(|\s)\d{4}.+', '', t).strip().encode('utf-8')

            q = self.search_link % urllib.quote_plus(t)
            q = urlparse.urljoin(self.base_link, q)

            r = client.request(q)

            r = client.parseDOM(r, 'div', attrs = {'class': 'item'})
            r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'span', attrs = {'class': 'tt'}), client.parseDOM(i, 'span', attrs = {'class': 'year'})) for i in r]
            r = [(i[0][0], i[1][0], i[2][0]) for i in r if len(i[0]) > 0 and len(i[1]) > 0 and len(i[2]) > 0]
            r = [i[0] for i in r if cleantitle.get(t) == cleantitle.get(i[1]) and year == i[2]][0]

            url = re.findall('(?://.+?|)(/.+)', r)[0]
            url = client.replaceHTMLCodes(url)
            url = url.encode('utf-8')
            return url
        except:
            pass
Project: plugin.video.exodus    Author: lastship    | project source code | file source code
def searchMovie(self, title, year, aliases, headers):
        try:
            title = cleantitle.normalize(title)
            url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(cleantitle.getsearch(title)))
            r = client.request(url, headers=headers, timeout='15')
            r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
            r = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a', ret='title'))
            results = [(i[0], i[1], re.findall('\((\d{4})', i[1])) for i in r]
            try:
                r = [(i[0], i[1], i[2][0]) for i in results if len(i[2]) > 0]
                url = [i[0] for i in r if self.matchAlias(i[1], aliases) and (year == i[2])][0]
            except:
                url = None
                pass

            if (url == None):
                url = [i[0] for i in results if self.matchAlias(i[1], aliases)][0]
            return url
        except:
            return
Project: plugin.video.exodus    Author: lastship    | project source code | file source code
def searchMovie(self, title, year, aliases, headers):
        try:
            title = cleantitle.normalize(title)
            url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(cleantitle.getsearch(title)))
            r = client.request(url, headers=headers, timeout='15')
            r = client.parseDOM(r, 'div', attrs={'class': 'item-detail'})
            r = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a', ret='title'))
            results = [(i[0], i[1], re.findall('\((\d{4})', i[1])) for i in r]
            try:
                r = [(i[0], i[1], i[2][0]) for i in results if len(i[2]) > 0]
                url = [i[0] for i in r if self.matchAlias(i[1], aliases) and (year == i[2])][0]
            except:
                url = None
                pass

            if (url == None):
                url = [i[0] for i in results if self.matchAlias(i[1], aliases)][0]
            url = '%s/watch' % url
            return url
        except:
            return
Project: plugin.video.exodus    Author: lastship    | project source code | file source code
def movie(self, imdb, title, localtitle, aliases, year):
        try:
            t = cleantitle.get(title)

            p = self.post_link % urllib.quote_plus(cleantitle.query(title))
            q = urlparse.urljoin(self.base_link, self.search_link)

            r = proxy.request(q, 'playing top', post=p, XHR=True)

            r = client.parseDOM(r, 'li')
            r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a')) for i in r]
            r = [(i[0][0], i[1][0]) for i in r if i[0] and i[1]]
            r = [(i[0], re.findall('(.+?)\((\d{4})', i[1])) for i in r]
            r = [(i[0], i[1][0][0], i[1][0][1]) for i in r if i[1]]
            r = [i for i in r if t == cleantitle.get(i[1]) and str(year) == i[2]]

            url = proxy.parse(r[0][0])

            url = re.findall('(?://.+?|)(/.+)', url)[0]
            url = client.replaceHTMLCodes(url)
            url = url.encode('utf-8')
            return url
        except:
            pass
Project: plugin.video.exodus    Author: lastship    | project source code | file source code
def searchMovie(self, title, year, aliases, headers):
        try:
            title = cleantitle.normalize(title)
            url = urlparse.urljoin(self.base_link, self.search_link % cleantitle.geturl(title))
            r = client.request(url, headers=headers, timeout='15')
            r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
            r = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a', ret='title'))
            results = [(i[0], i[1], re.findall('\((\d{4})', i[1])) for i in r]
            try:
                r = [(i[0], i[1], i[2][0]) for i in results if len(i[2]) > 0]
                url = [i[0] for i in r if self.matchAlias(i[1], aliases) and (year == i[2])][0]
            except:
                url = None
                pass

            if (url == None):
                url = [i[0] for i in results if self.matchAlias(i[1], aliases)][0]
            url = '%s/watch' % url
            return url
        except:
            return
Project: plugin.video.exodus    Author: lastship    | project source code | file source code
def searchMovie(self, title, year, aliases):
        try:
            url = '%s/%s-%s/' % (self.base_link, cleantitle.geturl(title), year)
            url = client.request(url, output='geturl')

            if url == None:
                t = cleantitle.get(title)
                q = '%s %s' % (title, year)
                q = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(q))
                r = client.request(q)
                r = client.parseDOM(r, 'div', attrs={'class': 'inner'})
                r = client.parseDOM(r, 'div', attrs={'class': 'info'})
                r = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a', ret='title'))
                r = [(i[0], re.findall('(?:^Watch Movie |^Watch movies |^Watch |)(.+?)\((\d{4})', i[1])) for i in r]
                r = [(i[0], i[1][0][0], i[1][0][1]) for i in r if i[1]]
                url = [i[0] for i in r if self.matchAlias(i[1], aliases) and year == i[2]][0]

            if url == None: raise Exception()
            return url
        except:
            return
Project: plugin.video.exodus    Author: lastship    | project source code | file source code
def searchMovie(self, title, year, aliases, headers):
        try:
            title = cleantitle.normalize(title)
            url = urlparse.urljoin(self.base_link, self.search_link % cleantitle.geturl(title))
            r = client.request(url, headers=headers, timeout='15')
            r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
            r = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a', ret='title'))
            results = [(i[0], i[1], re.findall('\((\d{4})', i[1])) for i in r]
            try:
                r = [(i[0], i[1], i[2][0]) for i in results if len(i[2]) > 0]
                url = [i[0] for i in r if self.matchAlias(i[1], aliases) and (year == i[2])][0]
            except:
                url = None
                pass

            if (url == None):
                url = [i[0] for i in results if self.matchAlias(i[1], aliases)][0]

            url = urlparse.urljoin(self.base_link, '%s/watching.html' % url)
            return url
        except:
            return
Project: plugin.video.exodus    Author: lastship    | project source code | file source code
def searchMovie(self, title, year, aliases, headers):
        try:
            title = cleantitle.normalize(title)
            url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(cleantitle.getsearch(title)))
            r = client.request(url, headers=headers, timeout='15')
            r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
            r = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a', ret='title'))
            results = [(i[0], i[1], re.findall('\((\d{4})', i[1])) for i in r]
            try:
                r = [(i[0], i[1], i[2][0]) for i in results if len(i[2]) > 0]
                url = [i[0] for i in r if self.matchAlias(i[1], aliases) and (year == i[2])][0]
            except:
                url = None
                pass

            if (url == None):
                url = [i[0] for i in results if self.matchAlias(i[1], aliases)][0]
            return url
        except:
            return
Project: plugin.video.exodus    Author: lastship    | project source code | file source code
def movie(self, imdb, title, localtitle, aliases, year):
        try:
            query = urlparse.urljoin(self.base_link, self.search_link)
            query = query % urllib.quote_plus(title)

            t = cleantitle.get(title)

            r = client.request(query)

            r = client.parseDOM(r, 'div', attrs = {'class': 'thumb'})
            r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a', ret='title'), re.findall('(\d{4})', i)) for i in r]
            r = [(i[0][0], i[1][0], i[2][0]) for i in r if len(i[0]) > 0 and len(i[1]) > 0 and len(i[2]) > 0]

            url = [i[0] for i in r if t in cleantitle.get(i[1]) and year == i[2]][0]
            return url
        except:
            return
Project: plugin.video.exodus    Author: lastship    | project source code | file source code
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
        try:
            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

            query = urlparse.urljoin(self.base_link, self.search_link)
            query = query % urllib.quote_plus(data['tvshowtitle'])

            t = cleantitle.get(data['tvshowtitle'])

            r = client.request(query)

            r = client.parseDOM(r, 'div', attrs = {'class': 'thumb'})
            r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a', ret='title'), re.findall('(\d{4})', i)) for i in r]
            r = [(i[0][0], i[1][0], i[2][0]) for i in r if len(i[0]) > 0 and len(i[1]) > 0 and len(i[2]) > 0]

            url = [i[0] for i in r if t in cleantitle.get(i[1]) and ('Season %s' % season) in i[1]][0]
            url += '?episode=%01d' % int(episode)

            return url
        except:
            return
Project: plugin.video.exodus    Author: lastship    | project source code | file source code
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
        try:
            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

            year = re.findall('(\d{4})', premiered)[0]
            season = '%01d' % int(season) ; episode = '%01d' % int(episode)
            tvshowtitle = '%s %s: Season %s' % (data['tvshowtitle'], year, season)

            url = cache.get(self.pidtv_tvcache, 120, tvshowtitle)

            if url == None: raise Exception()

            url += '?episode=%01d' % int(episode)
            url = url.encode('utf-8')
            return url
        except:
            return
Project: plugin.video.exodus    Author: lastship    | project source code | file source code
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
        try:
            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
            headers = eval(data['headers'])
            aliases = eval(data['aliases'])
            title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            title = cleantitle.getsearch(title)
            query = self.search_link % (urllib.quote_plus(title))
            query = urlparse.urljoin(self.base_link, query)
            r = client.request(query, headers=headers, timeout='30', mobile=True)
            match = re.compile('alias=(.+?)\'">(.+?)</a>').findall(r)
            r = [(i[0], re.findall('(.+?)\s+-\s+Season\s+(\d+)', i[1])) for i in match]
            r = [(i[0], i[1][0][0], i[1][0][1]) for i in r if len(i[1]) > 0]
            r = [i[0] for i in r if self.matchAlias(i[1], aliases) and int(season) == int(i[2])][0]
            url = {'type': 'tvshow', 'id': r, 'episode': episode, 'season': season, 'headers': headers}
            url = urllib.urlencode(url)
            return url
        except:
            return
Project: plugin.video.exodus    Author: lastship    | project source code | file source code
def movie(self, imdb, title, localtitle, aliases, year):
        try:
            if debrid.status() == False: raise Exception()

            t = cleantitle.get(title)

            query = self.search_link + urllib.quote_plus(title)
            query = urlparse.urljoin(self.base_link, query)

            r = client.request(query, XHR=True)
            r = json.loads(r)

            r = [i for i in r if 'category' in i and 'movie' in i['category'].lower()]
            r = [(i['url'], i['label']) for i in r if 'label' in i and 'url' in i]
            r = [(i[0], re.findall('(.+?) \((\d{4})', i[1])) for i in r]
            r = [(i[0], i[1][0][0], i[1][0][1]) for i in r if len(i[1]) > 0]
            r = [i[0] for i in r if t == cleantitle.get(i[1]) and year == i[2]][0]

            url = re.findall('(?://.+?|)(/.+)', r)[0]
            url = client.replaceHTMLCodes(url)
            url = url.encode('utf-8')
            return url
        except:
            return
Project: plugin.video.exodus    Author: lastship    | project source code | file source code
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
        try:
            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

            year = re.findall('(\d{4})', premiered)[0]
            if int(year) >= 2016: raise Exception()

            url = re.sub('[^A-Za-z0-9]', '-', data['tvshowtitle']).lower()
            url = self.tvsearch_link % (url, data['year'], '%01d' % int(season), '%01d' % int(episode))

            r = urlparse.urljoin(self.base_link, url)
            r = client.request(r, output='geturl')
            if not data['year'] in r: raise Exception()

            return url
        except:
            return
Project: plugin.video.exodus    Author: lastship    | project source code | file source code
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
        try:
            if url == None: return

            tv_maze = tvmaze.tvMaze()
            num = tv_maze.episodeAbsoluteNumber(tvdb, int(season), int(episode))
            num = str(num)

            url = urlparse.urljoin(self.base_link, url)

            r = client.request(url)

            r = client.parseDOM(r, 'tr', attrs = {'class': ''})
            r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'td', attrs = {'class': 'epnum'})) for i in r]
            r = [(i[0][0], i[1][0]) for i in r if len(i[0]) > 0 and len(i[1]) > 0]
            r = [i[0] for i in r if num == i[1]][0]

            url = re.findall('(?://.+?|)(/.+)', r)[0]
            url = client.replaceHTMLCodes(url)
            url = url.encode('utf-8')
            return url
        except:
            return
Project: plugin.video.exodus    Author: lastship    | project source code | file source code
def movie(self, imdb, title, localtitle, aliases, year):
        try:
            query = self.search_link % (urllib.quote_plus(title))
            query = urlparse.urljoin(self.base_link, query)

            c, h = self.__get_cookies(query)

            t = cleantitle.get(title)

            r = client.request(query, headers=h, cookie=c)

            r = client.parseDOM(r, 'div', attrs={'class': 'cell_container'})
            r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a', ret='title')) for i in r]
            r = [(i[0][0], i[1][0]) for i in r if len(i[0]) > 0 and len(i[1]) > 0]
            r = [(i[0], re.findall('(.+?) \((\d{4})', i[1])) for i in r]
            r = [(i[0], i[1][0][0], i[1][0][1]) for i in r if len(i[1]) > 0]
            r = [i[0] for i in r if t == cleantitle.get(i[1]) and year == i[2]][0]

            url = re.findall('(?://.+?|)(/.+)', r)[0]
            url = client.replaceHTMLCodes(url)
            url = url.encode('utf-8')
            return url
        except:
            return
Project: plugin.video.exodus    Author: lastship    | project source code | file source code
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
        try:
            if not url:
                return

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

            url = self.__search([data['tvshowtitle']] + source_utils.aliases_to_array(eval(data['aliases'])), data['year'], season)
            if not url: return

            r = client.request(urlparse.urljoin(self.base_link, url))

            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'ep_link'})
            r = dom_parser.parse_dom(r, 'a', req='href')
            r = [(i.attrs['href'], i.content) for i in r if i]
            r = [(i[0], re.findall("^(?:episode)\s*(\d+)$", i[1], re.I)) for i in r]
            r = [(i[0], i[1][0] if i[1] else '0') for i in r]
            r = [i[0] for i in r if int(i[1]) == int(episode)][0]

            return source_utils.strip_domain(r)
        except:
            return
Project: plugin.video.exodus    Author: lastship    | project source code | file source code
def searchMovie(self, title, year, aliases, headers):
        try:
            title = cleantitle.normalize(title)
            url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(cleantitle.getsearch(title)))
            r = client.request(url, headers=headers, timeout='15')
            r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
            r = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a', ret='title'))
            results = [(i[0], i[1], re.findall('\((\d{4})', i[1])) for i in r]
            try:
                r = [(i[0], i[1], i[2][0]) for i in results if len(i[2]) > 0]
                url = [i[0] for i in r if self.matchAlias(i[1], aliases) and (year == i[2])][0]
            except:
                url = None
                pass

            if (url == None):
                url = [i[0] for i in results if self.matchAlias(i[1], aliases)][0]
            url = '%s/watch/' % url
            return url
        except:
            return
Project: plugin.video.exodus    Author: lastship    | project source code | file source code
def resolve(self, url):
        try:
            b = urlparse.urlparse(url).netloc
            b = re.compile('([\w]+[.][\w]+)$').findall(b)[0]

            if not b in base64.b64decode(self.b_link): return url

            u, p, h = url.split('|')
            r = urlparse.parse_qs(h)['Referer'][0]
            #u += '&app_id=Exodus'

            c = self.request(r, output='cookie', close=False)
            result = self.request(u, post=p, referer=r, cookie=c)

            url = result.split('url=')
            url = [urllib.unquote_plus(i.strip()) for i in url]
            url = [i for i in url if i.startswith('http')]
            url = url[-1]

            return url
        except:
            return
Project: plugin.video.exodus    Author: lastship    | project source code | file source code
def searchMovie(self, title, year, aliases, headers):
        try:
            title = cleantitle.normalize(title)
            url = urlparse.urljoin(self.base_link, self.search_link % (cleantitle.geturl(title.replace('\'', '-'))))
            r = client.request(url, timeout='10', headers=headers)
            r = client.parseDOM(r, 'h2', attrs={'class': 'tit'})
            r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a', ret='title')) for i in r]
            r = [(i[0][0], i[1][0]) for i in r if len(i[0]) > 0 and len(i[1]) > 0]
            r = [(i[0], re.findall('(.+?) \((\d{4})', i[1])) for i in r]
            r = [(i[0], i[1][0][0], i[1][0][1]) for i in r if len(i[1]) > 0]
            try:
                match = [i[0] for i in r if self.matchAlias(i[1], aliases) and year == i[2]][0]
            except:
                match = [i[0] for i in r if self.matchAlias(i[1], aliases)][0]

            url = re.findall('(?://.+?|)(/.+)', match)[0]
            url = client.replaceHTMLCodes(url)
            return url.encode('utf-8')
        except:
            return
Project: plugin.video.exodus    Author: lastship    | project source code | file source code
def __search(self, title, localtitle, year, content_type):
        try:
            t = cleantitle.get(title)
            tq = cleantitle.get(localtitle)
            y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

            query = urlparse.urljoin(self.base_link, self.search_link)

            post = urllib.urlencode({'k': "%s"}) % tq
            r = client.request(query, post=post)
            r = json.loads(r)

            r = [i.get('result') for i in r if i.get('type', '').encode('utf-8') == content_type]
            r = [(i.get('url'), i.get('originalTitle'), i.get('title'), i.get('anneeProduction', 0), i.get('dateStart', 0)) for i in r]
            r = [(i[0], re.sub('<.+?>|</.+?>', '', i[1] if i[1] else ''), re.sub('<.+?>|</.+?>', '', i[2] if i[2] else ''), i[3] if i[3] else re.findall('(\d{4})', i[4])[0]) for i in r if i[3] or i[4]]
            r = sorted(r, key=lambda i: int(i[3]), reverse=True)  # with year > no year
            r = [i[0] for i in r if i[3] in y and (t.lower() == cleantitle.get(i[1].lower()) or tq.lower() == cleantitle.query(i[2].lower()))][0]

            url = re.findall('(?://.+?|)(/.+)', r)[0]
            url = client.replaceHTMLCodes(url)
            url = url.encode('utf-8')

            return url
        except:
            return
Project: plugin.video.exodus    Author: lastship    | project source code | file source code
def __search(self, titles, year, season='0'):
        try:
            query = urlparse.urljoin(self.base_link, self.search_link)

            t = [cleantitle.get(i) for i in set(titles) if i]
            y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

            r = client.request(query, post={'do': 'search', 'subaction': 'search', 'search_start': 0, 'full_search': 0, 'result_from': 1, 'story': cleantitle.query(titles[0])})

            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'fullstream'})
            r = [(dom_parser.parse_dom(i, 'h3', attrs={'class': 'mov-title'}), dom_parser.parse_dom(i, 'div', attrs={'class': 'fullmask'})) for i in r]
            r = [(dom_parser.parse_dom(i[0], 'a', req='href'), dom_parser.parse_dom(i[1], 'a', attrs={'href': re.compile('.*/year/\d+')})) for i in r]
            r = [(i[0][0].attrs['href'], i[0][0].content, i[1][0].content if i[1] else '0') for i in r if i[0]]
            r = [(i[0], i[1], i[2], re.findall('(.+?)\s+(?:\s*-\s*saison)\s+(\d+)', i[1], re.I)) for i in r]
            r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2], i[3][0][1] if len(i[3]) > 0 else '0') for i in r]
            r = [(i[0], i[1], i[2], '1' if int(season) > 0 and i[3] == '0' else i[3]) for i in r]
            r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
            r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y and int(i[3]) == int(season)][0]

            return source_utils.strip_domain(r)
        except:
            return