Python fake_useragent module: UserAgent() example source code

We extracted the following 50 code examples from open-source Python projects to illustrate how to use fake_useragent.UserAgent().
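Before the project excerpts, here is a minimal usage sketch (it is not taken from any of the projects below, and the fallback string is purely illustrative): instantiate UserAgent() once, read its random or chrome attributes to obtain a user-agent string, and guard the constructor in case the browser data cannot be downloaded.

from fake_useragent import UserAgent, FakeUserAgentError

try:
    ua = UserAgent()    # loads and caches the browser usage data
    print(ua.random)    # a random user-agent string
    print(ua.chrome)    # a random Chrome user-agent string
except FakeUserAgentError:
    # fall back to a hard-coded string if the data cannot be fetched
    print('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36')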

Project: landchina-spider    Author: sundiontheway    | Project Source | File Source
def process_request(self, request, spider):
        if request.meta.has_key('PhantomJS'):
            log.debug('PhantomJS Requesting: %s' % request.url)
            ua = None
            try:
                ua = UserAgent().random
            except:
                ua = 'Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11'

            webdriver.DesiredCapabilities.PHANTOMJS['phantomjs.page.settings.userAgent'] = ua

            try:
                self.driver.get(request.url)
                content = self.driver.page_source.encode('utf-8')
                url = self.driver.current_url.encode('utf-8')
            except:
                return HtmlResponse(request.url, encoding='utf-8', status=503, body='')

            if content == '<html><head></head><body></body></html>':
                return HtmlResponse(request.url, encoding ='utf-8', status=503, body='')
            else:
                return HtmlResponse(url, encoding='utf-8', status=200, body=content)

        else:
            log.debug('Common Requesting: %s' % request.url)
Project: Fuk    Author: r4gnax    | Project Source | File Source
async def req_handle():
    ua=UserAgent()
    def do_req(u):
        return requests.get(u, headers={'user-agent': ua.random})
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
        loop = asyncio.get_event_loop()
        futures = [
            loop.run_in_executor(executor, do_req, HOST+w) for w in words
        ]
        for response in await asyncio.gather(*futures):
            if response.status_code < 400:
                if response.url[-1] == '/':
                    print("--DIR: %s - %i" % (response.url, response.status_code))
                else:
                    print("%s - %i (%i bytes)" % (response.url, response.status_code, len(response.content)))
            pass
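The coroutine above still has to be driven by an event loop; assuming HOST, words, and max_threads are module-level globals in the original file (they are not part of this excerpt), a minimal driver would be:

import asyncio

if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    # run all the concurrent requests to completion
    loop.run_until_complete(req_handle())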
Project: PyCarGr    Author: Florents-Tselai    | Project Source | File Source
def __init__(self, search_page_url):
        self.search_page_url = search_page_url
        req = Request(
            search_page_url,
            data=None,
            headers={
                'User-Agent': UserAgent().chrome
            }
        )
        self.html = urlopen(req).read().decode('utf-8')
        self.soup = BeautifulSoup(self.html, 'html.parser')
        self.num_results = None
        for f in self.soup.find_all('strong'):
            if '????????' in f.text:
                if f.text.split()[0].isdigit():
                    self.num_results = int(f.text.split()[0])
Project: ProxyPool    Author: Germey    | Project Source | File Source
def get_page(url, options={}):
    ua = UserAgent()
    base_headers = {
        'User-Agent':  ua.random,
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8'
    }
    headers = dict(base_headers, **options)
    print('Getting', url)
    try:
        r = requests.get(url, headers=headers)
        print('Getting result', url, r.status_code)
        if r.status_code == 200:
            return r.text
    except ConnectionError:
        print('Crawling Failed', url)
        return None
Project: ProxyYourSpider    Author: rafacheng    | Project Source | File Source
def fillProxyPool(self):
        global offset
        while self.llen < self.size:
            url = self.url + '&offset=' + str(offset)
            offset += 50
            ua = UserAgent()
            headers = {'User-Agent' : ua.random}
            response = requests.get(url, headers=headers)
            soup = BeautifulSoup(response.text, 'lxml')
            lists = soup.find('tbody').find_all('tr')
            for ls in lists:
                tds = ls.find_all('td')
                proxy = ''.join(tds[0].text.split())
                _type = ''.join(tds[1].text.split()).lower()
                validity = self.checkValidity(_type, proxy)
                if validity == True:
                    self.r.lpush(_type, proxy)
                    print '1 proxy added: %s. http: %d; https: %s.' \
                            %(proxy, self.r.llen('http'), self.r.llen('https'))
            self.__class__.llen += self.r.llen('http') + self.r.llen('https')
Project: ProxyYourSpider    Author: rafacheng    | Project Source | File Source
def checkValidity(self, _type, proxy):
        proxyDict = {_type : _type + '://' + proxy}
        ua = UserAgent()
        headers = {'User-Agent' : ua.random}
        try:
            if _type == 'http':
                r = requests.get(self.http_test_url, proxies=proxyDict,\
                        headers=headers, timeout=2)
            else:
                r = requests.get(self.https_test_url, proxies=proxyDict,\
                        headers=headers, timeout=2)
        except Exception:
            return False
        soup = BeautifulSoup(r.text, 'lxml')
        try:
            retDict = eval(soup.find('body').text)
        except Exception:
            return False
        if proxy.split(':')[0] == retDict['origin']:
            return True
Project: ShiokBot    Author: kianhean    | Project Source | File Source
def get_news_st():
    """ Get News From ST """
    # Get Text
    headers = {'User-Agent': UserAgent().random}
    website = r.get('http://www.straitstimes.com/container/custom-landing-page/breaking-news',
                    headers=headers)
    website_text = website.text

    # Parse HTML using BS
    soup = BeautifulSoup(website_text, 'html.parser')

    # Find all Headlines
    headlines = soup.findAll('span', {'class' : 'story-headline'})
    time_lines = soup.findAll('div', {'class' : 'node-postdate'})

    count_ = 0
    final_text = "<b>Top Singapore Headlines</b>\n\n"

    # Loop Through Headlines!
    for headline in headlines[:5]:
        final_text += '<a href="' + 'http://www.straitstimes.com' + headline.a['href'] + '">'
        final_text += headline.get_text()[1:] + "</a>"
        final_text += "\n" + time_lines[count_].get_text() + "\n\n"
        count_ += 1
    return final_text
Project: ArticleSpider    Author: mtianyan    | Project Source | File Source
def __init__(self, crawler):
        super(RandomUserAgentMiddleware, self).__init__()

        self.ua = UserAgent()
        self.per_proxy = crawler.settings.get('RANDOM_UA_PER_PROXY', False)
        self.ua_type = crawler.settings.get('RANDOM_UA_TYPE', 'random')
        self.proxy2ua = {}
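This __init__-only excerpt (like the similar ones below) is normally paired with a from_crawler factory and a process_request hook; a hedged sketch of that companion code, assuming the usual getattr-by-type pattern implied by the RANDOM_UA_TYPE setting, could look like this:

    @classmethod
    def from_crawler(cls, crawler):
        # Scrapy hands over the crawler so the middleware can read its settings
        return cls(crawler)

    def process_request(self, request, spider):
        # resolve 'random', 'chrome', 'firefox', ... against the UserAgent instance
        ua = getattr(self.ua, self.ua_type)
        request.headers.setdefault('User-Agent', ua)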
Project: ArticleSpider    Author: mtianyan    | Project Source | File Source
def __init__(self, crawler):
        super(RandomUserAgentMiddlware, self).__init__()
        self.ua = UserAgent()
        self.ua_type = crawler.settings.get("RANDOM_UA_TYPE", "random")
Project: landchina-spider    Author: sundiontheway    | Project Source | File Source
def __init__(self, user_agent=''):
        self.user_agent = user_agent
        try:
            self.faker = UserAgent()
        except Exception as e:
            log.debug("Fake-useragent error, use default. (%s)" % e.message)
            self.faker = None
Project: landchina-spider    Author: sundiontheway    | Project Source | File Source
def process_request(self, request, spider):
        ua = None
        if self.faker:
            ua = self.faker.random
        else:
            ua = random.choice(self.user_agent_list)

        log.debug("Current UserAgent: %s" % ua)
        request.headers.setdefault('User-Agent', ua)
Project: amazon-crawler    Author: ahmedezzeldin93    | Project Source | File Source
def __init__(self, settings, user_agent="Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:54.0) Gecko/20100101"
                                            " Firefox/54.0"):
        super(RandomUserAgentMiddleware, self).__init__()
        self.user_agent = user_agent
        try:
            self.user_agent_engine = UserAgent()
        except Exception, ex:
            logging.error("Failed to create user agent engine object. Reason: %s", ex)
Project: IPProxyPool    Author: jianghaibo12138    | Project Source | File Source
def getHeader(self, host='', cookie=''):
        # create a random useragent
        ua = UserAgent()
        # build the headers, including Host and Cookie when they are given
        if host:
            return {
                'User-Agent': ua.random,
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
                'Connection': 'keep-alive',
                'Accept-Encoding': 'gzip, deflate, sdch',
                'Upgrade-Insecure-Requests': '1',
                'Host': host,
                'Cookie': cookie
            }
        elif not host and not cookie:
            return {
                'User-Agent': ua.random,
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
                'Connection': 'keep-alive',
                'Accept-Encoding': 'gzip, deflate, sdch',
                'Upgrade-Insecure-Requests': '1',
            }
        else:
            return {
                'User-Agent': ua.random,
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
                'Connection': 'keep-alive',
                'Accept-Encoding': 'gzip, deflate, sdch',
                'Upgrade-Insecure-Requests': '1',
                'Cookie': cookie
            }
Project: PyCarGr    Author: Florents-Tselai    | Project Source | File Source
def __init__(self, car_id):
        self.car_id = car_id
        self.req = Request(
            'https://www.car.gr/%s' % self.car_id,
            data=None,
            headers={
                'User-Agent': UserAgent().chrome
            }
        )
        self.html = urlopen(self.req).read().decode('utf-8')
        self.soup = BeautifulSoup(self.html, 'html.parser')
Project: scrapy_zhihu    Author: mockingbirds    | Project Source | File Source
def __init__(self, crawler):
        super(RandomUserAgentMiddleware, self).__init__()

        self.ua = UserAgent()
        self.ua_type = crawler.settings.get('RANDOM_UA_TYPE', 'random')
Project: web_develop    Author: dongweiming    | Project Source | File Source
def get_user_agent():
    ua = UserAgent()
    return ua.random
Project: web_develop    Author: dongweiming    | Project Source | File Source
def get_user_agent():
    ua = UserAgent()
    return ua.random
Project: web_develop    Author: dongweiming    | Project Source | File Source
def get_user_agent():
    ua = UserAgent()
    return ua.random
Project: web_develop    Author: dongweiming    | Project Source | File Source
def get_user_agent():
    ua = UserAgent()
    return ua.random
Project: web_develop    Author: dongweiming    | Project Source | File Source
def get_user_agent():
    ua = UserAgent()
    return ua.random
Project: crawler    Author: brantou    | Project Source | File Source
def __init__(self, crawler):
        super(RandomUserAgentMiddleware, self).__init__()

        self.ua = UserAgent()
        self.ua_type = crawler.settings.get('RANDOM_UA_TYPE', 'random')
Project: househunt    Author: althor880    | Project Source | File Source
def retrieve_dls(self):
        ua = UserAgent()
        ua.update()  # refresh the cached user-agent data
        user_agent = ua.random
        for dl_url in self.dl_urls:
            headers = { 'User-Agent': user_agent }
            req = urllib2.Request(dl_url, headers=headers)
            browse = urllib2.urlopen(req)
            csv_str = browse.read()
            csv_f = StringIO.StringIO(csv_str)
            reader = csv.reader(csv_f, delimiter=',')
            headers = reader.next()
            for row in reader:
                ds = zip(headers, row)
                self.result_sets.append(dict(ds))
Project: TvLive    Author: Rano1    | Project Source | File Source
def __init__(self, crawler):
        super(RandomUserAgentMiddlware, self).__init__()
        self.ua = UserAgent()
        self.ua_type = crawler.settings.get("RANDOM_UA_TYPE", "random")
Project: Hands-Chopping    Author: ecmadao    | Project Source | File Source
def __fetch_goods__(self):
        us = UserAgent()
        self.headers['User-Agent'] = us.random
Project: showroom    Author: wlerin    | Project Source | File Source
def __init__(self, pool_maxsize=100):
        super().__init__()
        https_adapter = HTTPAdapter(pool_maxsize=pool_maxsize)
        self.mount('https://www.showroom-live.com', https_adapter)
        self.headers = {"User-Agent": ua_str}
Project: Douban_Crawler    Author: rafacheng    | Project Source | File Source
def process_request(self,request, spider):
        user_agent = UserAgent()
        ua = user_agent.random
        if ua:
            #print ua
            print "********Current UserAgent:%s************" %ua  
            #log.msg('Current UserAgent: '+ua, level='INFO') 
            request.headers.setdefault('User-Agent', ua)
Project: Douban_Crawler    Author: rafacheng    | Project Source | File Source
def process_request(self,request,spider):
        user_agent = UserAgent()
        ua = user_agent.random
        if ua:
            log.msg('Current UserAgent: '+ua, level=log.INFO) 
            request.headers.setdefault('User-Agent', ua)
Project: Practice_project    Author: Ventotu    | Project Source | File Source
def getHTMLText(url,code="utf-8"):
    try:
        ua = UserAgent()  # use a random User-Agent header to imitate a real browser
        headers1 = {'User-Agent': ua.random}
        r = requests.get(url,headers=headers1)
        r.raise_for_status()
        r.encoding = code
        return r.text
    except:
        return "getHTML error"
Project: Practice_project    Author: Ventotu    | Project Source | File Source
def getHTMLText(url,code="utf-8"):
    try:
        ua = UserAgent()  # use a random User-Agent header to imitate a real browser
        headers1 = {'User-Agent': ua.random}
        r = requests.get(url,headers=headers1)
        r.raise_for_status()
        r.encoding = code
        return r.text
    except:
        return "getHTML error"
Project: Practice_project    Author: Ventotu    | Project Source | File Source
def getHTMLText(url,code="ascii"):
    try:
        ua=UserAgent()
        headers1 = {'User-Agent': ua.random}  # Use random header to imitate human behaviour
        r = requests.get(url,headers=headers1)
        r.raise_for_status()
        r.encoding = code
        return r.text
    except:
        return "getHTML error"
Project: PacktpubDownloaderAndGetter    Author: AkdM    | Project Source | File Source
def get_user_agent():
  return UserAgent().random
Project: django-scrapy-lcv_search    Author: Albino1995    | Project Source | File Source
def __init__(self, crawler):
        super(RandomUserAgentMiddleware, self).__init__()
        self.ua = UserAgent()
        self.ua_type = crawler.settings.get("RANDOM_UA_TYPE", "random")
Project: SlowLoris    Author: maxkrivich    | Project Source | File Source
def __init__(self, target, socket_count=300, headers={
        'User-Agent': None,  # UserAgent()
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'ru,en-us;q=0.7,en;q=0.3',
        'Accept-Charset': 'windows-1251,utf-8;q=0.7,*;q=0.7',
        'Connection': 'keep-alive'
    }):
        """

        :param target: link to web server [TargetInfo]
        :param socket_count: maximum count of created socket default value 300
        :param headers: HTTP headers what puts in request
        """
        super(Connection, self).__init__()
        # self.lock = lock
        self.target = target
        self.headers = headers

        try:
            self.fake_ua = UserAgent()
        except FakeUserAgentError as fe:
            logger.error(fe)
        # Counters
        self.socket_count = socket_count
        self.__cnt_sent_requests = 0
        self.__cnt_died_sockets = 0
        self.__cnt_alive_socket = 0
        self.__sockets = []
        self.is_stop = False
Project: fintech_spider    Author: hee0624    | Project Source | File Source
def __init__(self, crawler):
        super(RandomUserAgentMiddleware, self).__init__()

        self.ua = UserAgent()
        self.per_proxy = crawler.settings.get('RANDOM_UA_PER_PROXY', False)
        self.ua_type = crawler.settings.get('RANDOM_UA_TYPE', 'random')
        self.proxy2ua = {}
Project: fintech_spider    Author: hee0624    | Project Source | File Source
def __init__(self, crawler):
        super(RandomUserAgentMiddleware, self).__init__()

        self.ua = UserAgent()
        self.per_proxy = crawler.settings.get('RANDOM_UA_PER_PROXY', False)
        self.ua_type = crawler.settings.get('RANDOM_UA_TYPE', 'random')
        self.proxy2ua = {}
Project: google-news-scraper    Author: philipperemy    | Project Source | File Source
def google_news_run(keyword, limit=10, year_start=2010, year_end=2011, debug=True, sleep_time_every_ten_articles=0):
    num_articles_index = 0
    ua = UserAgent()
    result = []
    while num_articles_index < limit:
        url = forge_url(keyword, num_articles_index, year_start, year_end)
        if debug:
            logging.debug('For Google -> {}'.format(url))
            logging.debug('Total number of calls to Google = {}'.format(NUMBER_OF_CALLS_TO_GOOGLE_NEWS_ENDPOINT))
        headers = {'User-Agent': ua.chrome}
        try:
            response = requests.get(url, headers=headers, timeout=20)
            links = extract_links(response.content)

            nb_links = len(links)
            if nb_links == 0 and num_articles_index == 0:
                raise Exception(
                    'No results fetched. Either the keyword is wrong '
                    'or you have been banned from Google. Retry tomorrow '
                    'or change of IP Address.')

            if nb_links == 0:
                print('No more news to read for keyword {}.'.format(keyword))
                break

            for i in range(nb_links):
                cur_link = links[i]
                logging.debug('TITLE = {}, URL = {}, DATE = {}'.format(cur_link[1], cur_link[0], cur_link[2]))
            result.extend(links)
        except requests.exceptions.Timeout:
            logging.debug('Google news TimeOut. Maybe the connection is too slow. Skipping.')
            pass
        num_articles_index += 10
        if debug and sleep_time_every_ten_articles != 0:
            logging.debug('Program is going to sleep for {} seconds.'.format(sleep_time_every_ten_articles))
        time.sleep(sleep_time_every_ten_articles)
    return result
Project: jobbole_spider    Author: pujinxiao    | Project Source | File Source
def __init__(self, crawler):
        super(RandomUserAgentMiddleware, self).__init__()

        self.ua = UserAgent()
        self.per_proxy = crawler.settings.get('RANDOM_UA_PER_PROXY', False)
        self.ua_type = crawler.settings.get('RANDOM_UA_TYPE', 'random')
        self.proxy2ua = {}
Project: FirstSpider    Author: yipwinghong    | Project Source | File Source
def __init__(self, crawler):
        super(RandomUserAgentMiddleware, self).__init__()
        self.ua = UserAgent()
        self.ua_type = crawler.settings.get("RANDOM_USERAGENT_TYPE", "random")
Project: gradcrawler    Author: cullengao    | Project Source | File Source
def __init__(self, settings):
        super(self.__class__, self).__init__()
        self.ua = UserAgent()
        self.per_proxy = settings.get('RANDOM_UA_PER_PROXY', False)
        self.ua_type = settings.get('RANDOM_UA_TYPE', 'random')
        self.proxy2ua = {}
        self.logger = getLoggerFromSettings(__name__, settings)
Project: python    Author: panxus    | Project Source | File Source
def __init__(self,crawler):
        super(DownloaderMiddlewareUA,self).__init__()
        self.ua_type = crawler.settings.get('USER_AGENT_DEFAULT','random')
        self.ua = UserAgent()
Project: youku    Author: malone6    | Project Source | File Source
def __init__(self):
        # self.url_input = input(
        #     "Enter the Youku video page url, e.g. http://v.youku.com/v_show/id_XMTU3NTkxNDIwMA==.html" + '\n' + '>>>')
        self.headers = {"accept-encoding": "gzip, deflate, sdch",
                        "accept-language": "zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4",
                        "user-agent": UserAgent().random,
                        }
        # the 'cna' value from the cookies is url-quoted below and embedded in the request url
        self.utid = urllib.parse.quote('onBdERfZriwCAW+uM3cVByOa')
        # self.utid = 'onBdERfZriwCAW+uM3cVByOa'
Project: uname_ctf-tools    Author: unamecorporation    | Project Source | File Source
def geraUserAgente():
    ua = UserAgent()
    ua.update()          # refresh the cached user-agent data
    user = ua.random     # pick a random user-agent string
    return str(user)
Project: uname_ctf-tools    Author: unamecorporation    | Project Source | File Source
def Help():
    os.system("setterm -foreground white")
    print '''
Usage: python fakeRequestes.py
The web address is the page you want to access
with a fake UserAgent.
    '''
Project: Charlotte    Author: LiZoRN    | Project Source | File Source
def __init__(self, crawler):
        super(RandomUserAgentMiddlware, self).__init__()
        self.ua = UserAgent()
        self.ua_type = crawler.settings.get("RANDOM_UA_TYPE", "random")
Project: Charlotte    Author: LiZoRN    | Project Source | File Source
def __init__(self, crawler):
        super(RandomUserAgentMiddlware, self).__init__()
        self.ua = UserAgent()
        self.ua_type = crawler.settings.get("RANDOM_UA_TYPE", "random")
Project: Charlotte    Author: LiZoRN    | Project Source | File Source
def __init__(self, crawler):
        super(RandomUserAgentMiddleware, self).__init__()

        self.ua = UserAgent()
        self.per_proxy = crawler.settings.get('RANDOM_UA_PER_PROXY', False)
        self.ua_type = crawler.settings.get('RANDOM_UA_TYPE', 'random')
        self.proxy2ua = {}
Project: AmazonScraping    Author: santoshghimire    | Project Source | File Source
def __init__(self):
        super(RandomUserAgentMiddleware, self).__init__()

        self.ua = UserAgent()
Project: brush    Author: chenshiyang2015    | Project Source | File Source
def getUA():
    ua = UserAgent()
    return ua.random
Project: brush    Author: chenshiyang2015    | Project Source | File Source
def get_user_agent():
    if platform.uname()[0] =='Windows':
        ua = UserAgent()
        return ua.random
    else:
        with codecs.open('/home/rd/fake_useragent.json', encoding='utf-8', mode='rb',) as fp:
            s = json.load(fp)

        attr = s['randomize'][str(random.randint(0, len(s['randomize']) - 1))]
        return s['browsers'][attr][random.randint(0, len(s['browsers'][attr]) - 1)]
Project: brush    Author: chenshiyang2015    | Project Source | File Source
def getUA():
    ua = UserAgent()
    return ua.random