Python scrapy.signals 模块,spider_opened() 实例源码

我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用scrapy.signals.spider_opened()

项目:ip_proxy_pool    作者:leeyis    | 项目源码 | 文件源码
def __init__(self, rule):
    """Build a CrawlSpider from a database-backed *rule* object.

    The rule supplies the spider name, allowed domains, start URLs and
    the XPath / URL patterns used to assemble the crawling rules.
    Signal handlers for spider_opened/spider_closed are registered so the
    crawl can be logged.
    """
    dispatcher.connect(self.spider_opened, signals.spider_opened)
    dispatcher.connect(self.spider_closed, signals.spider_closed)
    self.rule = rule
    self.name = rule.name
    self.allowed_domains = rule.allowed_domains.split(',')
    self.start_urls = rule.start_urls.split(',')
    rule_list = []

    # Follow "next page" links when the rule defines a pagination XPath.
    # Truthiness check instead of len(): also tolerates next_page being None,
    # where len(None) would raise TypeError.
    if rule.next_page:
        rule_list.append(Rule(LinkExtractor(restrict_xpaths=rule.next_page), follow=True))

    # Pages matching allow_url are followed and parsed as items.
    rule_list.append(Rule(LinkExtractor(
        allow=rule.allow_url.split(','),
        unique=True),
        follow=True,
        callback='parse_item'))

    self.rules = tuple(rule_list)
    super(ProxySpiderSpider, self).__init__()
项目:ip_proxy_pool    作者:leeyis    | 项目源码 | 文件源码
def spider_opened(self, spider):
    """Record the start of a crawl in SpiderCrawlLog.

    A new log row is inserted only when no unfinished run (endTime is NULL)
    already exists for this spider's rule id.
    """
    # print() works as a function call in both Python 2 and 3.
    print("spider is running!")
    item = SpiderCrawlLog(
                          spiderID=self.rule.id,
                          spiderName=self.rule.name,
                          status="Running...",
                          startTime=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                          endTime=None,
                          pages=0,
                          items=0
                          )
    session = loadSession()
    # BUGFIX: the original used `filter(A and B)` — Python's `and` collapses
    # that to a single clause — and `endTime is None`, an identity test that
    # is always False on a SQLAlchemy column.  Pass both clauses to filter()
    # and use .is_(None) for the NULL comparison instead.
    running = session.query(SpiderCrawlLog).filter(
        SpiderCrawlLog.spiderID == self.rule.id,
        SpiderCrawlLog.endTime.is_(None))

    # BUGFIX: Query has no __len__, so len(log) raised TypeError; use .count().
    # Insert a new log entry only when no run is currently open.
    if running.count() == 0:
        session.add(item)
        session.commit()
项目:ArticleSpider    作者:mtianyan    | 项目源码 | 文件源码
def from_crawler(cls, crawler):
    """Scrapy factory hook: build the component and register its signal handler."""
    instance = cls()
    crawler.signals.connect(instance.spider_opened, signal=signals.spider_opened)
    return instance
项目:ArticleSpider    作者:mtianyan    | 项目源码 | 文件源码
def spider_opened(self, spider):
    """Log that *spider* has started crawling."""
    message = 'Spider opened: %s' % spider.name
    spider.logger.info(message)
项目:myplanB    作者:JainulV    | 项目源码 | 文件源码
def from_crawler(cls, crawler):
    """Scrapy factory hook: build the component and register its signal handler."""
    instance = cls()
    crawler.signals.connect(instance.spider_opened, signal=signals.spider_opened)
    return instance
项目:myplanB    作者:JainulV    | 项目源码 | 文件源码
def spider_opened(self, spider):
    """Log that *spider* has started crawling."""
    message = 'Spider opened: %s' % spider.name
    spider.logger.info(message)
项目:lichking    作者:melonrun    | 项目源码 | 文件源码
def from_crawler(cls, crawler):
    """Scrapy factory hook: build the component and register its signal handler."""
    instance = cls()
    crawler.signals.connect(instance.spider_opened, signal=signals.spider_opened)
    return instance
项目:lichking    作者:melonrun    | 项目源码 | 文件源码
def spider_opened(self, spider):
    """Log that *spider* has started crawling."""
    message = 'Spider opened: %s' % spider.name
    spider.logger.info(message)
项目:ScrapyTutorial    作者:boybeak    | 项目源码 | 文件源码
def from_crawler(cls, crawler):
    """Scrapy factory hook: build the component and register its signal handler."""
    instance = cls()
    crawler.signals.connect(instance.spider_opened, signal=signals.spider_opened)
    return instance
项目:ScrapyTutorial    作者:boybeak    | 项目源码 | 文件源码
def spider_opened(self, spider):
    """Log that *spider* has started crawling."""
    message = 'Spider opened: %s' % spider.name
    spider.logger.info(message)
项目:sbdspider    作者:onecer    | 项目源码 | 文件源码
def from_crawler(cls, crawler):
    """Scrapy factory hook: build the component and register its signal handler."""
    instance = cls()
    crawler.signals.connect(instance.spider_opened, signal=signals.spider_opened)
    return instance
项目:sbdspider    作者:onecer    | 项目源码 | 文件源码
def spider_opened(self, spider):
    """Log that *spider* has started crawling."""
    message = 'Spider opened: %s' % spider.name
    spider.logger.info(message)
项目:CustomsSpider    作者:orangZC    | 项目源码 | 文件源码
def from_crawler(cls, crawler):
    """Scrapy factory hook: build the component and register its signal handler."""
    instance = cls()
    crawler.signals.connect(instance.spider_opened, signal=signals.spider_opened)
    return instance
项目:CustomsSpider    作者:orangZC    | 项目源码 | 文件源码
def spider_opened(self, spider):
    """Log that *spider* has started crawling."""
    message = 'Spider opened: %s' % spider.name
    spider.logger.info(message)
项目:byrbbs-py3    作者:ryderchan    | 项目源码 | 文件源码
def from_crawler(cls, crawler):
    """Scrapy factory hook: build the component and register its signal handler."""
    instance = cls()
    crawler.signals.connect(instance.spider_opened, signal=signals.spider_opened)
    return instance
项目:byrbbs-py3    作者:ryderchan    | 项目源码 | 文件源码
def spider_opened(self, spider):
    """Log that *spider* has started crawling."""
    message = 'Spider opened: %s' % spider.name
    spider.logger.info(message)
项目:python-spider    作者:naginoasukara    | 项目源码 | 文件源码
def from_crawler(cls, crawler):
    """Scrapy factory hook: build the component and register its signal handler."""
    instance = cls()
    crawler.signals.connect(instance.spider_opened, signal=signals.spider_opened)
    return instance
项目:python-spider    作者:naginoasukara    | 项目源码 | 文件源码
def spider_opened(self, spider):
    """Log that *spider* has started crawling."""
    message = 'Spider opened: %s' % spider.name
    spider.logger.info(message)
项目:job_scraper    作者:wlabatey    | 项目源码 | 文件源码
def from_crawler(cls, crawler):
    """Scrapy factory hook: build the component and register its signal handler."""
    instance = cls()
    crawler.signals.connect(instance.spider_opened, signal=signals.spider_opened)
    return instance
项目:job_scraper    作者:wlabatey    | 项目源码 | 文件源码
def spider_opened(self, spider):
    """Log that *spider* has started crawling."""
    message = 'Spider opened: %s' % spider.name
    spider.logger.info(message)
项目:job_scraper    作者:wlabatey    | 项目源码 | 文件源码
def from_crawler(cls, crawler):
    """Scrapy factory hook: create the pipeline and wire its open/close signals."""
    component = cls()
    component_signals = crawler.signals
    component_signals.connect(component.spider_opened, signals.spider_opened)
    component_signals.connect(component.spider_closed, signals.spider_closed)
    return component
项目:job_scraper    作者:wlabatey    | 项目源码 | 文件源码
def spider_opened(self, spider):
    """Open the per-spider output file and start the JSON item exporter.

    The file handle is kept in self.files so spider_closed can close it later.
    'a+' appends, so repeated runs accumulate into the same file.
    """
    # Renamed from `file`, which shadowed the builtin of the same name.
    out = open('/tmp/jobs.json', 'a+')
    self.files[spider] = out
    self.exporter = JsonItemExporter(out, encoding='utf-8', ensure_ascii=False)
    self.exporter.start_exporting()
项目:Acfun_article_spider    作者:bbbbx    | 项目源码 | 文件源码
def from_crawler(cls, crawler):
    """Scrapy factory hook: build the component and register its signal handler."""
    instance = cls()
    crawler.signals.connect(instance.spider_opened, signal=signals.spider_opened)
    return instance
项目:Acfun_article_spider    作者:bbbbx    | 项目源码 | 文件源码
def spider_opened(self, spider):
    """Log that *spider* has started crawling."""
    message = 'Spider opened: %s' % spider.name
    spider.logger.info(message)
项目:Tumblr_Feed_Video_Crawler    作者:VisitBoy    | 项目源码 | 文件源码
def from_crawler(cls, crawler):
    """Scrapy factory hook: build the component and register its signal handler."""
    instance = cls()
    crawler.signals.connect(instance.spider_opened, signal=signals.spider_opened)
    return instance
项目:Tumblr_Feed_Video_Crawler    作者:VisitBoy    | 项目源码 | 文件源码
def spider_opened(self, spider):
    """Log that *spider* has started crawling."""
    message = 'Spider opened: %s' % spider.name
    spider.logger.info(message)
项目:Python_Stock_Github    作者:DavidFnck    | 项目源码 | 文件源码
def from_crawler(cls, crawler):
    """Scrapy factory hook: build the component and register its signal handler."""
    instance = cls()
    crawler.signals.connect(instance.spider_opened, signal=signals.spider_opened)
    return instance
项目:Python_Stock_Github    作者:DavidFnck    | 项目源码 | 文件源码
def spider_opened(self, spider):
    """Log that *spider* has started crawling."""
    message = 'Spider opened: %s' % spider.name
    spider.logger.info(message)
项目:Spider    作者:Ctrlsman    | 项目源码 | 文件源码
def from_crawler(cls, crawler):
    """Scrapy factory hook: build the component and register its signal handler."""
    instance = cls()
    crawler.signals.connect(instance.spider_opened, signal=signals.spider_opened)
    return instance
项目:Spider    作者:Ctrlsman    | 项目源码 | 文件源码
def spider_opened(self, spider):
    """Log that *spider* has started crawling."""
    message = 'Spider opened: %s' % spider.name
    spider.logger.info(message)
项目:github-trending    作者:csbok    | 项目源码 | 文件源码
def from_crawler(cls, crawler):
    """Scrapy factory hook: build the component and register its signal handler."""
    instance = cls()
    crawler.signals.connect(instance.spider_opened, signal=signals.spider_opened)
    return instance
项目:github-trending    作者:csbok    | 项目源码 | 文件源码
def spider_opened(self, spider):
    """Log that *spider* has started crawling."""
    message = 'Spider opened: %s' % spider.name
    spider.logger.info(message)
项目:JobSpiderScrapy    作者:SethWen    | 项目源码 | 文件源码
def from_crawler(cls, crawler):
    """Scrapy factory hook: build the component and register its signal handler."""
    instance = cls()
    crawler.signals.connect(instance.spider_opened, signal=signals.spider_opened)
    return instance
项目:JobSpiderScrapy    作者:SethWen    | 项目源码 | 文件源码
def spider_opened(self, spider):
    """Log that *spider* has started crawling."""
    message = 'Spider opened: %s' % spider.name
    spider.logger.info(message)


# Random User-Agent middleware (original Chinese comment was garbled in extraction)
项目:AJKscrapy    作者:iiiuuu    | 项目源码 | 文件源码
def from_crawler(cls, crawler):
    """Scrapy factory hook: build the component and register its signal handler."""
    instance = cls()
    crawler.signals.connect(instance.spider_opened, signal=signals.spider_opened)
    return instance
项目:AJKscrapy    作者:iiiuuu    | 项目源码 | 文件源码
def spider_opened(self, spider):
    """Log that *spider* has started crawling."""
    message = 'Spider opened: %s' % spider.name
    spider.logger.info(message)
项目:scrapy-azuresearch-crawler-samples    作者:yokawasa    | 项目源码 | 文件源码
def from_crawler(cls, crawler):
    """Scrapy factory hook: build the component and register its signal handler."""
    instance = cls()
    crawler.signals.connect(instance.spider_opened, signal=signals.spider_opened)
    return instance
项目:scrapy-azuresearch-crawler-samples    作者:yokawasa    | 项目源码 | 文件源码
def spider_opened(self, spider):
    """Log that *spider* has started crawling."""
    message = 'Spider opened: %s' % spider.name
    spider.logger.info(message)
项目:scrapy-azuresearch-crawler-samples    作者:yokawasa    | 项目源码 | 文件源码
def from_crawler(cls, crawler):
    """Scrapy factory hook: build the component and register its signal handler."""
    instance = cls()
    crawler.signals.connect(instance.spider_opened, signal=signals.spider_opened)
    return instance
项目:scrapy-azuresearch-crawler-samples    作者:yokawasa    | 项目源码 | 文件源码
def spider_opened(self, spider):
    """Log that *spider* has started crawling."""
    message = 'Spider opened: %s' % spider.name
    spider.logger.info(message)
项目:scrapy-azuresearch-crawler-samples    作者:yokawasa    | 项目源码 | 文件源码
def from_crawler(cls, crawler):
    """Scrapy factory hook: build the component and register its signal handler."""
    instance = cls()
    crawler.signals.connect(instance.spider_opened, signal=signals.spider_opened)
    return instance
项目:python    作者:goodstuden    | 项目源码 | 文件源码
def from_crawler(cls, crawler):
    """Scrapy factory hook: build the component and register its signal handler."""
    instance = cls()
    crawler.signals.connect(instance.spider_opened, signal=signals.spider_opened)
    return instance
项目:python    作者:goodstuden    | 项目源码 | 文件源码
def spider_opened(self, spider):
    """Log that *spider* has started crawling."""
    message = 'Spider opened: %s' % spider.name
    spider.logger.info(message)
项目:crepriceSpider    作者:zhousenbiao    | 项目源码 | 文件源码
def from_crawler(cls, crawler):
    """Scrapy factory hook: build the component and register its signal handler."""
    instance = cls()
    crawler.signals.connect(instance.spider_opened, signal=signals.spider_opened)
    return instance
项目:crepriceSpider    作者:zhousenbiao    | 项目源码 | 文件源码
def spider_opened(self, spider):
    """Log that *spider* has started crawling."""
    message = 'Spider opened: %s' % spider.name
    spider.logger.info(message)
项目:caoliuscrapy    作者:leyle    | 项目源码 | 文件源码
def from_crawler(cls, crawler):
    """Scrapy factory hook: build the component and register its signal handler."""
    instance = cls()
    crawler.signals.connect(instance.spider_opened, signal=signals.spider_opened)
    return instance
项目:caoliuscrapy    作者:leyle    | 项目源码 | 文件源码
def spider_opened(self, spider):
    """Log that *spider* has started crawling."""
    message = 'Spider opened: %s' % spider.name
    spider.logger.info(message)
项目:AutoHome_WOM_Spider    作者:dtc-auto    | 项目源码 | 文件源码
def from_crawler(cls, crawler):
    """Scrapy factory hook: build the component and register its signal handler."""
    instance = cls()
    crawler.signals.connect(instance.spider_opened, signal=signals.spider_opened)
    return instance
项目:AutoHome_WOM_Spider    作者:dtc-auto    | 项目源码 | 文件源码
def spider_opened(self, spider):
    """Log that *spider* has started crawling."""
    message = 'Spider opened: %s' % spider.name
    spider.logger.info(message)
项目:spider    作者:pythonsite    | 项目源码 | 文件源码
def from_crawler(cls, crawler):
    """Scrapy factory hook: build the component and register its signal handler."""
    instance = cls()
    crawler.signals.connect(instance.spider_opened, signal=signals.spider_opened)
    return instance