我们从 Python 开源项目中提取了以下 50 个代码示例，用于说明如何使用 scrapy.signals.spider_opened 信号。
def __init__(self, rule):
    """Configure the spider from a crawl-rule record and hook lifecycle signals.

    Args:
        rule: Object supplying ``name``, ``allowed_domains``, ``start_urls``,
            ``next_page`` and ``allow_url``. The domain, start-URL and
            allow-URL fields are split on commas.
    """
    # Subscribe this instance's handlers to the open/close signals.
    dispatcher.connect(self.spider_opened, signals.spider_opened)
    dispatcher.connect(self.spider_closed, signals.spider_closed)

    self.rule = rule
    self.name = rule.name
    self.allowed_domains = rule.allowed_domains.split(',')
    self.start_urls = rule.start_urls.split(',')

    crawl_rules = []

    # A pagination rule is added only when a "next page" XPath is configured.
    if len(rule.next_page) > 0:
        pagination_links = LinkExtractor(restrict_xpaths=rule.next_page)
        crawl_rules.append(Rule(pagination_links, follow=True))

    # Links matching the allow patterns are followed and sent to parse_item.
    item_links = LinkExtractor(allow=rule.allow_url.split(','), unique=True)
    crawl_rules.append(Rule(item_links, follow=True, callback='parse_item'))

    # The rules are assigned before the base initializer runs; presumably the
    # base class reads self.rules there -- confirm against the parent class.
    self.rules = tuple(crawl_rules)
    super(ProxySpiderSpider, self).__init__()
def spider_opened(self, spider):
    """Record a ``Running...`` crawl-log row when the spider starts.

    A new ``SpiderCrawlLog`` row is inserted only when this rule has no
    unfinished log entry (one whose ``endTime`` is NULL) yet.

    Args:
        spider: The spider delivered with the signal; not used here.
    """
    # A parenthesised single-argument print prints the same text on
    # Python 2 and Python 3.
    print("spider is running!")

    session = loadSession()

    # Both criteria are handed to filter() as SQL expressions. Python's
    # ``and`` / ``is None`` are evaluated by the interpreter instead of being
    # translated to SQL, so the endTime condition used to be dropped.
    # NOTE(review): assumes a SQLAlchemy-style session/query API -- confirm.
    unfinished = session.query(SpiderCrawlLog).filter(
        SpiderCrawlLog.spiderID == self.rule.id,
        SpiderCrawlLog.endTime.is_(None))

    # A query object has no len(); ask the database how many rows match.
    if unfinished.count() != 0:
        return

    item = SpiderCrawlLog(
        spiderID=self.rule.id,
        spiderName=self.rule.name,
        status="Running...",
        startTime=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        endTime=None,
        pages=0,
        items=0
    )
    session.add(item)
    session.commit()
def from_crawler(cls, crawler):
    """Create an instance and subscribe it to the ``spider_opened`` signal.

    Scrapy uses this method to create the object.

    Args:
        cls: Class to instantiate; it is called with no arguments.
        crawler: Object whose ``signals.connect`` registers the handler.

    Returns:
        The newly created instance.
    """
    # NOTE(review): no @classmethod decorator is visible on this definition;
    # presumably one is applied in the original file -- confirm.
    instance = cls()
    handler = instance.spider_opened
    crawler.signals.connect(handler, signal=signals.spider_opened)
    return instance
def spider_opened(self, spider):
    """Log, at INFO level, that ``spider`` has been opened.

    Args:
        spider: Object providing ``name`` and a ``logger`` with ``info``.
    """
    opened_message = 'Spider opened: %s' % spider.name
    spider.logger.info(opened_message)
def from_crawler(cls, crawler):
    """Create a pipeline and subscribe it to the open and close signals.

    Args:
        cls: Class to instantiate; it is called with no arguments.
        crawler: Object whose ``signals.connect`` registers the handlers.

    Returns:
        The newly created pipeline instance.
    """
    # NOTE(review): no @classmethod decorator is visible on this definition;
    # presumably one is applied in the original file -- confirm.
    instance = cls()
    subscriptions = (
        (instance.spider_opened, signals.spider_opened),
        (instance.spider_closed, signals.spider_closed),
    )
    for handler, signal in subscriptions:
        crawler.signals.connect(handler, signal)
    return instance
def spider_opened(self, spider):
    """Open the JSON output file for ``spider`` and start the exporter.

    The open file handle is stored in ``self.files`` keyed by the spider,
    and the exporter is stored on ``self.exporter``.

    Args:
        spider: The spider being opened; used as the key in ``self.files``.
    """
    # Binary append mode: Scrapy documents that the exporter's file object
    # must accept bytes. The local is no longer called ``file`` so it does
    # not shadow the Python 2 builtin.
    # NOTE(review): with append mode each run adds to the same file; confirm
    # the combined output is still the JSON you expect.
    output = open('/tmp/jobs.json', 'a+b')

    # The handle is kept open on purpose; presumably a matching close
    # handler releases it -- confirm.
    self.files[spider] = output

    self.exporter = JsonItemExporter(
        output, encoding='utf-8', ensure_ascii=False)
    self.exporter.start_exporting()
def from_crawler(cls, crawler):
    """Create an instance and subscribe it to the ``spider_opened`` signal.

    Args:
        cls: Class to instantiate; it is called with no arguments.
        crawler: Object whose ``signals.connect`` registers the handler.

    Returns:
        The newly created instance.
    """
    # NOTE(review): no @classmethod decorator is visible on this definition;
    # presumably one is applied in the original file -- confirm.
    created = cls()
    on_opened = created.spider_opened
    crawler.signals.connect(on_opened, signal=signals.spider_opened)
    return created
def spider_opened(self, spider):
    """Emit an INFO log line announcing that ``spider`` was opened.

    Args:
        spider: Object providing ``name`` and a ``logger`` with ``info``.
    """
    spider_name = spider.name
    spider.logger.info('Spider opened: %s' % spider_name)

# NOTE(review): a trailing comment here was garbled during extraction; it
# mentions "User-Agent" -- restore the original wording if it is available.