The following 14 code examples, extracted from open-source Python projects, illustrate how to use scrapy.signals.item_scraped().
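For orientation before the project examples: item_scraped is sent by the engine after an item has passed through all Item Pipeline stages without being dropped, and it carries item, response, and spider as keyword arguments. Below is a minimal, self-contained sketch of connecting a handler to it; the extension name and counter are illustrative, not taken from any of the projects that follow.

from scrapy import signals

class ItemCountExtension(object):
    """Hypothetical extension that counts scraped items."""

    def __init__(self):
        self.count = 0

    @classmethod
    def from_crawler(cls, crawler):
        ext = cls()
        # Fired once per item that survives every pipeline stage.
        crawler.signals.connect(ext.item_scraped, signal=signals.item_scraped)
        return ext

    def item_scraped(self, item, response, spider):
        self.count += 1
        spider.logger.info('items scraped so far: %d', self.count)

To activate such an extension, list it in the EXTENSIONS setting, e.g. {'myproject.extensions.ItemCountExtension': 500}.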
@classmethod
def from_crawler(cls, crawler):
    instance = cls(crawler.stats)
    crawler.signals.connect(instance.item_dropped, signal=signals.item_dropped)
    crawler.signals.connect(instance.item_scraped, signal=signals.item_scraped)
    crawler.signals.connect(instance.response_received, signal=signals.response_received)
    crawler.signals.connect(instance.response_downloaded, signal=signals.response_downloaded)
    # Project-defined custom signals (see the sketch below).
    crawler.signals.connect(instance.item_saved, signal=mysignals.item_saved)
    crawler.signals.connect(instance.item_saved_failed, signal=mysignals.item_saved_failed)
    crawler.signals.connect(instance.html_saved, signal=mysignals.html_saved)
    crawler.signals.connect(instance.html_saved_failed, signal=mysignals.html_saved_failed)
    crawler.signals.connect(instance.timeouterror, signal=mysignals.timeouterror)
    crawler.signals.connect(instance.dnslookuperror, signal=mysignals.dnslookuperror)
    return instance
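The mysignals module in the example above is project-specific: custom Scrapy signals are ordinarily just module-level sentinel objects, fired through the crawler's SignalManager. A hedged sketch of what such a module and a sender might look like (the names mirror the example; the pipeline context and self.db are assumptions):

# mysignals.py -- custom signals are plain sentinel objects
item_saved = object()
item_saved_failed = object()

# Firing one from a hypothetical persistence pipeline:
def process_item(self, item, spider):
    self.db.save(dict(item))  # assumed storage call
    spider.crawler.signals.send_catch_log(
        signal=mysignals.item_saved, item=item, spider=spider)
    return item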
def __init__(self, crawler):
    self.crawler = crawler
    self.initiatives = 0
    self.amendments = 0
    self.finishtext = 0
    self.responses = 0
    self.members = 0
    # connect the extension object to signals
    crawler.signals.connect(self.spider_closed, signal=signals.spider_closed)
    crawler.signals.connect(self.item_scraped, signal=signals.item_scraped)
def item_scraped(self, item, spider):
    if isinstance(item, InitiativeItem):
        self.initiatives += 1
    elif isinstance(item, AmendmentItem):
        self.amendments += 1
    elif isinstance(item, FinishTextItem):
        self.finishtext += 1
    elif isinstance(item, ResponseItem):
        self.responses += 1
    elif isinstance(item, MemberItem):
        self.members += 1  # count members, not responses
def setup_redis(self):
    """Setup redis connection and idle signal.

    This should be called after the spider has set its crawler object.
    """
    if not self.redis_key:
        self.redis_key = '%s:start_urls' % self.name
    self.server = connection.from_settings(self.crawler.settings)
    # The idle signal is called when the spider has no requests left;
    # that's when we will schedule new requests from the redis queue.
    self.crawler.signals.connect(self.spider_idle, signal=signals.spider_idle)
    self.crawler.signals.connect(self.item_scraped, signal=signals.item_scraped)
    self.log("Reading URLs from redis list '%s'" % self.redis_key)
def item_scraped(self, *args, **kwargs):
    """Avoids waiting for the spider to idle before scheduling the next request."""
    self.schedule_next_request()
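These two handlers (setup_redis and item_scraped) come from the scrapy-redis pattern: connecting item_scraped to schedule_next_request makes the spider pull the next URL from Redis as soon as an item is produced, instead of waiting for the spider_idle signal to fire. A sketch of what schedule_next_request typically does in that project (paraphrased, not a verbatim copy):

def schedule_next_request(self):
    """Schedule a request if one is waiting in the redis queue."""
    req = self.next_request()  # pops a URL from redis and builds a Request
    if req:
        self.crawler.engine.crawl(req, spider=self)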
@classmethod
def from_crawler(cls, crawler):
    ext = cls(crawler.stats)
    crawler.signals.connect(ext.spider_opened, signal=signals.spider_opened)
    crawler.signals.connect(ext.spider_closed, signal=signals.spider_closed)
    crawler.signals.connect(ext.item_scraped, signal=signals.item_scraped)
    return ext
def item_scraped(self, item, spider):
    pass
@classmethod
def from_crawler(cls, crawler):
    instance = cls(crawler.stats)
    crawler.signals.connect(instance.item_dropped, signal=signals.item_dropped)
    crawler.signals.connect(instance.item_scraped, signal=signals.item_scraped)
    crawler.signals.connect(instance.response_received, signal=signals.response_received)
    crawler.signals.connect(instance.response_downloaded, signal=signals.response_downloaded)
    return instance
def item_scraped(self, item, spider):
    # Called after an item has passed through all item pipeline stages.
    self.stats.inc_value('item/scraped', spider=spider)
@classmethod
def from_crawler(cls, crawler):
    pipeline = cls()
    crawler.signals.connect(pipeline.engine_started, signals.engine_started)            # engine started
    crawler.signals.connect(pipeline.engine_stopped, signals.engine_stopped)            # engine stopped
    crawler.signals.connect(pipeline.item_scraped, signals.item_scraped)                # item passed all pipelines
    crawler.signals.connect(pipeline.item_dropped, signals.item_dropped)                # item dropped by a pipeline
    crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)              # spider opened
    crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)              # spider closed
    crawler.signals.connect(pipeline.spider_idle, signals.spider_idle)                  # spider has no requests left
    crawler.signals.connect(pipeline.spider_error, signals.spider_error)                # spider callback raised an error
    crawler.signals.connect(pipeline.request_scheduled, signals.request_scheduled)      # request scheduled
    crawler.signals.connect(pipeline.request_dropped, signals.request_dropped)          # request dropped by the scheduler
    crawler.signals.connect(pipeline.response_received, signals.response_received)      # response received
    crawler.signals.connect(pipeline.response_downloaded, signals.response_downloaded)  # response downloaded
    return pipeline
def item_scraped(self, item, response, spider):
    """Called after an item has been scraped and has passed all pipeline stages.

    :param item:
    :param response:
    :param spider:
    :return:
    """
    print(time.strftime("%Y-%m-%d %H:%M:%S"), 'Pipeline Signals: item_scraped')
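One detail the examples above rely on: although item_scraped is sent with item, response, and spider keyword arguments, a handler may declare only the subset it needs, because Scrapy dispatches signals with a robust-apply mechanism that passes each receiver just the arguments its signature accepts. Both forms below are therefore valid receivers:

def item_scraped(self, item, response, spider):  # full signature
    ...

def item_scraped(self, item, spider):            # 'response' omitted; still works
    ...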