We extracted the following code examples from open-source Python projects to illustrate how to use scrapy.signals.engine_stopped().
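Before the individual examples, here is a minimal, self-contained sketch of the pattern they all share: a component's from_crawler classmethod connects a handler to signals.engine_stopped, which Scrapy sends once when the engine finishes shutting down. The EngineStopLogger extension and its log message are illustrative assumptions, not code from any of the projects below.

import logging

from scrapy import signals

logger = logging.getLogger(__name__)


class EngineStopLogger:
    """Hypothetical extension: log a line when the Scrapy engine stops."""

    @classmethod
    def from_crawler(cls, crawler):
        ext = cls()
        # engine_stopped is sent once, after the engine has fully shut down.
        crawler.signals.connect(ext.engine_stopped,
                                signal=signals.engine_stopped)
        return ext

    def engine_stopped(self):
        logger.info('Scrapy engine stopped')

To activate such an extension, list it in the project's EXTENSIONS setting, e.g. EXTENSIONS = {'myproject.extensions.EngineStopLogger': 500} (the module path is a placeholder).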
Example 1

def from_crawler(cls, crawler):
    s = crawler.settings
    proxy_path = s.get('ROTATING_PROXY_LIST_PATH', None)
    if proxy_path is not None:
        with codecs.open(proxy_path, 'r', encoding='utf8') as f:
            proxy_list = [line.strip() for line in f if line.strip()]
    else:
        proxy_list = s.getlist('ROTATING_PROXY_LIST')
    if not proxy_list:
        raise NotConfigured()
    mw = cls(
        proxy_list=proxy_list,
        logstats_interval=s.getfloat('ROTATING_PROXY_LOGSTATS_INTERVAL', 30),
        stop_if_no_proxies=s.getbool('ROTATING_PROXY_CLOSE_SPIDER', False),
        max_proxies_to_try=s.getint('ROTATING_PROXY_PAGE_RETRY_TIMES', 5),
        backoff_base=s.getfloat('ROTATING_PROXY_BACKOFF_BASE', 300),
        backoff_cap=s.getfloat('ROTATING_PROXY_BACKOFF_CAP', 3600)
    )
    crawler.signals.connect(mw.engine_started, signal=signals.engine_started)
    crawler.signals.connect(mw.engine_stopped, signal=signals.engine_stopped)
    return mw
Example 2

def __init__(self):
    dispatcher.connect(self.spider_opended, signals.spider_opened)
    dispatcher.connect(self.spider_closed, signals.spider_closed)
    dispatcher.connect(self.engine_stopped, signals.engine_stopped)
    dispatcher.connect(self.engine_started, signals.engine_started)
    # current working directory, where the scrapy_site project lives
    self.curpath = os.getcwd()
    # directory where per-spider message files are written
    self.spidername_filepath = self.curpath + "/scrapy_site/msg/"
    # keywords loaded from keyword.conf
    self.keywordsDict = dict()
    self.getKeywords()
    # website-name mapping
    self.webnamesDict = dict()
    self.getWebnames()
    # collected messages
    self.msgDict = dict()
    SavePipeline.initCount = SavePipeline.initCount + 1
Example 3

def __init__(self):
    self.conn = None
    dispatcher.connect(self.initialize, signals.engine_started)
    dispatcher.connect(self.finalize, signals.engine_stopped)
Example 4

def __init__(self, crawler):
    if not crawler.settings.getbool('JSONRPC_ENABLED'):
        raise NotConfigured
    self.crawler = crawler
    logfile = crawler.settings['JSONRPC_LOGFILE']
    self.portrange = [int(x) for x in
                      crawler.settings.getlist('JSONRPC_PORT', [6023, 6073])]
    self.host = crawler.settings.get('JSONRPC_HOST', '127.0.0.1')
    root = RootResource(crawler)
    root.putChild('crawler', CrawlerResource(self.crawler))
    # root.putChild('spidercls', CrawlerResource(self.crawler.__dict__['spidercls']))
    server.Site.__init__(self, root, logPath=logfile)
    self.noisy = False
    crawler.signals.connect(self.start_listening, signals.engine_started)
    crawler.signals.connect(self.stop_listening, signals.engine_stopped)
Example 5

def __init__(self):
    self.filename += settings.MARKET_NAME
    self.filename += ".db"
    self.filename = path.join(settings.DATABASE_DIR, self.filename)
    print(self.filename)
    self.conn = None
    dispatcher.connect(self.initialize, signals.engine_started)
    dispatcher.connect(self.finalize, signals.engine_stopped)
Example 6

def from_crawler(cls, crawler):
    pipeline = cls()
    crawler.signals.connect(pipeline.engine_started, signals.engine_started)      # engine started
    crawler.signals.connect(pipeline.engine_stopped, signals.engine_stopped)      # engine stopped
    crawler.signals.connect(pipeline.item_scraped, signals.item_scraped)          # item passed all pipeline stages
    crawler.signals.connect(pipeline.item_dropped, signals.item_dropped)          # item dropped by a pipeline
    crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)        # spider opened for crawling
    crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)        # spider finished crawling
    crawler.signals.connect(pipeline.spider_idle, signals.spider_idle)            # spider has no pending requests
    crawler.signals.connect(pipeline.spider_error, signals.spider_error)          # spider callback raised an error
    crawler.signals.connect(pipeline.request_scheduled, signals.request_scheduled)  # request scheduled
    crawler.signals.connect(pipeline.request_dropped, signals.request_dropped)      # request dropped by the scheduler
    crawler.signals.connect(pipeline.response_received, signals.response_received)  # response received from downloader
    crawler.signals.connect(pipeline.response_downloaded, signals.response_downloaded)  # response downloaded
    return pipeline
def engine_stopped(self): """ ???? :return: """ print time.strftime("%Y-%m-%d %H:%M:%S"), 'Pipeline Signals: engine_stopped' pass
Example 8

def __init__(self):
    self.is_running = False
    dispatcher.connect(self.pause_crawler, signals.engine_stopped)
    self.setting = get_project_settings()
    self.process = None
Example 9

def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.items = {}
    self.found = {}
    dispatcher.connect(self.init_scrapy, signals.engine_started)
    dispatcher.connect(self.close_scrapy, signals.engine_stopped)
Example 10

def __init__(self, asin, daily=0, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.asin = asin
    self.last_review = 0
    self.profile_update_self = False  # whether this spider updates the profile itself
    self.updated = False              # whether the profile has been updated
    self.daily = True if int(daily) == 1 else False  # daily incremental crawl flag
    self.start_urls = [
        'https://www.amazon.com/product-reviews/%s?sortBy=recent&filterByStar=three_star' % self.asin,
        'https://www.amazon.com/product-reviews/%s?sortBy=recent&filterByStar=two_star' % self.asin,
        'https://www.amazon.com/product-reviews/%s?sortBy=recent&filterByStar=one_star' % self.asin
    ]
    dispatcher.connect(self.update_profile_self, signals.engine_stopped)
    dispatcher.connect(self.init_profile, signals.engine_started)
Example 11

def webdriver(self):
    """Return the webdriver instance, instantiate it if necessary."""
    if self._webdriver is None:
        short_arg_classes = (webdriver.Firefox, webdriver.Ie)
        if issubclass(self._browser, short_arg_classes):
            cap_attr = 'capabilities'
        else:
            cap_attr = 'desired_capabilities'
        options = self._options
        options[cap_attr] = self._desired_capabilities
        self._webdriver = self._browser(**options)
        self._webdriver.set_window_size(settings.DRIVER_WINDOW_WIDTH,
                                        settings.DRIVER_WINDOW_HEIGHT)
        self._webdriver.set_page_load_timeout(
            self.crawler.settings.get('DOMAIN_TIMEOUT', 30))
        self.crawler.signals.connect(self._cleanup, signal=engine_stopped)
    return self._webdriver
Example 12

def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.items = {}
    self.found = {}
    self.keyword_pool = {}
    self.store_poll = {}
    self.store_date = {}
    dispatcher.connect(self.init_scrapy, signals.engine_started)
    dispatcher.connect(self.close_scrapy, signals.engine_stopped)
Example 13

def __init__(self, signal_manager, app):
    super(_QApplicationStopper, self).__init__()
    self._qapplication = weakref.ref(app)
    self.signals = signal_manager
    self.signals.connect(self, signal=signals.engine_stopped, weak=False)
Example 14

def __call__(self):
    self.signals.disconnect(self, signals.engine_stopped)
    app = self._qapplication()
    if app is not None:
        app.quit()
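Examples 13 and 14 belong to the same helper class: the stopper object registers itself as the receiver (weak=False keeps the reference alive), and when engine_stopped fires, its __call__ method disconnects the handler and quits the QApplication.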
Example 15

def engine_stopped():
    if QApplication.instance():
        QApplication.instance().quit()
Example 16

def engine_stopped(self):
    if self.log_task.running:
        self.log_task.stop()
    if self.reanimate_task.running:
        self.reanimate_task.stop()
Example 17

def engine_stopped(self):
    print('Pipeline initCount========%s' % SavePipeline.initCount)