The following code examples, extracted from open-source Python projects, illustrate how to use scrapy.log.WARNING.
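Every example below follows the same pattern: import the legacy scrapy.log module and pass log.WARNING as the severity of a log.msg() call. Here is a minimal sketch of that pattern, assuming a pre-1.0 Scrapy release where scrapy.log still exists (the function and field names are illustrative, not taken from any of the projects below):

from scrapy import log

def report_empty_field(item, field):
    # Emit a warning through Scrapy's legacy logging facade.
    if not item.get(field):
        log.msg('Missing %s in item %r' % (field, item), level=log.WARNING)

Note that scrapy.log was deprecated in Scrapy 1.0 in favour of the standard library logging module, so this idiom survives mainly in older codebases such as the ones sampled here.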
def get_last_time(self):
    # Requires: import time, sqlite3; from scrapy import log
    try:
        # First run: create the history table and fall back to a default time.
        self.cu.execute('CREATE TABLE history (time TEXT, result TEXT, spider_name TEXT primary key)')
        last_time = "2015-1-1 00:00:00"
    except sqlite3.OperationalError:
        # The table already exists: read the last recorded time for this spider.
        try:
            self.cu.execute('SELECT time FROM history WHERE spider_name=?', (self.spider_name,))
            last_time = self.cu.fetchone()[0]
            log.msg('************* ' + last_time, level=log.WARNING)
        except (TypeError, sqlite3.OperationalError):
            # No row for this spider yet: fall back to a default time.
            last_time = "2015-5-1 00:00:00"
            log.msg('************* ' + last_time, level=log.WARNING)
    last_time = time.strptime(last_time, '%Y-%m-%d %H:%M:%S')
    return time.mktime(last_time)
def insert_new_time(self):
    # Requires: import time, sqlite3; from scrapy import log
    # Only record the new high-water mark if it is not in the future.
    if time.mktime(time.strptime(self.item_max_time, '%Y-%m-%d %H:%M:%S')) < time.time():
        if self.sqlite_flag:
            try:
                log.msg('delete from history where spider_name=' + self.spider_name, level=log.WARNING)
                self.cu.execute('DELETE FROM history WHERE spider_name=?', (self.spider_name,))
                self.sx.commit()
            except sqlite3.OperationalError:
                log.msg('__________', level=log.WARNING)
            sql = "insert into history values (?, ?, ?)"
            params = (self.item_max_time, self.item_max_id, self.spider_name)
            self.cu.execute(sql, params)
            self.sx.commit()
            self.close_sqlite()
def parse_datetime(value):
    # parse is presumably dateutil.parser.parse; log is scrapy.log
    try:
        d = parse(value)
    except ValueError:
        log.msg('Unable to parse %s' % value, level=log.WARNING)
        return value
    else:
        return d.isoformat()
def parse_date(value):
    # parse is presumably dateutil.parser.parse; log is scrapy.log
    try:
        d = parse(value)
    except ValueError:
        log.msg('Unable to parse %s' % value, level=log.WARNING)
        return value
    else:
        return d.strftime("%Y-%m-%d")
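For illustration, and assuming parse is dateutil.parser.parse, the two helpers above behave like this (the input strings are made-up examples):

parse_datetime('2015-05-01 12:30:00')  # -> '2015-05-01T12:30:00'
parse_date('May 1, 2015')              # -> '2015-05-01'
parse_date('not a date')               # logs at WARNING level, returns 'not a date'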
def process_item(self, item, spider):
    # Requires: from scrapy import log; from scrapy.exceptions import DropItem;
    # from tld import get_tld; from tld.exceptions import TldIOError, TldDomainNotFound, TldBadUrl
    if not isinstance(item, Alert):
        return item
    uri = item['uri']
    if not uri:
        raise DropItem("Not a valid alert URI: ", uri)
    # Drop the alert if any custom-whitelist pattern appears in the URI.
    if spider.custom_whitelist:
        for pattern in spider.custom_whitelist:
            if pattern[0] in uri:
                raise DropItem("Whitelisted domain found in Alert: ", uri)
    # Drop the alert if its registered domain is on the Alexa whitelist.
    if spider.alexa_whitelist:
        try:
            domain = get_tld(uri)
            for alexa_domain in spider.alexa_whitelist:
                if domain.endswith(alexa_domain):
                    raise DropItem("Alert domain found in Alexa Whitelist: ", domain)
        except (TldIOError, TldDomainNotFound, TldBadUrl):
            # A malformed or unrecognised TLD is not fatal: let the alert through.
            log.msg("Error parsing TLD. Still allowing alert for " + uri, level=log.WARNING)
    return item
def warn(msg):
    # Coerce msg to str so exception objects can be passed in directly.
    log.msg(str(msg), level=log.WARNING)
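A hypothetical call site for this one-line wrapper; passing the exception object directly works because the helper applies str() before logging. The variable raw_price is illustrative, not from the source:

try:
    price = float(raw_price)
except (TypeError, ValueError) as e:
    warn(e)  # logged as e.g. "could not convert string to float: 'N/A'"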
def _conditional_insert(self, tx, item):
    # tx is presumably a DB-API cursor supplied by adbapi's runInteraction.
    # ori_html_path = self.save_html(item)
    # item['repost_post_id'] = ori_html_path
    query = u"insert ignore into post (url, topic_id, topic_kws, site_id, site_name, title, content, pt_time, st_time) values (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
    param = (item['topic_url'], item['topic_id'], item['topic_kw'],
             item['topic_site_id'], item['topic_site_name'], item['topic_title'],
             item['topic_content'], item['topic_pt_time'], item['topic_st_time'])
    tx.execute(query, param)
    log.msg('insert one', level=log.WARNING)
    print('---- insert one ----')
    # An alternative upsert statement left commented out in the original source:
    # sql = 'insert into ' + item['table_name'] + ' (id, url, board, site_id, data_type, title, content, post_time, scratch_time, poster_name, language_type, repost_post_id) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) ON DUPLICATE KEY UPDATE post_time=%s'
    # param = (item['topic_url'], item['topic_url'], item['topic_board'], item['site_id'], item['data_type'], item['topic_title'], item['topic_content'], item['topic_post_time'], item['scratch_time'], item['topic_author'], 0, item['repost_post_id'], item['topic_post_time'])
    # tx.execute(sql, param)
def _conditional_insert(self, tx, item):
    # Same pipeline helper as the previous example, minus the console print.
    # ori_html_path = self.save_html(item)
    # item['repost_post_id'] = ori_html_path
    query = u"insert ignore into post (url, topic_id, topic_kws, site_id, site_name, title, content, pt_time, st_time) values (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
    param = (item['topic_url'], item['topic_id'], item['topic_kw'],
             item['topic_site_id'], item['topic_site_name'], item['topic_title'],
             item['topic_content'], item['topic_pt_time'], item['topic_st_time'])
    tx.execute(query, param)
    log.msg('insert one', level=log.WARNING)
    # An alternative upsert statement left commented out in the original source:
    # sql = 'insert into ' + item['table_name'] + ' (id, url, board, site_id, data_type, title, content, post_time, scratch_time, poster_name, language_type, repost_post_id) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) ON DUPLICATE KEY UPDATE post_time=%s'
    # param = (item['topic_url'], item['topic_url'], item['topic_board'], item['site_id'], item['data_type'], item['topic_title'], item['topic_content'], item['topic_post_time'], item['scratch_time'], item['topic_author'], 0, item['repost_post_id'], item['topic_post_time'])
    # tx.execute(sql, param)
def open(self, spider):
    super(RecorderScheduler, self).open(spider)
    self.stats_manager = StatsManager(spider.crawler.stats)
    settings = spider.crawler.settings
    self.recorder_enabled = settings.get('RECORDER_ENABLED', DEFAULT_RECORDER_ENABLED)
    if not self.recorder_enabled:
        log.msg('Recorder disabled!', log.WARNING)
        return
    log.msg('Starting recorder', log.INFO)
    recorder_storage = settings.get('RECORDER_STORAGE_ENGINE', None)
    if not recorder_storage:
        self.recorder_enabled = False
        log.msg('Missing Recorder storage! Recorder disabled...', log.WARNING)
        return
    self.graph = graphs.Manager(
        engine=recorder_storage,
        drop_all_tables=settings.getbool('RECORDER_STORAGE_DROP_ALL_TABLES',
                                         DEFAULT_RECORDER_STORAGE_DROP_ALL_TABLES),
        clear_content=settings.getbool('RECORDER_STORAGE_CLEAR_CONTENT',
                                       DEFAULT_RECORDER_STORAGE_CLEAR_CONTENT))
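The scheduler above is configured entirely through Scrapy settings, so enabling it is a matter of project configuration. A sketch of what the relevant settings.py entries might look like; the storage URI is an assumed SQLAlchemy-style example, not taken from the source, and the DEFAULT_* fallbacks live in the module that defines the scheduler:

# Illustrative settings.py entries (values are assumptions)
RECORDER_ENABLED = True
RECORDER_STORAGE_ENGINE = 'sqlite:///record.db'
RECORDER_STORAGE_DROP_ALL_TABLES = True
RECORDER_STORAGE_CLEAR_CONTENT = True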