Python scrapy.exceptions module: UsageError() code examples

The following 8 code examples, extracted from open-source Python projects, illustrate how to use scrapy.exceptions.UsageError().
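
Before the project excerpts, here is a minimal sketch of the common pattern: a custom Scrapy command validates its arguments and raises UsageError, which makes Scrapy print the command's usage/help text and exit with an error code. The GreetCommand below is hypothetical and not taken from any of the projects listed here.

from scrapy.commands import ScrapyCommand
from scrapy.exceptions import UsageError


class GreetCommand(ScrapyCommand):
    """Hypothetical command: scrapy greet <name>"""

    def syntax(self):
        return "<name>"

    def short_desc(self):
        return "Print a greeting for <name>"

    def run(self, args, opts):
        if len(args) != 1:
            # Raising UsageError makes Scrapy show this command's usage text
            raise UsageError("exactly one <name> argument is required")
        print("Hello, %s!" % args[0])

To actually expose such a command, it would need to live in a module referenced by the project's COMMANDS_MODULE setting.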

Project: scrapy-image    Author: lamphp
def process_options(self, args, opts):
        try:
            self.settings.setdict(arglist_to_dict(opts.set),
                                  priority='cmdline')
        except ValueError:
            raise UsageError("Invalid -s value, use -s NAME=VALUE", print_help=False)

        if opts.logfile:
            self.settings.set('LOG_ENABLED', True, priority='cmdline')
            self.settings.set('LOG_FILE', opts.logfile, priority='cmdline')

        if opts.loglevel:
            self.settings.set('LOG_ENABLED', True, priority='cmdline')
            self.settings.set('LOG_LEVEL', opts.loglevel, priority='cmdline')

        if opts.nolog:
            self.settings.set('LOG_ENABLED', False, priority='cmdline')

        if opts.pidfile:
            with open(opts.pidfile, "w") as f:
                f.write(str(os.getpid()) + os.linesep)

        if opts.pdb:
            failure.startDebugMode()
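
For reference, the ValueError handled in the snippet above comes from scrapy.utils.conf.arglist_to_dict, which turns a list of NAME=VALUE strings into a dict; an entry without '=' cannot be split into a key/value pair, so building the dict raises ValueError, which the command re-raises as UsageError. A quick sketch (assuming a standard Scrapy installation):

from scrapy.utils.conf import arglist_to_dict

print(arglist_to_dict(['LOG_LEVEL=INFO', 'DOWNLOAD_DELAY=1']))
# {'LOG_LEVEL': 'INFO', 'DOWNLOAD_DELAY': '1'}

arglist_to_dict(['LOG_LEVEL'])  # no '=' -> ValueError, surfaced to the user as UsageError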
Project: User-Python-Write-a-web-crawler    Author: xiexiaoshinick
def process_options(self, args, opts):
        ScrapyCommand.process_options(self, args, opts)
        try:
            opts.spargs = arglist_to_dict(opts.spargs)
        except ValueError:
            raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
        if opts.output:
            if opts.output == '-':
                self.settings.set('FEED_URI', 'stdout:', priority='cmdline')
            else:
                self.settings.set('FEED_URI', opts.output, priority='cmdline')
            feed_exporters = without_none_values(
                self.settings.getwithbase('FEED_EXPORTERS'))
            valid_output_formats = feed_exporters.keys()
            if not opts.output_format:
                opts.output_format = os.path.splitext(opts.output)[1].replace(".", "")
            if opts.output_format not in valid_output_formats:
                raise UsageError("Unrecognized output format '%s', set one"
                                 " using the '-t' switch or as a file extension"
                                 " from the supported list %s" % (opts.output_format,
                                                                  tuple(valid_output_formats)))
            self.settings.set('FEED_FORMAT', opts.output_format, priority='cmdline')
    #??
Project: scrapy-streaming    Author: scrapy-plugins
def run(self, args, opts):
        if len(args) != 1:
            raise UsageError()
        command = args[0]

        arguments = _parse_arguments(opts.args)
        spider = ExternalSpider('StreamingSpider', command, arguments)
        loader = ExternalSpiderLoader.from_settings(self.settings, load_spiders=False)

        loader.crawl(spider)
Project: domain-discovery-crawler    Author: TeamHG-Memex
def run(self, args, opts):
        if len(args) != 4:
            raise UsageError()
        spider_name, url, login, password = args

        crawler = self.crawler_process.create_crawler(spider_name)
        scheduler = Scheduler.from_settings(self.settings)
        spider = crawler.spidercls.from_crawler(crawler)
        scheduler.open(spider)

        add_login(spider, url, login, password, queue=scheduler.queue)
Project: domain-discovery-crawler    Author: TeamHG-Memex
def run(self, args, opts):
        if len(args) != 1:
            raise UsageError()

        crawler = self.crawler_process.create_crawler(args[0])
        scheduler = Scheduler.from_settings(self.settings)
        spider = crawler.spidercls.from_crawler(crawler)
        scheduler.open(spider)
        stats = scheduler.queue.get_stats()

        print('\nQueue size: {len}, domains: {n_domains}\n'.format(**stats))
        print_top = 10
        printed_count = 0
        queues = stats['queues']
        print('{:<50}\tCount\tScore'.format('Domain'))
        for queue, score, count in queues[:print_top]:
            printed_count += count
            domain = queue.rsplit(':')[-1]
            print('{:<50}\t{}\t{:.0f}'.format(domain, count, score))
        others_count = sum(count for _, _, count in queues) - printed_count
        if others_count:
            print('...')
            print('{:<50}\t{}'.format(
                'other {}:'.format(len(queues) - print_top), others_count))
            print()

        if opts.output:
            with open(opts.output, 'w') as f:
                json.dump(stats, f,
                          ensure_ascii=False, indent=True, sort_keys=True)
            print('Stats dumped to {}'.format(opts.output))
Project: domain-discovery-crawler    Author: TeamHG-Memex
def run(self, args, opts):
        if not args:
            raise UsageError()
        if len(args) == 1 and '*' in args[0]:
            # paths were not expanded (docker)
            filenames = glob.glob(args[0])
        else:
            filenames = args
        del args
        filtered_filenames = [
            f for f in filenames
            if re.match(r'[a-z0-9]{12}\.csv$', os.path.basename(f))]
        filenames = filtered_filenames or filenames
        if not filenames:
            raise UsageError()

        response_logs = []
        for filename in filenames:
            with json_lines.open(filename) as f:
                response_logs.append(pd.DataFrame(f))
        print('Read data from {} files'.format(len(filenames)))

        all_rpms = [rpms for rpms in (
            get_rpms(name, rlog, step=opts.step, smooth=opts.smooth)
            for name, rlog in zip(filenames, response_logs))
                    if rpms is not None]
        if all_rpms:
            print_rpms(all_rpms, opts)

        print_scores(response_logs, opts)
Project: Tieba_Spider    Author: Aqua-Dream
def set_pages(self, pages):
        if len(pages) == 0:
            begin_page = 1
            end_page = 999999
        else:
            begin_page = pages[0]
            end_page = pages[1]
        if begin_page <= 0:
            raise UsageError("The begin page number must be at least 1!")
        if begin_page > end_page:
            raise UsageError("The end page number must not be less than the begin page number!")
        self.settings.set('BEGIN_PAGE', begin_page, priority='cmdline')
        self.settings.set('END_PAGE', end_page, priority='cmdline')
Project: Tieba_Spider    Author: Aqua-Dream
def run(self, args, opts):
        self.set_pages(opts.pages)
        self.settings.set('GOOD_ONLY', opts.good_only)
        self.settings.set('SEE_LZ', opts.see_lz)
        if opts.filter:
            try:
                # Resolve the named filter function; it is expected to be an
                # attribute of a 'filter' module/object in the project
                opts.filter = eval('filter.' + opts.filter)
            except:
                raise UsageError("Invalid filter function name!")
        self.settings.set("FILTER", opts.filter)
        cfg = config.config()
        if len(args) >= 3:
            raise UsageError("Too many arguments!")

        self.settings.set('MYSQL_HOST', cfg.config['MYSQL_HOST'])
        self.settings.set('MYSQL_USER', cfg.config['MYSQL_USER'])
        self.settings.set('MYSQL_PASSWD', cfg.config['MYSQL_PASSWD'])

        tbname = cfg.config['DEFAULT_TIEBA']
        if len(args) >= 1:
            tbname = args[0]
        # Python 2: the tieba name may be a unicode object; encode it so the
        # byte-string comparison against the config keys below works
        if isinstance(tbname, unicode):
            tbname = tbname.encode('utf8')

        # Look up the database name mapped to this tieba in the config
        dbname = None
        for key in cfg.config['MYSQL_DBNAME'].keys():
            if key.encode('utf8') == tbname:
                dbname = cfg.config['MYSQL_DBNAME'][key]
        if len(args) >= 2:
            dbname = args[1]
            cfg.config['MYSQL_DBNAME'][tbname.decode('utf8')] = dbname
        if not dbname:
            raise UsageError("Please input database name!")

        self.settings.set('TIEBA_NAME', tbname, priority='cmdline')
        self.settings.set('MYSQL_DBNAME', dbname, priority='cmdline')

        config.init_database(cfg.config['MYSQL_HOST'], cfg.config['MYSQL_USER'], cfg.config['MYSQL_PASSWD'], dbname)

        log = config.log(tbname, dbname, self.settings['BEGIN_PAGE'], opts.good_only, opts.see_lz)
        self.settings.set('SIMPLE_LOG', log)

        self.crawler_process.crawl('tieba', **opts.spargs)
        self.crawler_process.start()

        cfg.save()