我想为某些spider启用一些http代理,而对其他spider禁用它们。
我可以做这样的事情吗?
# settings.py
#
# NOTE(review): this cannot work as written — settings.py is evaluated at
# project load time and has no access to the currently-running spider, so
# `spider` below is undefined (the original comment "how to get spider
# name ???" points at exactly this gap). Shown here with the syntax errors
# fixed; prefer per-spider `custom_settings` (Scrapy >= 1.0) instead.
proxy_spiders = ['a1', 'b2']  # fixed: original had `b2'` with no opening quote

if spider in proxy_spiders:  # fixed: original tested `proxy_spider` (undefined name)
    # Spiders in the list get the proxy middleware enabled.
    HTTP_PROXY = 'http://127.0.0.1:8123'
    DOWNLOADER_MIDDLEWARES = {
        'myproject.middlewares.RandomUserAgentMiddleware': 400,
        'myproject.middlewares.ProxyMiddleware': 410,
        'scrapy.contrib.downloadermiddleware.useragent.UserAgentMiddleware': None,
    }
else:
    # All other spiders run without the proxy middleware.
    DOWNLOADER_MIDDLEWARES = {
        'myproject.middlewares.RandomUserAgentMiddleware': 400,
        'scrapy.contrib.downloadermiddleware.useragent.UserAgentMiddleware': None,
    }
如果上面的代码不起作用,还有其他建议吗?
但是从Scrapy 1.0.0版本开始,有一个新功能:您可以通过 `custom_settings` 类属性像这样为每个spider单独覆盖设置:
class MySpider(scrapy.Spider):
    """Spider whose requests go through the local HTTP proxy.

    Uses per-spider ``custom_settings`` (available since Scrapy 1.0) to
    enable the proxy middleware for this spider only.
    """

    name = "my_spider"

    custom_settings = {
        "HTTP_PROXY": 'http://127.0.0.1:8123',
        "DOWNLOADER_MIDDLEWARES": {
            'myproject.middlewares.RandomUserAgentMiddleware': 400,
            'myproject.middlewares.ProxyMiddleware': 410,
            'scrapy.contrib.downloadermiddleware.useragent.UserAgentMiddleware': None,
        },
    }


class MySpider2(scrapy.Spider):
    """Spider that runs without the proxy middleware.

    Same ``custom_settings`` mechanism, but the proxy middleware and
    ``HTTP_PROXY`` entry are simply omitted.
    """

    name = "my_spider2"

    custom_settings = {
        "DOWNLOADER_MIDDLEWARES": {
            'myproject.middlewares.RandomUserAgentMiddleware': 400,
            'scrapy.contrib.downloadermiddleware.useragent.UserAgentMiddleware': None,
        },
    }