We extracted the following 50 code examples from open-source Python projects to illustrate how to use scrapy.log.msg().
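All of the examples share the same basic call pattern, sketched below for orientation. This is a minimal, illustrative spider — the DemoSpider name, URL, and message text are not taken from any of the projects that follow — and it assumes a pre-1.0 Scrapy release, where scrapy.log is still the supported logging API (scrapy.log.msg() was deprecated in Scrapy 1.0 in favour of Python's standard logging module and spider.logger).

# Minimal sketch of the scrapy.log.msg() call pattern (assumes Scrapy < 1.0).
# DemoSpider and the message text are illustrative only.
from scrapy import log
from scrapy.spider import Spider


class DemoSpider(Spider):
    name = "demo"
    start_urls = ["http://example.com"]

    def parse(self, response):
        # level defaults to log.INFO; passing spider= attributes the
        # message to this spider in the crawl log
        log.msg("Visited %s" % response.url, level=log.DEBUG, spider=self)

In Scrapy 1.0 and later the equivalent call would be self.logger.debug("Visited %s", response.url).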
def process_item(self, item, spider):
    valid = True
    for data in item:
        if not data:
            valid = False
            raise DropItem('Missing{0}!'.format(data))
    if valid:
        self.collection.insert(dict(item))
        log.msg('??????!', level=log.DEBUG, spider=spider)
    return item

# def testdb(self):
#     # ???MongoHQ
#     con = pymongo.Connection("paulo.mongohq.com", 10042)
#     db = con.mytest
#     db.authenticate("root", "sa123")
#     db.urllist.drop()
def process_item(self, item, spider):
    if len(item['ip_port']):
        a = Proxy(
            ip_port=item['ip_port'],
            type=item['type'],
            level=item['level'],
            location=item['location'],
            speed=item['speed'],
            lifetime=item['lifetime'],
            lastcheck=item['lastcheck'],
            rule_id=item['rule_id'],
            source=item['source']
        )
        session = loadSession()
        try:
            session.merge(a)
            session.commit()
        except MySQLdb.IntegrityError, e:
            log.msg("MySQL Error: %s" % str(e), _level=logging.WARNING)
        return item
    else:
        log.msg("ip_port is invalid!", _level=logging.WARNING)
def process_item(self, item, spider):
    #import pudb; pu.db
    #val = "{0}\t{1}\t{2}\t{3}\t".format(item['appid'], item['title'], item['recommended'], item['intro'])
    #self.file.write('--------------------------------------------\n')
    #self.file.write(val)
    valid = True
    for data in item:
        if not data:
            valid = False
            raise DropItem("Missing {0}!".format(data))
    if valid:
        self.collection.insert(dict(item))
        log.msg("new app added to MongoDB database!", level=log.DEBUG, spider=spider)
    return item
def parse_model_selled(self, response):
    log.msg('[parse_selled] %s' % response.url)
    series_id = response.meta['series_id']
    data = json.loads(response.body_as_unicode())
    models = data['Spec']
    count = 0
    for model in models:
        model_id = model['Id']
        model_name = model['Name']
        group = model['GroupName']
        price = model['Price']
        model = ModelItem()
        model['id'] = model_id
        model['name'] = model_name
        model['series_id'] = series_id
        model['group'] = group
        model['price'] = price
        yield model
        count += 1
    log.msg('[parse_selled] model count is %d' % count)
def parse_xpath(self, response, xpath):
    appItemList = []
    sel = Selector(response)
    for url in sel.xpath(xpath).extract():
        url = urljoin(response.url, url)
        log.msg("Catch an application: %s" % url, level=log.INFO)
        appItem = AppItem()
        appItem['url'] = url
        appItemList.append(appItem)
    return appItemList

#def parse_anzhi(self, response, xpath):
#    appItemList = []
#    hxs = HtmlXPathSelector(response)
#    for script in hxs.select(xpath).extract():
#        id = re.search(r"\d+", script).group()
#        url = "http://www.anzhi.com/dl_app.php?s=%s&n=5" % (id,)
#        appItem = AppItem()
#        appItem['url'] = url
#        appItemList.append(appItem)
#    return appItemList
def _do_upinsert(self, conn, item, spider):
    conn.execute("""SELECT EXISTS(
        SELECT 1 FROM wstable WHERE id = %s
    )""", (item['id'],))
    ret = conn.fetchone()[0]
    uri, title, author, time, description, content, images, view, id1 = self._parseItem(item)
    if ret:
        conn.execute("""
            update wstable set uri = %s, title = %s, author = %s, time1 = %s,
            description = %s, content = %s, images = %s, view1 = %s where id = %s
        """, (uri, title, author, time, description, content, images, view, id1))
        # log.msg("""
        #     update wstable set uri = %s, title = %s, author = %s, time1 = %s, description = %s, content = %s, images = %s, view1 = %s where id = %s
        # """ % (uri, title, author, time, description, content, images, view, id1))
    else:
        # log.msg("""
        #     insert into wstable(id, uri, title, author, time1, description, content, images, view1)
        #     values(%s, %s, %s, %s, %s, %s, %s, %s, %s)
        # """ % (id1, uri, title, author, time, description, content, images, view))
        conn.execute("""
            insert into wstable(id, uri, title, author, time1, description, content, images, view1)
            values(%s, %s, %s, %s, %s, %s, %s, %s, %s)
        """, (id1, uri, title, author, time, description, content, images, view))
    # log.msg('finished item %s' % item['id'])
    print 'finished item %s' % item['id']
def process_item(self, item, spider):
    if spider.name == 'baiduTopStockSpider':
        collection = self.db[settings['stock']]
        d = dict(item)
        cursor = list(collection.find({'num': d["num"], 'source': d["source"]}))
        if cursor:
            collection.update({'_id': cursor[0]['_id']}, d)
        else:
            collection.insert(d)
        log.msg("stock added to MongoDB database!", level=log.DEBUG, spider=spider)
    elif spider.name == 'xueqiuPostSpider':
        collection = self.db['post']
        collection.save(dict(item))
        log.msg("post added to MongoDB database!", level=log.DEBUG, spider=spider)
    return item
def parse(self, response): """ default parse method, rule is not useful now """ # import pdb; pdb.set_trace() response = response.replace(url=HtmlParser.remove_url_parameter(response.url)) hxs = HtmlXPathSelector(response) index_level = self.determine_level(response) log.msg("Parse: index level:" + str(index_level)) if index_level in [1, 2, 3, 4]: self.save_to_file_system(index_level, response) relative_urls = self.get_follow_links(index_level, hxs) if relative_urls is not None: for url in relative_urls: log.msg('yield process, url:' + url) yield Request(url, callback=self.parse) elif index_level == 5: personProfile = HtmlParser.extract_person_profile(hxs) linkedin_id = self.get_linkedin_id(response.url) linkedin_id = UnicodeDammit(urllib.unquote_plus(linkedin_id)).markup if linkedin_id: personProfile['_id'] = linkedin_id personProfile['url'] = UnicodeDammit(response.url).markup yield personProfile
def determine_level(self, response):
    """
    determine the index level of the current response, so we can decide whether to continue crawling or not.
    level 1: people/[a-z].html
    level 2: people/[A-Z][\d+].html
    level 3: people/[a-zA-Z0-9-]+.html
    level 4: search page, pub/dir/.+
    level 5: profile page
    """
    import re
    url = response.url
    if re.match(".+/[a-z]\.html", url):
        return 1
    elif re.match(".+/[A-Z]\d+.html", url):
        return 2
    elif re.match(".+/people-[a-zA-Z0-9-]+", url):
        return 3
    elif re.match(".+/pub/dir/.+", url):
        return 4
    elif re.match(".+/search/._", url):
        return 4
    elif re.match(".+/pub/.+", url):
        return 5
    log.msg("Crawl cannot determine the url's level: " + url)
    return None
def get_last_time(self):
    try:
        self.cu.execute('CREATE TABLE history (time TEXT,result TEXT,spider_name TEXT primary key)')
        last_time = "2015-1-1 00:00:00"
    except:
        try:
            self.cu.execute('SELECT time FROM history where spider_name="' + self.spider_name + '"')
            last_time = self.cu.fetchone()[0]
            log.msg('************* ' + last_time, level=log.WARNING)
        except:
            last_time = "2015-5-1 00:00:00"
            log.msg('************* ' + last_time, level=log.WARNING)
    last_time = time.strptime(last_time, '%Y-%m-%d %H:%M:%S')
    last_time = time.mktime(last_time)
    return last_time
def insert_new_time(self):
    if time.mktime(time.strptime(self.item_max_time, '%Y-%m-%d %H:%M:%S')) < time.time():
        if self.sqlite_flag:
            try:
                log.msg('delete from history where spider_name=' + self.spider_name, level=log.WARNING)
                self.cu.execute('delete from history where spider_name="' + self.spider_name + '"')
                self.sx.commit()
            except sqlite3.OperationalError, e:
                log.msg('__________', level=log.WARNING)
                pass
            sql = "insert into history values(?,?,?)"
            params = (self.item_max_time, self.item_max_id, self.spider_name)
            self.cu.execute(sql, params)
            self.sx.commit()
            self.close_sqlite()
def _retry(self, request, reason, spider):
    retries = request.meta.get('retry_times', 0) + 1
    if retries <= self.max_retry_times:
        log.msg(format="Retrying %(request)s "
                       "(failed %(retries)d times): %(reason)s",
                level=log.DEBUG, spider=spider, request=request,
                retries=retries, reason=reason)
        retryreq = request.copy()
        retryreq.meta['retry_times'] = retries
        retryreq.dont_filter = True
        # our priority setup is different from super
        retryreq.meta['priority'] = retryreq.meta['priority'] - 10
        return retryreq
    else:
        log.msg(format="Gave up retrying %(request)s "
                       "(failed %(retries)d times): %(reason)s",
                level=log.DEBUG, spider=spider, request=request,
                retries=retries, reason=reason)
def process_item(self, item, spider):
    m = hashlib.md5()
    m.update(item['url'])
    url_MD5 = m.hexdigest()
    content_simhash = Simhash(self.get_features(item['content'])).value
    language = 'en'
    query_json = '{"fields":["url_MD5","content_simhash"],"query":{"filtered":{"filter":{"term":{"url_MD5":"' + url_MD5 + '"}}}}}'
    es = Elasticsearch(host='192.168.1.14', port=9200, timeout=1000)
    res = es.search(index="hiddenwebs", body=query_json)
    if res['hits']['total'] == 0:
        es.index(index="hiddenwebs", doc_type="hiddenwebpages",
                 body={"url": item['url'], "content": item['content'], "create_time": item['create_time'],
                       "domain_name": item['domain_name'], "url_MD5": url_MD5, "title": item['title'],
                       "content_simhash": content_simhash, "language": language})
    else:
        flag = 0
        for hit in res['hits']['hits']:
            #print content_simhash
            #print hit["fields"]["content_simhash"][0]
            if int(hit["fields"]["content_simhash"][0]) == int(content_simhash):
                log.msg('The similar pages in es %s' % (item['url']), level=log.INFO)
                flag = 1
                es.index(index="hiddenwebs", doc_type="hiddenwebpages", id=hit['_id'],
                         body={"create_time": item['create_time']})
                break
        if flag == 0:
            es.index(index="hiddenwebs", doc_type="hiddenwebpages",
                     body={"url": item['url'], "content": item['content'], "create_time": item['create_time'],
                           "domain_name": item['domain_name'], "url_MD5": url_MD5, "title": item['title'],
                           "content_simhash": content_simhash, "language": language})
def process_item(self, item, spider):
    if item['site'] == 'Qua':
        if item['company']:
            item['company'] = wash(item['company'])
        if item['flight_time']:
            item['flight_time'] = wash(item['flight_time'])
        if item['airports']:
            item['airports'] = wash(item['airports'])
        if item['passtime']:
            item['passtime'] = wash(item['passtime'])
        if item['price']:
            item['price'] = wash(item['price'])
        for data in item:
            if not data:
                raise DropItem("Missing data!")
        self.collection.insert(dict(item))
        log.msg("Question added to MongoDB database!", level=log.DEBUG, spider=spider)
    elif item['site'] == 'Ctrip':
        self.collection.insert(dict(item))
        log.msg("Question added to MongoDB database!", level=log.DEBUG, spider=spider)
    return item
def parse(self, response):
    for build in foreigh_7:
        item = SightItem()
        log.msg('build: ' + build, level=log.INFO)
        if baidu_geo_api(build.encode('utf-8')) is not None:
            lng, lat = baidu_geo_api(build.encode('utf-8'))
        else:
            lng, lat = 1, 1
        item['lng'] = lng
        item['lat'] = lat
        item['id_num'] = self.id_num
        self.id_num += 1L
        item['category'] = u'??????'
        item['title'] = build.encode('utf-8')
        pinyin = lazy_pinyin(build)
        item['pinyin'] = ''.join(pinyin).upper()
        if lng == 1 or lat == 1:
            log.msg('no landmark found: ' + 'at line 36,' + build, level=log.INFO)
            continue
        baike_url = 'https://baike.baidu.com/item/%s' % build
        yield scrapy.Request(baike_url, meta={'item': item}, callback=self.content_parse)
def content_parse(self, response):
    log.msg('run into content_parse at line 40', level=log.INFO)
    item = response.meta['item']
    result = response.xpath(
        '//div[@class="main-content"]/div[@class="lemma-summary"]/div[@class="para"]').extract()
    # ????
    if len(result) != 0:
        pattern = re.compile(r'<[^>]+>', re.S)
        description = pattern.sub('', result[0]).encode('utf-8')
    else:
        description = 'description_null'
    item['description'] = description
    picture_url = 'http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word=%s&ic=0&width=0&height=0' % item[
        'title'].decode('utf-8')
    log.msg('picture_url: ' + picture_url, level=log.INFO)
    log.msg('run out content_parse at line 51', level=log.INFO)
    yield scrapy.Request(picture_url,
                         meta={'item': item,
                               'splash': {
                                   'endpoint': 'render.html',
                                   'args': {'wait': 0.5}
                               }},
                         callback=self.picture_parse)
def google_geo_api(sight_name):
    sight_name = sight_name.decode('utf-8')
    key = "AIzaSyDJtV9r7rAr9EBwlQ8Rbxvo6e7CkJsLn4k"
    url = "https://maps.googleapis.com/maps/api/geocode/json?address=%s&key=AIzaSyAw-IJpHf6CYtb4OVgrj2MB7pmXlbSs7aY%s" % (sight_name, key)
    print 'url: %s' % url
    response = urllib2.urlopen(url.encode('utf-8'))
    result = response.read()
    json_loads = json.loads(result)
    if json_loads.get('status') == 'OK':
        location = json_loads.get('results')[0].get('geometry').get('location')
        lat = location.get('lat')
        lat = float('%.2f' % lat)
        lng = location.get('lng')
        lng = float('%.2f' % lng)
        print ('lat: %s\r\n lng %s' % (lat, lng))
        return lng, lat
    else:
        log.msg('There is no result about lat and lng')
        return 1, 1
    # json_text = json.loads(result)
    # lng = json_text.get('geometry')
    # print ('lng: %s' % lng)
def process_exception(self, request, exception, spider):
    proxy = request.meta['proxy']
    log.msg('Removing failed proxy <%s>, %d proxies left' % (
        proxy, len(self.proxies)))
    try:
        del self.proxies[proxy]
    except ValueError:
        pass
def process_item(self, item, spider):
    valid = True
    print '--' * 40
    for data in item:
        if not data:
            valid = False
            raise DropItem("Missing {0}!".format(data))
    if valid:
        try:
            self.collection.insert(dict(item))
            log.msg("Question added to MongoDB database!", level=log.DEBUG, spider=spider)
        except:
            print 'ggggg' * 40
    return item
def process_item(self, item, spider):
    valid = True
    for data in item:
        if not data:
            valid = False
            raise DropItem('Missming{}!'.format(data))
    if valid:
        self.coll.insert(dict(item))
        log.msg('item added to mongodb database !', level=log.DEBUG, spider=spider)
    return item
def process_item(self, item, spider):
    valid = True
    for data in item:
        if not data:
            valid = False
            raise DropItem("Missing {0}!".format(data))
    if valid:
        self.collection.insert(dict(item))
        log.msg("Event added to MongoDB database!", level=log.DEBUG, spider=spider)
    return item
def close_spider(self, spider, reason):
    if self._dump:
        log.msg("Dumping Scrapy stats:\n" + pprint.pformat(self.get_stats()), spider=spider)
    self._persist_stats(self.get_stats(), spider)
def process_request(self, request, spider):
    ua = random.choice(self.user_agent_list)
    if ua:
        #???????useragent
        #print "********Current UserAgent:%s************" % ua
        #??
        log.msg('Current UserAgent: ' + ua, _level=logging.INFO)
        request.headers.setdefault('User-Agent', ua)

# the default user_agent_list composes chrome, IE, firefox, Mozilla, opera, netscape
# for more user agent strings, you can find them at http://www.useragentstring.com/pages/useragentstring.php
def process_request(self, request, spider):
    # Set the location of the proxy
    pro_adr = random.choice(self.proxyList)
    log.msg("Current Proxy <%s>" % pro_adr, _level=logging.INFO)
    request.meta['proxy'] = "http://" + pro_adr
def parse_datetime(value):
    try:
        d = parse(value)
    except ValueError:
        log.msg('Unable to parse %s' % value, level=log.WARNING)
        return value
    else:
        return d.isoformat()
def parse_date(value):
    try:
        d = parse(value)
    except ValueError:
        log.msg('Unable to parse %s' % value, level=log.WARNING)
        return value
    else:
        return d.strftime("%Y-%m-%d")
def start_listening(self):
    self.port = listen_tcp(self.portrange, self.host, self)
    h = self.port.getHost()
    log.msg(format='Web service listening on %(host)s:%(port)d',
            level=log.DEBUG, host=h.host, port=h.port)
def parse(self, response):
    book_id = response.url.strip('/').split('/')[-1]
    log.msg('book_id[%s].' % book_id)
    book_name = response.xpath('//title/text()')[0].extract().strip(' (??)')
    bean = BookName()
    bean['book_id'] = book_id
    bean['book_name'] = book_name
    yield bean
def parse(self, response):
    url = response.url
    log.msg('[url]%s' % url)
    body = response.body
    soup = BeautifulSoup(body, 'lxml').select('.cardetail-infor')[0]
    text = str(self.gettextonly(soup)).decode('utf-8')
    m = re.findall(ur'(????|????|?????|????|????|? ? ?|? ? ?|????|??????)?\n?(.+)\n',
                   text, re.M | re.U)
    map = dict([(d[0], d[1]) for d in m])
    result = SpecItem()
    result['id'] = url.split('/')[-1]
    result['spec'] = map
    yield result
def readIds(self):
    names = filter(lambda x: 'model' in x and 'json' in x,
                   os.listdir('/Users/king/Work/code/codePool/python/autohome_spider/data'))
    print names
    if not names:
        log.msg('[spec]no model data file in data dir.', log.ERROR)
        return
    model_file_name = names[-1]
    f = codecs.open('/Users/king/Work/code/codePool/python/autohome_spider/data/%s' % model_file_name, 'r')
    ids = [line['id'] for line in json.loads(f.read())]
    log.msg(len(ids), log.INFO)
    return ids
def parse(self, response):
    log.msg('[parse] %s' % response.url)
    # ????ID???????????URL?request??
    for seriesId in response.xpath('body/dl').re(r'id="s(\d+)"'):
        series_page_url = "http://www.autohome.com.cn/" + seriesId
        log.msg('series_page_url:%s' % series_page_url)
        request = scrapy.Request(url=series_page_url,
                                 callback=self.parse_model_selling,
                                 dont_filter=True)
        request.meta['series_id'] = seriesId
        yield request
    # ???????
def process_item(self, item, spider):
    valid = True
    for data in item:
        if not data:
            valid = False
            raise DropItem('Missing{0}!'.format(data))
    if valid:
        self.collection.insert(dict(item))
        log.msg('question added to mongodb database!', level=log.DEBUG, spider=spider)
    return item
def process_item(self, item, spider):
    for data in item:
        if not data:
            raise DropItem("Missing data!")
    #self.collection.update({'url': item['url']}, dict(item), upsert=True)
    self.collection.insert(dict(item))
    log.msg("Question added to MongoDB database!", level=log.DEBUG, spider=spider)
    return None
def process_request(self, request, spider):
    user_agent = UserAgent()
    ua = user_agent.random
    if ua:
        log.msg('Current UserAgent: ' + ua, level=log.INFO)
        request.headers.setdefault('User-Agent', ua)
def process_item(self, item, spider): log.msg("Catch an AppItem", level=log.INFO) return item
def process_item(self, item, spider):
    try:
        self.conn.execute('insert into apps(url) values(?)', (item['url'],))
        self.conn.commit()
        log.msg("Inserting into database")
    except sqlite3.IntegrityError:
        print "Duplicated"
    return item
def process_item(self, item, spider):
    for field in self.required_fields:
        if not item[field]:
            # log.msg("Field '%s' missing" % (field))
            print "Field '%s' missing" % (field)
            raise DropItem("Field '%s' missing: %r" % (field, item))
    return item
def process_item(self, item, spider):
    if 'image_urls' in item:
        images = []
        abpath = '%s/%s/%s/%s' % (spider.name, item['id'][0], item['id'][1], item['id'])
        dir_path = '%s/%s' % (settings['IMAGES_STORE'], abpath)
        if not os.path.exists(dir_path) and len(item['image_urls']) > 0:
            os.makedirs(dir_path)
        for image_url in item['image_urls']:
            name = image_url.split('/')[-1]
            _i = name.rfind('!')
            if _i > 4:
                name = name[:_i]
            name = re.sub('\\\|/|:|\*|\?|"|<|>', '_', name)
            image_file_name = name[-100:]
            file_path = '%s/%s' % (dir_path, image_file_name)
            images.append((image_url, file_path))
            if os.path.exists(file_path):
                continue
            with open(file_path, 'wb') as handle:
                try:
                    response = requests.get(image_url, stream=True)
                    for block in response.iter_content(1024):
                        if not block:
                            break
                        handle.write(block)
                    # log.msg("download img to %s" % file_path)
                except:
                    continue
        item['images'] = images
        if not images:
            pass
        else:
            _ = images[0][1]
            item['firstimage'] = '%s/%s' % (abpath, _[_.rfind('/') + 1:])
            print item['firstimage']
    return item
def scan(html):
    alerts = list()
    matches = HTMLClassifier.yara_rules.match(data=html)
    if not len(matches) > 0:
        return alerts
    for match in matches['html']:
        print match
        alert_reason = ", ".join([" ".join(t.split('_')) for t in match['tags']])
        alert_data = "\n".join([s['data'] for s in match['strings']])
        alerts.append((alert_reason, alert_data))
        log.msg("Yara HTML Classification Match: " + alert_reason, level=log.INFO)
    return alerts
def scan(uri):
    alerts = list()
    matches = URLClassifier.yara_rules.match(data=uri.encode('ascii', 'ignore'))
    if not len(matches) > 0:
        return alerts
    for match in matches['urls']:
        alert_reason = ", ".join([" ".join(t.split('_')) for t in match['tags']])
        alert_data = "\n".join([s['data'] for s in match['strings']])
        alerts.append((alert_reason, alert_data))
        log.msg("Yara URL Classification Match: " + alert_reason, level=log.INFO)
    return alerts
def scan(js):
    alerts = list()
    matches = JSClassifier.yara_rules.match(data=js.encode('ascii', 'ignore'))
    if not len(matches) > 0:
        return alerts
    for match in matches['js']:
        alert_reason = ", ".join([" ".join(t.split('_')) for t in match['tags']])
        alert_data = "\n".join([s['data'] for s in match['strings']])
        alerts.append((alert_reason, alert_data))
        log.msg("Yara JS Classification Match: " + alert_reason, level=log.INFO)
    return alerts
def process_item(self, item, spider):
    if not type(item) == Alert:
        return item
    uri = item['uri']
    if not uri:
        raise DropItem("Not a valid alert URI: ", uri)
    if spider.custom_whitelist:
        for (pattern) in spider.custom_whitelist:
            if pattern[0] in uri:
                raise DropItem("Whitelisted domain found in Alert: ", uri)
    if spider.alexa_whitelist:
        try:
            parsed_uri = urlparse(uri)
            parsed_domain = '{uri.netloc}'.format(uri=parsed_uri)
            domain = get_tld(uri)
            for alexa_domain in spider.alexa_whitelist:
                if domain.endswith(alexa_domain):
                    raise DropItem("Alert domain found in Alexa Whitelist: ", domain)
        except (TldIOError, TldDomainNotFound, TldBadUrl) as e:
            log.msg("Error parsing TLD. Still allowing alert for " + uri, level=log.WARNING)
        except:
            raise
    return item
def spider_opened(self, spider):
    self.conn = MySQLdb.connect(host=settings.MYSQL_HOST, db=settings.MYSQL_DB,
                                user=settings.MYSQL_USER, passwd=settings.MYSQL_PASSWORD,
                                charset='utf8', use_unicode=True)
    cursor = spider.conn.cursor()
    sql_str = "SELECT pattern from whitelist"
    cursor.execute(sql_str)
    self.custom_whitelist = cursor.fetchall()
    try:
        alexa_whitelist_file = pkgutil.get_data("malspider", "resources/alexa-1k-whitelist.csv").decode('ascii')
        self.alexa_whitelist = alexa_whitelist_file.splitlines()
    except:
        log.msg("Error loading alexa whitelist...", level=log.ERROR)
def parse_response(self, response):
    page_id = ObjectId()
    analyzer = Analyzer(response)
    alerts = analyzer.inspect_response()
    elems = analyzer.get_resource_elems()
    page = analyzer.get_page_info()
    for alert in alerts:
        alert['org_id'] = self.org
        yield alert
    for elem in elems:
        elem['page_id'] = page_id
        elem['org_id'] = self.org
        yield elem
    page['page_id'] = page_id
    page['org_id'] = self.org
    yield page

    # limit page depth
    if self.pages_crawled >= settings.PAGES_PER_DOMAIN:
        return

    for link in LxmlLinkExtractor(unique=True, deny_extensions=list(),
                                  allow_domains=self.allowed_domains).extract_links(response):
        if not link.url in self.already_crawled and self.pages_crawled <= settings.PAGES_PER_DOMAIN:
            self.already_crawled.add(link.url)
            self.pages_crawled = self.pages_crawled + 1
            log.msg("Yielding request for " + link.url, level=log.INFO)
            yield WebdriverRequest(link.url, callback=self.parse_response)
        elif self.pages_crawled >= settings.PAGES_PER_DOMAIN:
            log.msg("Reached max crawl depth: " + str(settings.PAGES_PER_DOMAIN), level=log.INFO)
            return
        else:
            log.msg("avoiding duplicate request for: " + link.url, level=log.INFO)
def _download_request(self, request, spider):
    """Download a request URL using webdriver."""
    log.msg('Downloading %s with webdriver' % request.url, level=log.DEBUG)
    request.manager.webdriver.get(request.url)
    #time.sleep(5)
    take_screenshot = getattr(settings, 'TAKE_SCREENSHOT', None)
    screenshot_loc = getattr(settings, 'SCREENSHOT_LOCATION', None)
    if take_screenshot and screenshot_loc:
        screenshot_location = screenshot_loc + str(randint(10000, 10000000)) + '.png'
        request.manager.webdriver.save_screenshot(screenshot_location)
        request.meta['screenshot'] = screenshot_location
    request.meta['User-Agent'] = request.headers.get('User-Agent')
    request.meta['Referer'] = request.headers.get('Referer')
    return WebdriverResponse(request.url, request.manager.webdriver)
def _do_action_request(self, request, spider):
    """Perform an action on a previously webdriver-loaded page."""
    log.msg('Running webdriver actions %s' % request.url, level=log.DEBUG)
    request.actions.perform()
    return WebdriverResponse(request.url, request.manager.webdriver)
def process_request(self, request, spider):
    ua = random.choice(self.user_agent_list)
    if ua:
        #???????useragent
        print "********Current UserAgent:%s************" % ua
        #??
        log.msg('Current UserAgent: ' + ua, level=1)
        request.headers.setdefault('User-Agent', ua)

# the default user_agent_list composes chrome, IE, firefox, Mozilla, opera, netscape
# for more user agent strings, you can find them at http://www.useragentstring.com/pages/useragentstring.php
def process_item(self, item, spider):
    for data in item:
        if not data:
            raise DropItem("Missing data!")
    self.collection.update({'url': item['url']}, dict(item), upsert=True)
    log.msg("Question added to MongoDB !", level=log.DEBUG, spider=spider)
    return item