The following code examples, extracted from open-source Python projects, illustrate how to use elasticsearch.TransportError().
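Before the individual examples, here is a minimal sketch of the pattern most of them share: wrap the client call in try/except TransportError and decide whether to re-raise or fall back to an empty result. The host URL, index name, and helper name below are illustrative placeholders, not taken from any of the projects listed.

import logging

from elasticsearch import Elasticsearch, TransportError

es = Elasticsearch(["http://localhost:9200"])  # placeholder host


def safe_search(index, body):
    """Run a search, returning {} instead of raising if Elasticsearch rejects it."""
    try:
        return es.search(index=index, body=body)
    except TransportError as e:
        # TransportError exposes the HTTP status and the server's error payload
        logging.error("Elasticsearch error %s: %s", e.status_code, e.error)
        return {}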
def _parse_mappings(app, app_type=None):
    """
    .. todo:: Need to parse out result set that presents field list and type
    """
    doc = {}
    try:
        mappings = es.indices.get_mapping(index=app, doc_type=[app_type], ignore=[400, 404])
        # mappings = yaml.safe_load(json.dumps(mappings))
        # print json.dumps(mappings[app]["mappings"], indent=4,
        #                  separators=(',', ': '))
        ignore = ["properties", "format"]
    except TransportError as e:
        doc['error'] = e.info
    except Exception as e:
        doc['error'] = str(e)
    return doc
def remove(self, obj_or_string, commit=True):
    doc_id = get_identifier(obj_or_string)

    if not self.setup_complete:
        try:
            self.setup()
        except elasticsearch.TransportError as e:
            if not self.silently_fail:
                raise

            self.log.error("Failed to remove document '%s' from Elasticsearch: %s",
                           doc_id, e, exc_info=True)
            return

    try:
        self.conn.delete(index=self.index_name, doc_type='modelresult', id=doc_id, ignore=404)

        if commit:
            self.conn.indices.refresh(index=self.index_name)
    except elasticsearch.TransportError as e:
        if not self.silently_fail:
            raise

        self.log.error("Failed to remove document '%s' from Elasticsearch: %s",
                       doc_id, e, exc_info=True)
def open(self):
    """
    Connect to ES cluster.
    Execute a command to check if connected on master to activate immediate
    connection to the DB because we need to know if DB server is available.
    Update log rotation time to force a log rotation.
    """
    logger.info("[elastic-logs] trying to connect to ES Cluster: %s", self.hosts)

    self.es = Elasticsearch(self.hosts.split(','), timeout=int(self.timeout))
    try:
        self.es.cluster.health()
        logger.info("[elastic-logs] connected to the ES Cluster: %s", self.hosts)
        self.is_connected = CONNECTED
        self.next_logs_rotation = time.time()
    except TransportError as exp:
        logger.error("[elastic-logs] Cluster is not available: %s", str(exp))
        self.is_connected = DISCONNECTED
        return False

    return True
def run(self, args):
    query = Query()
    for cmd in self.commands:
        cmd_instance = cmd()
        cmd_instance.run(query, args)
        if cmd_instance.errors:
            return self.generate_error_response(cmd_instance.errors)

    _format = args.get(constants.PARAM_FORMAT,
                       constants.API_DEFAULT_VALUES[constants.PARAM_FORMAT])
    formatter = self.get_formatter(_format)
    try:
        return formatter.run(query, args)
    except TransportError:
        return self.generate_error_response([strings.ELASTICSEARCH_ERROR])
    except EndOfPeriodError as e:
        return self.generate_error_response([e.message])
def delete_all(self):
    '''Deletes all tag data.

    This does not destroy the ES index, but instead only deletes all
    tags with the configured doc types.
    '''
    try:
        self.conn.indices.delete_mapping(
            index=self.index, doc_type=self.type_tag)
    except TransportError:
        logger.warn('type %r in index %r already deleted',
                    self.index, self.type_tag, exc_info=True)
    try:
        self.conn.indices.delete_mapping(
            index=self.index, doc_type=self.type_assoc)
    except TransportError:
        logger.warn('type %r in index %r already deleted',
                    self.index, self.type_assoc, exc_info=True)
def _create_index(self):
    'Create the index'
    # This can race, but that should be OK.
    # Worst case, we initialize with the same settings more than once.
    if self.conn.indices.exists(index=self.index):
        return False
    try:
        settings = {}
        if self.shards is not None:
            settings['number_of_shards'] = self.shards
        self.conn.indices.create(
            index=self.index, timeout=60, request_timeout=60, body={
                'settings': settings,
            })
    except TransportError:
        # Hope that this is an "index already exists" error...
        logger.warn('index already exists? OK', exc_info=True)
    return True
def get_applications():
    """
    Fetch all the registered applications in Distill.

    .. note:: Private indexes starting with a period are not included in the
              result set

    :return: [dict] dictionary of all registered applications and meta info
    """
    doc = {}
    query = {
        "aggs": {
            "count_by_type": {
                "terms": {
                    "field": "_type",
                    "size": 100
                }
            }
        }
    }

    try:
        cluster_status = es.cat.indices(h=["index"], pri=False)
        x = cluster_status.splitlines()
        for idx in x:
            idx = idx.rstrip()

            # Ignore private indexes (like .kibana or .stout)
            if idx[:1] != '.':
                response = es.search(index=idx, body=query)
                d = {}
                for tag in response["aggregations"]["count_by_type"]["buckets"]:
                    d[tag['key']] = tag['doc_count']
                doc[idx] = d
    except TransportError as e:
        doc['error'] = e.info
    except Exception as e:
        doc['error'] = str(e)
    return doc
def _get_cluster_status(app, app_type=None):
    """
    Return cluster status, index health, and document count as string

    @todo figure out how to count individual documents stored at an app_type
          (currently shows only index count)

    :param app: [string] application name (e.g. xdata_v3)
    :return: [dict] dictionary of index meta data including field names
    """
    doc = {}
    try:
        cluster_status = es.cat.indices(index=app,
                                        h=["health", "status", "docs.count"],
                                        pri=True,
                                        ignore=[400, 404])
        v = str(cluster_status).split(" ")
        m = ["health", "status", "num_docs"]
        doc = dict(zip(m, v))

        # Add back application
        doc["application"] = app
    except TransportError as e:
        doc['error'] = e.info
    except Exception as e:
        doc['error'] = str(e)

    doc['fields'] = _get_all_fields(app, app_type)
    return doc
def _get_all_fields(app, app_type=None):
    """
    Retrieve all possible fields in an application

    :param app: [string] application name (e.g. xdata_v3)
    :param app_type: [string] application type (e.g. logs)
    :return: [list] list of strings representing the fields names
    """
    d = list()
    query = {
        "aggs": {
            "fields": {
                "terms": {
                    "field": "_field_names",
                    "size": 100
                }
            }
        }
    }

    try:
        response = es.search(index=app, doc_type=app_type, body=query)
        for tag in response['aggregations']['fields']['buckets']:
            d.append(tag['key'])
    except TransportError as e:
        d.append(str(e.info))
    except Exception as e:
        d.append(str(e))
    return d
def __init__(self):
    # parse out query
    pass

# @staticmethod
# def filter(app, app_type=None, q=''):
#     field = q.get("field") if q.get("field") else ""
#     size = q.get("size") if q.get("size") else 10
#     query = {"aggs": {
#         "count_by_type": {
#             "filter": {"term": {field: }}
#             "terms": {
#                 "field": field,
#                 "size": 100
#             }
#         }
#     }
#     }
#     d = {}
#
#     try:
#         response = es.search(index=app, doc_type=app_type, body=query)
#
#         for tag in response['aggregations']['count_by_type']['buckets']:
#             d[tag['key']] = tag['doc_count']
#
#     except TransportError as e:
#         d['error'] = e.info
#
#     except Exception as e:
#         d['error'] = str(e)
#
#     return jsonify(d)
#     return jsonify(response)
def histogram(app, app_type=None, q=""):
    """
    Only works on numerical data.
    """
    field = q.get("field") if q.get("field") else ""
    interval = 50
    query = {"aggs": {
        "hist_agg": {
            "histogram": {
                "field": field,
                "interval": interval
            }
        }
    }
    }

    d = {}
    try:
        response = es.search(index=app, doc_type=app_type, body=query)
        for tag in response['aggregations']['hist_agg']['buckets']:
            d[tag['key']] = tag['doc_count']
    except TransportError as e:
        d['error'] = e.info
    except Exception as e:
        d['error'] = str(e)
    return jsonify(d)
def raw_search(self, query):
    try:
        return self.raw_es.search(
            q='*:*',
            index=settings.HAYSTACK_CONNECTIONS['elasticsearch']['INDEX_NAME'])
    except elasticsearch.TransportError:
        return {}
def update(self, index, iterable, commit=True):
    if not self.setup_complete:
        try:
            self.setup()
        except elasticsearch.TransportError as e:
            if not self.silently_fail:
                raise

            self.log.error("Failed to add documents to Elasticsearch: %s", e, exc_info=True)
            return

    prepped_docs = []

    for obj in iterable:
        try:
            prepped_data = index.full_prepare(obj)
            final_data = {}

            # Convert the data to make sure it's happy.
            for key, value in prepped_data.items():
                final_data[key] = self._from_python(value)
            final_data['_id'] = final_data[ID]

            prepped_docs.append(final_data)
        except SkipDocument:
            self.log.debug(u"Indexing for object `%s` skipped", obj)
        except elasticsearch.TransportError as e:
            if not self.silently_fail:
                raise

            # We'll log the object identifier but won't include the actual object
            # to avoid the possibility of that generating encoding errors while
            # processing the log message:
            self.log.error(u"%s while preparing object for update" % e.__class__.__name__,
                           exc_info=True,
                           extra={"data": {"index": index, "object": get_identifier(obj)}})

    bulk(self.conn, prepped_docs, index=self.index_name, doc_type='modelresult')

    if commit:
        self.conn.indices.refresh(index=self.index_name)
def __init__(self, ex, **kwargs):
    (t, v, tb) = sys.exc_info()
    message = ('ElasticSearch Exception: '
               '%s.' % str(ex))
    super(PartyCrasherError, self).__init__(message, **kwargs)
    self.original_traceback = tb
    self.original_type = repr(t)
    self.original_value = repr(v)
    if isinstance(ex, TransportError):
        self.es_status_code = ex.status_code
        self.es_error = ex.error
        self.es_info = ex.info
        self.es_description = str(ex)
def sniff_hosts(self, initial=False):
    """
    Obtain a list of nodes from the cluster and create a new connection
    pool using the information retrieved.

    To extract the node connection parameters use the ``nodes_to_host_callback``.

    :arg initial: flag indicating if this is during startup
        (``sniff_on_start``), ignore the ``sniff_timeout`` if ``True``
    """
    node_info = yield from self._get_sniff_data(initial)

    hosts = list(filter(None, (self._get_host_info(n) for n in node_info)))

    # we weren't able to get any nodes, maybe using an incompatible
    # transport_schema or host_info_callback blocked all - raise error.
    if not hosts:
        raise TransportError("N/A", "Unable to sniff hosts - no viable hosts found.")

    # remember current live connections
    orig_connections = self.connection_pool.connections[:]

    self.set_connections(hosts)

    # close those connections that are not in use any more
    for c in orig_connections:
        if c not in self.connection_pool.connections:
            yield from c.close()
def main_loop(self, method, url, params, body, headers=None, ignore=(), timeout=None):
    for attempt in range(self.max_retries + 1):
        connection = self.get_connection()

        try:
            status, headers, data = yield from connection.perform_request(
                method, url, params, body,
                headers=headers, ignore=ignore, timeout=timeout)
        except TransportError as e:
            if method == 'HEAD' and e.status_code == 404:
                return False

            retry = False
            if isinstance(e, ConnectionTimeout):
                retry = self.retry_on_timeout
            elif isinstance(e, ConnectionError):
                retry = True
            elif e.status_code in self.retry_on_status:
                retry = True

            if retry:
                # only mark as dead if we are retrying
                self.mark_dead(connection)
                # raise exception on last retry
                if attempt == self.max_retries:
                    raise
            else:
                raise
        else:
            if method == 'HEAD':
                return 200 <= status < 300

            # connection didn't fail, confirm it's live status
            self.connection_pool.mark_live(connection)

            if data:
                data = self.deserializer.loads(data, headers.get('content-type'))

            return data
def raw_search(self, query):
    try:
        return self.raw_es.search(
            q='*:*',
            index=settings.HAYSTACK_CONNECTIONS['default']['INDEX_NAME'])
    except elasticsearch.TransportError:
        return {}

# TODO mb test logs
def clear(self, models=None, commit=True):
    # We actually don't want to do this here, as mappings could be
    # very different.
    # if not self.setup_complete:
    #     self.setup()
    if models is not None:
        assert isinstance(models, (list, tuple))

    try:
        if models is None:
            self.conn.indices.delete(index=self.index_name, ignore=404)
            self.setup_complete = False
            self.existing_mapping = {}
        else:
            models_to_delete = []

            for model in models:
                models_to_delete.append("%s:%s" % (DJANGO_CT, get_model_ct(model)))

            # Delete by query in Elasticsearch assumes you're dealing with
            # a ``query`` root object. :/
            query = {'query': {'query_string': {'query': " OR ".join(models_to_delete)}}}
            self.conn.delete_by_query(index=self.index_name, doc_type='modelresult', body=query)
    except elasticsearch.TransportError as e:
        if not self.silently_fail:
            raise

        if models is not None:
            self.log.error("Failed to clear Elasticsearch index of models '%s': %s",
                           ','.join(models_to_delete), e, exc_info=True)
        else:
            self.log.error("Failed to clear Elasticsearch index: %s", e, exc_info=True)
def more_like_this(self, model_instance, additional_query_string=None,
                   start_offset=0, end_offset=None,
                   models=None, limit_to_registered_models=None,
                   result_class=None, **kwargs):
    from haystack import connections

    if not self.setup_complete:
        self.setup()

    # Deferred models will have a different class ("RealClass_Deferred_fieldname")
    # which won't be in our registry:
    model_klass = model_instance._meta.concrete_model

    index = connections[self.connection_alias].get_unified_index().get_index(model_klass)
    field_name = index.get_content_field()
    params = {}

    if start_offset is not None:
        params['search_from'] = start_offset

    if end_offset is not None:
        params['search_size'] = end_offset - start_offset

    doc_id = get_identifier(model_instance)

    try:
        raw_results = self.conn.mlt(index=self.index_name, doc_type='modelresult',
                                    id=doc_id, mlt_fields=[field_name], **params)
    except elasticsearch.TransportError as e:
        if not self.silently_fail:
            raise

        self.log.error("Failed to fetch More Like This from Elasticsearch for document '%s': %s",
                       doc_id, e, exc_info=True)
        raw_results = {}

    return self._process_results(raw_results, result_class=result_class)
def clear(self, models=None, commit=True):
    """
    Clears the backend of all documents/objects for a collection of models.

    :param models: List or tuple of models to clear.
    :param commit: Not used.
    """
    if models is not None:
        assert isinstance(models, (list, tuple))

    try:
        if models is None:
            self.conn.indices.delete(index=self.index_name, ignore=404)
            self.setup_complete = False
            self.existing_mapping = {}
            self.content_field_name = None
        else:
            models_to_delete = []

            for model in models:
                models_to_delete.append("%s:%s" % (DJANGO_CT, get_model_ct(model)))

            # Delete using scroll API
            query = {'query': {'query_string': {'query': " OR ".join(models_to_delete)}}}
            generator = scan(self.conn, query=query, index=self.index_name, doc_type='modelresult')
            actions = ({
                '_op_type': 'delete',
                '_id': doc['_id'],
            } for doc in generator)
            bulk(self.conn, actions=actions, index=self.index_name, doc_type='modelresult')
            self.conn.indices.refresh(index=self.index_name)
    except elasticsearch.TransportError as e:
        if not self.silently_fail:
            raise

        if models is not None:
            self.log.error("Failed to clear Elasticsearch index of models '%s': %s",
                           ','.join(models_to_delete), e, exc_info=True)
        else:
            self.log.error("Failed to clear Elasticsearch index: %s", e, exc_info=True)
def clear_elasticsearch_index():
    # Wipe it clean.
    raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['default']['URL'])
    try:
        raw_es.indices.delete(index=settings.HAYSTACK_CONNECTIONS['default']['INDEX_NAME'])
        raw_es.indices.refresh()
    except elasticsearch.TransportError:
        pass

    # Since we've just completely deleted the index, we'll reset setup_complete
    # so the next access will correctly define the mappings:
    connections['default'].get_backend().setup_complete = False
def raw_search(self, query):
    try:
        return self.raw_es.search(
            q='*:*',
            index=settings.HAYSTACK_CONNECTIONS['default']['INDEX_NAME'])
    except elasticsearch.TransportError:
        return {}
def _write_table(self):
    import elasticsearch as es

    if not isinstance(self.stream, es.Elasticsearch):
        raise ValueError(
            "stream must be an elasticsearch.Elasticsearch instance")

    self._verify_value_matrix()
    self._preprocess()

    mappings = self._get_mappings()

    try:
        result = self.stream.indices.create(
            index=self.index_name, body=mappings)
        self._logger.logger.debug(result)
    except es.TransportError as e:
        if e.error == "index_already_exists_exception":
            # ignore already existing index
            self._logger.logger.debug(
                "{:s}: {}".format(e.__class__.__name__, e))
        else:
            raise

    for body in self._get_body():
        try:
            self.stream.index(
                index=self.index_name, body=body, doc_type=self.document_type)
        except es.exceptions.RequestError as e:
            self._logger.logger.error(
                "message={}, body={}".format(e, body))
def commit(self):
    """Process list of dict (yes) and push them to DB """
    self.total_objs += len(self.nlist)
    count = 0

    def gen_events(events):
        dicts = list()
        for d in events:
            dicts.extend([{'index': {'_index': 'nxapi', '_type': 'events'}}, d.to_dict()])
            yield dicts.pop(-2)
            yield dicts.pop(-1)

    events = list()
    for entry in self.nlist:
        event = Event(_index=self.index)
        for key, value in entry.items():
            setattr(event, key, value)

        event.whitelisted = False
        event.comments = "import on" + str(datetime.datetime.now())
        events.append(event)
        count += 1

    try:
        ret = self.client.bulk(gen_events(events))
        ## ToDo parse ret to selectively loop over events to events.save() whatever happens
    except TransportError as e:
        logging.warning("We encountered an error trying to continue.")
        for event in events:
            event.save(using=self.client)
            ## ToDo find a way to change the hardcoded 'events' for ES doctype
            ## elasticsearch_dsl Issue 689

    self.total_commits += count
    logging.debug("Written " + str(self.total_commits) + " events")
    del self.nlist[0:len(self.nlist)]
def _verify_mapping(self):
    index_client = elasticsearch.client.IndicesClient(self._es_conn)
    try:
        mapping = index_client.get_mapping(index=self._index,
                                           doc_type=self.DOC_TYPE)
    except elasticsearch.TransportError as e:
        if e.status_code != 404:
            raise
        if e.error != 'type_missing_exception':
            raise
        mapping = {}

    if not mapping.get(self._index, None) or \
            self.DOC_TYPE not in mapping[self._index]['mappings']:
        missing_fields = self.DOC_MAPPING.keys()
    else:
        current_mapping = mapping[self._index]['mappings'][
            self.DOC_TYPE]['properties']
        # We are not going to force re-indexing, so won't be checking the
        # mapping format
        missing_fields = [key for key in self.DOC_MAPPING.keys()
                          if key not in current_mapping]

    if missing_fields:
        new_mapping = dict([(k, v) for k, v in self.DOC_MAPPING.items()
                            if k in missing_fields])
        # Elasticsearch 5.x deprecated the "string" type. We convert the
        # string fields into the appropriate 5.x types.
        # TODO: Once we remove support for the 2.x clusters, we should
        # remove this code and create the new mappings for each field.
        if self._server_version >= StrictVersion('5.0'):
            new_mapping = dict([(k, self._update_string_mapping(v))
                                for k, v in new_mapping.items()])
        index_client.put_mapping(index=self._index,
                                 doc_type=self.DOC_TYPE,
                                 body={'properties': new_mapping})
def update_all_es_model_mappings(self):
    """
    Update all of the Elasticsearch model mappings that are currently deployed
    in the configured deployment.

    :return: None
    """
    from .sqlalchemy import get_all_organization_uuids
    from wselasticsearch import bootstrap_index_model_mappings
    org_uuids = get_all_organization_uuids(self.db_session)
    logger.warning(
        "Now updating ES model mappings for %s organizations."
        % (len(org_uuids),)
    )
    for org_uuid in org_uuids:
        logger.warning(
            "Updating ES model mappings for organization %s."
            % (org_uuid,)
        )
        try:
            bootstrap_index_model_mappings(index=org_uuid, delete_first=False)
        except TransportError as e:
            logger.error(
                "Error thrown when attempting to set mappings for index %s: %s"
                % (org_uuid, e.message)
            )
    logger.warning(
        "Updated all ES model mappings for all organizations in the configured database."
    )

# Protected Methods

# Private Methods
def test_suggest__transport_error_with_status_code(self, mock_essuggest):
    mock_essuggest.side_effect = TransportError(status.HTTP_404_NOT_FOUND, "Error")
    url = reverse('complaint_search:suggest')
    param = {"text": "test"}
    response = self.client.get(url, param)
    self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
    self.assertDictEqual({"error": "Elasticsearch error: Error"}, response.data)
def test_suggest__transport_error_without_status_code(self, mock_essuggest):
    mock_essuggest.side_effect = TransportError('N/A', "Error")
    url = reverse('complaint_search:suggest')
    param = {"text": "test"}
    response = self.client.get(url, param)
    self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
    self.assertDictEqual({"error": "Elasticsearch error: Error"}, response.data)
def test_search__transport_error_with_status_code(self, mock_essearch):
    mock_essearch.side_effect = TransportError(status.HTTP_404_NOT_FOUND, "Error")
    url = reverse('complaint_search:search')
    response = self.client.get(url)
    self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
    self.assertDictEqual({"error": "Elasticsearch error: Error"}, response.data)
def test_search__transport_error_without_status_code(self, mock_essearch):
    mock_essearch.side_effect = TransportError('N/A', "Error")
    url = reverse('complaint_search:search')
    response = self.client.get(url)
    self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
    self.assertDictEqual({"error": "Elasticsearch error: Error"}, response.data)
def test_suggest__transport_error_with_status_code(self, mock_essuggest):
    mock_essuggest.side_effect = TransportError(
        status.HTTP_404_NOT_FOUND, "Error"
    )
    url = reverse('complaint_search:suggest_company')
    param = {"text": "test"}
    response = self.client.get(url, param)
    self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
    self.assertDictEqual(
        {"error": "Elasticsearch error: Error"}, response.data
    )
def test_document__transport_error_with_status_code(self, mock_esdocument):
    mock_esdocument.side_effect = TransportError(status.HTTP_404_NOT_FOUND, "Error")
    url = reverse('complaint_search:complaint', kwargs={"id": "123456"})
    response = self.client.get(url)
    self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
    self.assertDictEqual({"error": "Elasticsearch error: Error"}, response.data)
def test_document__transport_error_without_status_code(self, mock_esdocument):
    mock_esdocument.side_effect = TransportError('N/A', "Error")
    url = reverse('complaint_search:complaint', kwargs={"id": "123456"})
    response = self.client.get(url)
    self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
    self.assertDictEqual({"error": "Elasticsearch error: Error"}, response.data)
def test_suggest__transport_error_without_status_code(
        self, mock_essuggest
):
    mock_essuggest.side_effect = TransportError('N/A', "Error")
    url = reverse('complaint_search:suggest_zip')
    param = {"text": "test"}
    response = self.client.get(url, param)
    self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
    self.assertDictEqual(
        {"error": "Elasticsearch error: Error"}, response.data
    )
def test_suggest__transport_error_with_status_code(self, mock_essuggest):
    mock_essuggest.side_effect = TransportError(
        status.HTTP_404_NOT_FOUND, "Error"
    )
    url = reverse('complaint_search:suggest_zip')
    param = {"text": "test"}
    response = self.client.get(url, param)
    self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
    self.assertDictEqual(
        {"error": "Elasticsearch error: Error"}, response.data
    )
def _free_text_query(self, searchphrase, doc_types, params):
    '''
    If the 'fields' parameter is passed, only those fields are returned, and
    'highlights' are added only for fields that appear in the 'fields'
    parameter. If no 'fields' parameter is given, the default set of fields
    is included.
    '''
    highlight = self._get_free_text_highlight()
    source_filter = SourceDataStructureOptions.getSource(params.datastructure)
    if params.fields:
        source_filter["includes"] = params.fields

    body = {'query': self._get_free_text_query(searchphrase),
            'size': params.size,
            'from': params.start_from,
            '_source': source_filter,
            "explain": current_app.config['DEBUG'],
            "suggest": self._get_free_text_suggestions(searchphrase)
            }
    if highlight is not None:
        body['highlight'] = highlight
    try:
        res = self._cached_search(index=self._index_search,
                                  doc_type=doc_types,
                                  body=body,
                                  )
    except TransportError as e:
        # TODO: remove this try. Needed to work around a rare Elasticsearch
        # error caused by fields with different mappings.
        if e.error == u'search_phase_execution_exception':
            return {}
        raise
    return res
def search(self, query_string, **kwargs):
    if len(query_string) == 0:
        return {
            'results': [],
            'hits': 0,
        }

    if not self.setup_complete:
        self.setup()

    search_kwargs = self.build_search_kwargs(query_string, **kwargs)
    search_kwargs['from'] = kwargs.get('start_offset', 0)

    order_fields = set()

    for order in search_kwargs.get('sort', []):
        for key in order.keys():
            order_fields.add(key)

    geo_sort = '_geo_distance' in order_fields

    end_offset = kwargs.get('end_offset')
    start_offset = kwargs.get('start_offset', 0)

    if end_offset is not None and end_offset > start_offset:
        search_kwargs['size'] = end_offset - start_offset

    try:
        raw_results = self.conn.search(body=search_kwargs,
                                       index=self.index_name,
                                       doc_type='modelresult',
                                       _source=True)
    except elasticsearch.TransportError as e:
        if not self.silently_fail:
            raise

        self.log.error("Failed to query Elasticsearch using '%s': %s",
                       query_string, e, exc_info=True)
        raw_results = {}

    return self._process_results(raw_results,
                                 highlight=kwargs.get('highlight'),
                                 result_class=kwargs.get('result_class', SearchResult),
                                 distance_point=kwargs.get('distance_point'),
                                 geo_sort=geo_sort)
def unique_terms(app, app_type=None, q=""):
    """
    Aggregate the number of unique terms in a field. Missing values are
    counted and marked as "N/A".

    .. todo:: Need to incorporate QueryBuilder library instead of manually
              generating queries.

    :param app: [string] application name
    :param app_type: [string] application type
    :param field: [string] field to search against for unique values
    :param size: [int] the top size terms returned in the result. Default
                 value is 10.
    :param min_hits: [int] return tags which have been found in min_hits or
                     more. Default value is 1.
    :return: [dict] dictionary of results
    """
    field = q.get("field") if q.get("field") else ""
    size = q.get("size") if q.get("size") else 10000
    min_hits = q.get("min_hits") if q.get("min_hits") else 0

    print(field)

    query = {"aggs": {
        "terms_agg": {
            "terms": {
                "field": field,
                "size": size,
                "min_doc_count": min_hits,
                "missing": "N/A"
            }
        }
    }
    }

    d = {}
    try:
        response = es.search(index=app, doc_type=app_type, body=query)
        for tag in response['aggregations']['terms_agg']['buckets']:
            d[tag['key']] = tag['doc_count']
    except TransportError as e:
        d['error'] = e.info
    except Exception as e:
        d['error'] = str(e)
    return jsonify(d)
def clear_elasticsearch_index():
    # Wipe it clean.
    raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'])
    try:
        raw_es.indices.delete(index=settings.HAYSTACK_CONNECTIONS['elasticsearch']['INDEX_NAME'])
        raw_es.indices.refresh()
    except elasticsearch.TransportError:
        pass

    # Since we've just completely deleted the index, we'll reset setup_complete
    # so the next access will correctly define the mappings:
    connections['elasticsearch'].get_backend().setup_complete = False
def clear(self, models=None, commit=True):
    # We actually don't want to do this here, as mappings could be
    # very different.
    # if not self.setup_complete:
    #     self.setup()
    if models is not None:
        assert isinstance(models, (list, tuple))

    try:
        if models is None:
            self.conn.indices.delete(index=self.index_name, ignore=404)
            self.setup_complete = False
            self.existing_mapping = {}
        else:
            models_to_delete = []

            for model in models:
                models_to_delete.append("%s:%s" % (DJANGO_CT, get_model_ct(model)))

            # Delete by query in Elasticsearch assumes you're dealing with
            # a ``query`` root object. :/
            query = {"query": {"query_string": {"query": " OR ".join(models_to_delete)}}}

            if elasticsearch.__version__[0] != 2:
                self.conn.delete_by_query(index=self.index_name, doc_type='modelresult', body=query)
            else:
                for result in scan(
                        self.conn, query=query, index=self.index_name,
                        doc_type='modelresult', _source=False, scroll='1m'):
                    self.conn.delete(
                        index=result['_index'], doc_type=result['_type'],
                        id=result['_id'], refresh=True, ignore=404
                    )
    except elasticsearch.TransportError as e:
        if not self.silently_fail:
            raise

        if models is not None:
            self.log.error("Failed to clear Elasticsearch index of models '%s': %s",
                           ','.join(models_to_delete), e, exc_info=True)
        else:
            self.log.error("Failed to clear Elasticsearch index: %s", e, exc_info=True)
def more_like_this(self, model_instance, additional_query_string=None,
                   start_offset=0, end_offset=None,
                   models=None, limit_to_registered_models=None,
                   result_class=None, **kwargs):
    from haystack import connections

    if not self.setup_complete:
        self.setup()

    # Deferred models will have a different class ("RealClass_Deferred_fieldname")
    # which won't be in our registry:
    model_klass = model_instance._meta.concrete_model

    index = connections[self.connection_alias].get_unified_index().get_index(model_klass)
    field_name = index.get_content_field()
    params = {}

    if start_offset is not None:
        if elasticsearch.__version__[0] < 2:
            params['search_from'] = start_offset
        else:
            params['from_'] = start_offset

    if end_offset is not None:
        if elasticsearch.__version__[0] < 2:
            params['search_size'] = end_offset - start_offset
        else:
            params['size'] = end_offset - start_offset

    doc_id = get_identifier(model_instance)

    try:
        if elasticsearch.__version__[0] < 2:
            raw_results = self.conn.mlt(index=self.index_name, doc_type='modelresult',
                                        id=doc_id, mlt_fields=[field_name], **params)
        else:
            doc = self.conn.get(index=self.index_name, doc_type='modelresult', id=doc_id)
            query = {
                'query': {
                    'more_like_this': {
                        'fields': [field_name],
                        'like': doc['_source'].get(field_name)
                    }
                }
            }
            raw_results = self.conn.search(
                body=query,
                index=self.index_name,
                doc_type='modelresult',
                _source=True,
                **params
            )
    except elasticsearch.TransportError as e:
        if not self.silently_fail:
            raise

        self.log.error("Failed to fetch More Like This from Elasticsearch for document '%s': %s",
                       doc_id, e, exc_info=True)
        raw_results = {}

    return self._process_results(raw_results, result_class=result_class)
def _get_sniff_data(self, initial=False):
    previous_sniff = self.last_sniff

    # reset last_sniff timestamp
    self.last_sniff = time.time()

    # use small timeout for the sniffing request, should be a fast api call
    timeout = self.sniff_timeout if not initial else None

    tasks = [
        c.perform_request('GET', '/_nodes/_all/http', timeout=timeout)
        # go through all current connections as well as the
        # seed_connections for good measure
        for c in chain(self.connection_pool.connections,
                       (c for c in self.seed_connections
                        if c not in self.connection_pool.connections))
    ]

    done = ()
    try:
        while tasks:
            # execute sniff requests in parallel, wait for first to return
            done, tasks = yield from asyncio.wait(tasks,
                                                  return_when=asyncio.FIRST_COMPLETED,
                                                  loop=self.loop)
            # go through all the finished tasks
            for t in done:
                try:
                    _, headers, node_info = t.result()
                    node_info = self.deserializer.loads(node_info, headers.get('content-type'))
                except (ConnectionError, SerializationError) as e:
                    logger.warn('Sniffing request failed with %r', e)
                    continue
                node_info = list(node_info['nodes'].values())
                return node_info
        else:
            # no task has finished completely
            raise TransportError("N/A", "Unable to sniff hosts.")
    except:
        # keep the previous value on error
        self.last_sniff = previous_sniff
        raise
    finally:
        # clean up pending futures
        for t in chain(done, tasks):
            t.cancel()