The following 47 code examples, extracted from open-source Python projects, illustrate how to use elasticsearch.Elasticsearch().
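Before the project examples, here is a minimal sketch of creating a client and checking the connection; the localhost host and port are illustrative assumptions, not taken from any of the projects below:

from elasticsearch import Elasticsearch

# Connect to a single node; adjust host/port for your own cluster.
es = Elasticsearch([{'host': 'localhost', 'port': 9200}])

# info() issues a simple request and confirms the node is reachable.
print(es.info())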
def __init__(self, lte=None, gte=None, limit=250, sort='date_disseminated,ASC',
             fastout=False, verify=True, endpoint='http://127.0.0.1/', start_offset=0):
    if gte and not lte:
        lte = datetime.now().isoformat()
    if lte and not gte:
        gte = '2000-01-01'
    self.lte = lte
    self.gte = gte
    self.limit = limit
    self.sort = sort
    self.fastout = fastout
    self.verify = verify
    self.endpoint = endpoint
    self.fcc_endpoint = 'https://ecfsapi.fcc.gov/filings'
    self.index_fields = mappings.FIELDS.keys()
    self.es = Elasticsearch(self.endpoint, timeout=30)
    self.start_offset = start_offset
    self.stats = {'indexed': start_offset, 'fetched': start_offset}
def __init__(self, settings):
    kwargs = settings.get('es').get('client')
    es_user = settings.get('es_user')
    es_pass = settings.get('es_pass')
    if es_user and es_pass:
        kwargs.update(**dict(http_auth=(es_user, es_pass)))
    self.client = Elasticsearch(**kwargs)
    self.timeout = settings.get('es').get('client').get('timeout')
    self.doc_type = settings.get('es').get('doc_type')
    self.index_name = settings.get('es').get('index')
    self.id_field = settings.get('id_field')
    self.bulk_size = settings.get('bulk_size', 1000)
    self.path_encoding = settings.get('path_encoding')
    self.actions = []
    log.debug('ESStorer instance created: %s', self.client)
def run(args):
    elasticsearchServer = args[0] if len(args) else 'localhost:9200'
    indexName = 'nhs_conditions'
    docType = 'condition'

    es = Elasticsearch(elasticsearchServer)
    es.indices.delete(index=indexName, ignore=[400, 404])

    f = open('nhsPageContent', 'w')
    f.write('[')
    for model in get_pages_info_models('http://www.nhs.uk/Conditions/Pages/hub.aspx'):
        json = model.to_json()
        es.index(index=indexName, doc_type=docType, body=json)
        f.write(json + ",\n")
    f.write(']')
    f.close()

    es.indices.refresh(index=indexName)
def get_summary_statistics():
    """
    Obtains statistics about current sum of flows, packets, bytes.

    :return: JSON with status "ok" or "error" and requested data.
    """
    try:
        # Elastic query
        client = elasticsearch.Elasticsearch(
            [{'host': myconf.get('consumer.hostname'), 'port': myconf.get('consumer.port')}])
        elastic_bool = []
        elastic_bool.append({'range': {'@timestamp': {'gte': "now-5m", 'lte': "now"}}})
        elastic_bool.append({'term': {'@type': 'protocols_statistics'}})

        qx = Q({'bool': {'must': elastic_bool}})
        s = Search(using=client, index='_all').query(qx)
        s.aggs.bucket('sum_of_flows', 'sum', field='flows')
        s.aggs.bucket('sum_of_packets', 'sum', field='packets')
        s.aggs.bucket('sum_of_bytes', 'sum', field='bytes')
        s.sort('@timestamp')
        result = s.execute()

        # Result parsing into CSV in format: timestamp, flows, packets, bytes
        data = "Timestamp, Flows, Packets, Bytes;"
        timestamp = "Last 5 Minutes"
        data += timestamp + ', ' + \
                str(int(result.aggregations.sum_of_flows['value'])) + ', ' + \
                str(int(result.aggregations.sum_of_packets['value'])) + ', ' + \
                str(int(result.aggregations.sum_of_bytes['value']))

        json_response = '{"status": "Ok", "data": "' + data + '"}'
        return json_response

    except Exception as e:
        json_response = '{"status": "Error", "data": "Elasticsearch query exception: ' + escape(str(e)) + '"}'
        return json_response
def setup_es(es_ip, es_port):
    """
    Setup an Elasticsearch connection

    Parameters
    ----------
    es_ip: string
        IP address for elasticsearch instance
    es_port: string
        Port for elasticsearch instance

    Returns
    -------
    es_conn: an elasticsearch_dsl Search connection object.
    """
    CLIENT = Elasticsearch([{'host': es_ip, 'port': es_port}])
    S = Search(using=CLIENT, index="geonames")
    return S
def ESConnection():
    parser = configparser.ConfigParser()
    conf_file = 'settings'
    fd = open(conf_file, 'r')
    parser.readfp(fd)
    fd.close()

    sections = parser.sections()
    for section in sections:
        options = parser.options(section)
        for option in options:
            if option == 'user':
                user = parser.get(section, option)
            if option == 'password':
                password = parser.get(section, option)
            if option == 'host':
                host = parser.get(section, option)
            if option == 'port':
                port = parser.get(section, option)
            if option == 'path':
                path = parser.get(section, option)
            # if option == 'genderize_key': key = parser.get(section, option)

    connection = "https://" + user + ":" + password + "@" + host + ":" + port + "/" + path
    es_write = Elasticsearch([connection], verify_certs=False)
    # es_write = Elasticsearch(["127.0.0.1:9200"])

    return es_write
def __init__(self, start, end, telescopes=None, sites=None, instrument_types=None):
    try:
        self.es = Elasticsearch([settings.ELASTICSEARCH_URL])
    except LocationValueError:
        logger.error('Could not find host. Make sure ELASTICSEARCH_URL is set.')
        raise ImproperlyConfigured('ELASTICSEARCH_URL')

    self.instrument_types = instrument_types
    self.available_telescopes = self._get_available_telescopes()

    sites = list({tk.site for tk in self.available_telescopes}) if not sites else sites
    telescopes = list({tk.telescope for tk in self.available_telescopes if tk.site in sites}) \
        if not telescopes else telescopes

    self.start = start.replace(tzinfo=timezone.utc).replace(microsecond=0)
    self.end = end.replace(tzinfo=timezone.utc).replace(microsecond=0)

    cached_event_data = cache.get('tel_event_data')
    if cached_event_data:
        self.event_data = cached_event_data
    else:
        self.event_data = self._get_es_data(sites, telescopes)
        cache.set('tel_event_data', self.event_data, 1800)
def __init__(self, host='localhost', port='9200', protocol='http',
             path=None, user=None, password=None):
    """
    Class constructor

    :param host: ElasticSearch host domain
    :param port: ElasticSearch port connection
    :param protocol: ElasticSearch protocol (typically http or https)
    :param path: ElasticSearch path connection
    :param user: ElasticSearch user connection
    :param password: ElasticSearch password connection
    """
    credentials = ""
    if user is not None or password is not None:
        credentials = user + ":" + password + "@"
    if path is None:
        path = ""

    connection = protocol + "://" + credentials + host + ":" + port + path
    print(connection)
    self.es = Elasticsearch([connection])
def __init__(self, host, test_id, debug=False):
    """
    :param host: the elasticsearch host
    :param test_id: id of the test to which we are restricting the queries
    """
    self._es = Elasticsearch(host)
    self._id = test_id
    self.debug = debug
    self.functiondict = {
        'no_proxy_errors': self.check_no_proxy_errors,
        'bounded_response_time': self.check_bounded_response_time,
        'http_success_status': self.check_http_success_status,
        'http_status': self.check_http_status,
        # 'reachability': self.check_reachability,
        'bounded_retries': self.check_bounded_retries,
        'circuit_breaker': self.check_circuit_breaker,
        'at_most_requests': self.check_at_most_requests
    }
def get(doc_type, doc_id, fields=True):
    """
    Get an Elasticsearch document.

    :param basestring doc_type: document type
    :param doc_id: document id, will be converted into basestring
    :param fields: if ``False``, returns whether the document is found as bool;
        if ``True``, returns the document dict; if list of string, returns the
        document dict with only the specified fields.
    :rtype: dict or bool
    """
    ret = es.get(
        index='oclubs',
        doc_type=doc_type,
        id=doc_id,
        _source=fields
    )

    if fields is not False:
        return ret['_source']
    else:
        return ret['found']
def use_store(self):
    """
    Opens a database to save data
    """
    logging.info('Using Elasticsearch database')

    self.db = Elasticsearch(
        [self.settings.get('host', 'localhost:9200')],
    )

    try:
        self.db.indices.create(index='mcp-watch', ignore=400)  # may exist
    except ConnectionError as feedback:
        logging.error('- unable to connect')
        raise

    return self.db
def reset_store(self):
    """
    Opens a database for points
    """
    logging.info('Resetting Elasticsearch database')

    self.db = Elasticsearch(
        [self.settings.get('host', 'localhost:9200')],
    )

    try:
        self.db.indices.create(index='mcp-watch', ignore=400)  # may exist
    except ConnectionError as feedback:
        logging.error('- unable to connect')
        raise

    return self.db
def __init__(self, config, workload, tool="browbeat", cache_size=1000, max_cache_time=10):
    self.config = config
    self.cache = deque()
    self.max_cache_size = cache_size
    self.last_upload = datetime.datetime.utcnow()
    self.max_cache_age = datetime.timedelta(minutes=max_cache_time)
    self.logger = logging.getLogger('browbeat.elastic')
    self.es = elasticsearch.Elasticsearch(
        [{'host': self.config['elasticsearch']['host'],
          'port': self.config['elasticsearch']['port']}],
        send_get_body_as='POST'
    )
    self.workload = workload
    today = datetime.datetime.today()
    self.index = "{}-{}-{}".format(tool, workload, today.strftime('%Y.%m.%d'))
def get_elasticsearch(check_availability=False):
    """Return Elasticsearch instance.

    :param check_availability: check if nodes are available
    :returns: Elasticsearch or None on failure
    :rtype: elasticsearch.Elasticsearch
    """
    nodes = config.get_config()["backend"]["connection"]
    try:
        es = elasticsearch.Elasticsearch(nodes)
        if check_availability:
            es.info()
    except Exception as e:
        LOG.warning(
            "Failed to query Elasticsearch nodes %s: %s" % (nodes, str(e)))
        raise
    return es
def init_es(timeout=TIMEOUT):
    log.info("connecting to %s %s", settings.ELASTICSEARCH_URL, settings.ELASTICSEARCH_PORT)
    auth = AWSRequestsAuth(aws_access_key=settings.AWS_ACCESS_KEY_ID,
                           aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
                           aws_host=settings.ELASTICSEARCH_URL,
                           aws_region='us-west-1',
                           aws_service='es')
    auth.encode = lambda x: bytes(x.encode('utf-8'))
    es = Elasticsearch(host=settings.ELASTICSEARCH_URL,
                       port=settings.ELASTICSEARCH_PORT,
                       connection_class=RequestsHttpConnection,
                       timeout=timeout,
                       max_retries=10,
                       retry_on_timeout=True,
                       http_auth=auth)
    return es
def setUp(self):
    # Clean index
    self.es = Elasticsearch(hosts=[LOCAL_ELASTICSEARCH])
    try:
        self.es.indices.delete(index='datahub')
        self.es.indices.delete(index='events')
    except NotFoundError:
        pass

    self.es.indices.create('datahub')
    mapping = {'dataset': {'properties': self.MAPPING}}
    self.es.indices.put_mapping(doc_type='dataset', index='datahub', body=mapping)

    self.es.indices.create('events')
    mapping = {'event': {'properties': {'timestamp': {'type': 'date'}}}}
    self.es.indices.put_mapping(doc_type='event', index='events', body=mapping)
def setup():
    try:
        from elasticsearch import Elasticsearch, ElasticsearchException
    except ImportError:
        raise unittest.SkipTest("elasticsearch-py not installed.")

    es = Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'])
    try:
        es.info()
    except ElasticsearchException as e:
        raise unittest.SkipTest("elasticsearch not running on %r" %
                                settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'], e)

    global test_runner
    global old_config

    from django.test.runner import DiscoverRunner

    test_runner = DiscoverRunner()
    test_runner.setup_test_environment()
    old_config = test_runner.setup_databases()
def __create_connection(self, config):
    kwargs = {
        'host': config['HOST'],
        'port': config.get('PORT', 9200),
        'use_ssl': config.get('USE_SSL', False),
        'verify_certs': True,
        'ca_certs': certifi.where()
    }

    if 'AWS_ACCESS_KEY' in config and \
            'AWS_SECRET_KEY' in config and \
            'AWS_REGION' in config:
        kwargs['connection_class'] = RequestsHttpConnection
        kwargs['http_auth'] = AWSRequestsAuth(
            aws_access_key=config['AWS_ACCESS_KEY'],
            aws_secret_access_key=config['AWS_SECRET_KEY'],
            aws_host=config['HOST'],
            aws_region=config['AWS_REGION'],
            aws_service='es')

    es = Elasticsearch(**kwargs)
    es._index = config['INDEX_NAME']
    es._settings = config.get('INDEX_SETTINGS', DEFAULT_INDEX_SETTINGS)
    return es
def destroy(self):
    """
    Deletes all elasticsearch history for Alphabay. It will delete all
    indexes matching:

        dminer-alphabay-*

    It will also delete the template for indexes. This template is named:

        dminer-alphabay-template
    """
    es = Elasticsearch([":".join([str(self.host), str(self.port)])])

    self.logger.info("Deleting index: dminer-alphabay-*")
    es.indices.delete("dminer-alphabay-*")

    self.logger.info("Deleting index template: dminer-alphabay-template")
    es.indices.delete_template("dminer-alphabay-template")
def destroy(self):
    """
    Deletes all elasticsearch history for Hansa. It will delete all
    indexes matching:

        dminer-hansa-*

    It will also delete the template for indexes. This template is named:

        dminer-hansa-template
    """
    es = Elasticsearch([":".join([str(self.host), str(self.port)])])

    self.logger.info("Deleting index: dminer-hansa-*")
    es.indices.delete("dminer-hansa-*")

    self.logger.info("Deleting index template: dminer-hansa-template")
    es.indices.delete_template("dminer-hansa-template")
def destroy(self):
    """
    Deletes all elasticsearch history for DreamMarket. It will delete all
    indexes matching:

        dminer-dreammarket-*

    It will also delete the template for indexes. This template is named:

        dminer-dreammarket-template
    """
    es = Elasticsearch([":".join([str(self.host), str(self.port)])])

    self.logger.info("Deleting index: dminer-dreammarket-*")
    es.indices.delete("dminer-dreammarket-*")

    self.logger.info("Deleting index template: dminer-dreammarket-template")
    es.indices.delete_template("dminer-dreammarket-template")
def getKibiRelationConfig(indexName=".kibi", typeName="config", elasticPort=9220, elasticHost="localhost"):
    es = Elasticsearch([{'host': elasticHost, 'port': elasticPort}],
                       http_auth=(elasticUsername, elasticPassword))
    mapping = es.search(
        index=indexName,
        doc_type=typeName,
        size=1000,
        request_timeout=1060,
        body={
            'query': {
                'filtered': {
                    'query': {
                        'match_all': {}
                    }
                }
            }
        }
    )
    return mapping['hits']['hits'][0]
    # return mapping[SourceIndexName]["mappings"][SourceTypeName]
def lambda_handler(event, context):
    es = Elasticsearch(os.environ['ELASTICSEARCH_URL'])
    indices = es.indices.get('*')

    for record in event['Records']:
        # Kinesis data is base64 encoded so decode here
        payload = base64.b64decode(record['kinesis']['data'])
        create_object_payload = json.loads(payload)
        doc_type_name = create_object_payload['event_type']
        index_name = create_object_payload['event_payload']['index_name']
        payload_data = create_object_payload['event_payload']['data']

        index_terms = index_name.split('_')
        del index_terms[-1]
        index_prefix = '_'.join(index_terms)
        latest_index_name = max(filter(lambda k: index_prefix in k, indices))
        if latest_index_name != index_name:
            index_name = latest_index_name

        res = es.index(index=index_name,
                       doc_type=doc_type_name,
                       id=str(create_object_payload['object_id']),
                       body=payload_data)

    for conn in es.transport.connection_pool.connections:
        conn.pool.close()

    return 'Successfully processed {} records.'.format(len(event['Records']))
def lambda_handler(event, context):
    es = Elasticsearch(os.environ['ELASTICSEARCH_URL'])
    indices_list = es.indices.get('*')

    for record in event['Records']:
        # Kinesis data is base64 encoded so decode here
        payload = base64.b64decode(record['kinesis']['data'])
        update_object_payload = json.loads(payload)
        doc_type_name = update_object_payload['event_type']
        index_name = update_object_payload['event_payload']['index_name']
        payload_data = update_object_payload['event_payload']['data']

        index_terms = index_name.split('_')
        del index_terms[-1]
        index_prefix = '_'.join(index_terms)
        latest_index_name = max(filter(lambda k: index_prefix in k, indices_list))
        if latest_index_name != index_name:
            index_name = latest_index_name

        res = es.update(index=index_name,
                        doc_type=doc_type_name,
                        id=str(update_object_payload['object_id']),
                        body={'doc': payload_data, 'doc_as_upsert': True})

    for conn in es.transport.connection_pool.connections:
        conn.pool.close()

    return 'Successfully processed {} records.'.format(len(event['Records']))
def elasticsearch(process_fixture_name):
    """
    Create Elasticsearch client fixture.

    :param str process_fixture_name: elasticsearch process fixture name
    """
    @pytest.fixture
    def elasticsearch_fixture(request):
        """Elasticsearch client fixture."""
        process = request.getfixturevalue(process_fixture_name)
        if not process.running():
            process.start()

        hosts = '{0!s}:{1!s}'.format(process.host, process.port)

        client = Elasticsearch(hosts=hosts)

        def drop_indexes():
            client.indices.delete(index='*')

        request.addfinalizer(drop_indexes)

        return client

    return elasticsearch_fixture
def test_es():
    """
    Before running other tests, ensure connection to ES is established
    """
    es = Elasticsearch()
    try:
        es.indices.create(INDEX)
        es.indices.delete(INDEX)
        return True
    except RequestError:
        print('Index already exists: skipping tests.')
        return False
    except ConnectionError:
        print('The ElasticSearch backend is not running: skipping tests.')
        return False
    except Exception as e:
        print('An unknown error occurred connecting to ElasticSearch: %s' % e)
        return False
def clustering_pubs_page(request):
    if request.GET.get('index_name') is not None and request.GET.get('index_name') != '':
        dci = DocClusteringInfo()
        dci.doc_type = "publication"
        dci.cost = -1
        dci.iter = -1
        dci.k = -1
        dci.index_name = request.GET.get('index_name')
        dci.save()

        if request.GET.get('k') is not None:
            clustering_pubs.tasks.cluster_index.delay(dci.index_name, k=int(request.GET.get('k')))
        else:
            clustering_pubs.tasks.cluster_index.delay(dci.index_name)

        return redirect("/clustering_pubs/status/%s/" % dci.index_name)

    es = Elasticsearch()
    indexes = es.indices.get_mapping()
    return render(request, 'clustering_pubs.html', {'indexes': indexes})
def retrieve_dataset(index_name, doc_type, weight={'title': 5, 'abstract': 1}):
    es = Elasticsearch()
    results = es.search(index=index_name, doc_type=doc_type, size=10000)['hits']['hits']
    dataset = {}
    for res in results:
        doc = DocumentInfo(res['_id'])
        term_vectors = es.termvectors(index=index_name, doc_type=doc_type, id=res['_id'],
                                      offsets=False, payloads=False, positions=False,
                                      fields='title,abstract',
                                      field_statistics=False)['term_vectors']
        for zone in {'abstract', 'title'}:
            term_vector = term_vectors[zone]['terms']
            for term in term_vector:
                stemmed = stem(term)
                if stemmed.isalpha():
                    if stemmed not in doc.tf:
                        doc.tf[stemmed] = term_vector[term]['term_freq'] * weight[zone]
                    else:
                        doc.tf[stemmed] += term_vector[term]['term_freq'] * weight[zone]
        dataset[res['_id']] = doc
    return dataset
def indexing_status_page(request, id):
    es = Elasticsearch()
    crawl_info = CrawlInfo.objects.get(id=id)
    try:
        es.indices.refresh(index="index-%d" % crawl_info.id)
        percentage = int(es.count("index-%d" % crawl_info.id, crawl_info.type).get('count') * 100 /
                         crawl_info.successful_crawls)
        percentage = max(1, percentage)
    except Exception as e:
        percentage = 0

    if request.GET.get('type', 'HTML') == 'JSON':
        result = json.dumps({'status': 'OK', 'percent': percentage},
                            ensure_ascii=False, encoding='utf8')
        return HttpResponse(result, content_type='application/json; charset=utf-8')

    return render(request, 'indexing_status.html', {'percent': percentage})
def get_elasticsearch_object():
    """ Creating an elasticsearch object to query the index """
    try:
        es_servers = settings.ELASTICSEARCH_SERVERS
        es_servers = es_servers if isinstance(es_servers, list) else [es_servers]
    except AttributeError:
        es_servers = ["http://localhost:9200"]  # ["https://ahmia.fi/esconnection/"]

    try:
        timeout = settings.ELASTICSEARCH_TIMEOUT
    except AttributeError:
        timeout = 60

    es_obj = Elasticsearch(hosts=es_servers, timeout=timeout)
    return es_obj
def setup_index(request, index_name):
    es = Elasticsearch()
    try:
        es.indices.create(index=index_name)
    except RequestError as e:
        if e.error == u'index_already_exists_exception':
            es.indices.delete(index_name)
        else:
            raise

    def fin():
        try:
            es.indices.delete(index_name)
        except NotFoundError:
            pass

    request.addfinalizer(fin)
def open(self):
    """
    Connect to ES cluster.
    Execute a command to check if connected on master to activate immediate
    connection to the DB, because we need to know if the DB server is available.
    Update log rotation time to force a log rotation.
    """
    logger.info("[elastic-logs] trying to connect to ES Cluster: %s", self.hosts)

    self.es = Elasticsearch(self.hosts.split(','), timeout=int(self.timeout))
    try:
        self.es.cluster.health()
        logger.info("[elastic-logs] connected to the ES Cluster: %s", self.hosts)
        self.is_connected = CONNECTED
        self.next_logs_rotation = time.time()
    except TransportError as exp:
        logger.error("[elastic-logs] Cluster is not available: %s", str(exp))
        self.is_connected = DISCONNECTED
        return False

    return True
def get_test_client(nowait=False, **kwargs):
    # construct kwargs from the environment
    kw = {'timeout': 30}
    if 'TEST_ES_CONNECTION' in os.environ:
        from elasticsearch import connection
        kw['connection_class'] = getattr(connection, os.environ['TEST_ES_CONNECTION'])

    kw.update(kwargs)
    client = Elasticsearch([os.environ.get('TEST_ES_SERVER', {})], **kw)

    # wait for yellow status
    for _ in range(1 if nowait else 100):
        try:
            client.cluster.health(wait_for_status='yellow')
            return client
        except ConnectionError:
            time.sleep(.1)
    else:
        # timeout
        raise SkipTest("Elasticsearch failed to start.")
def build_condition(field, value, kind='match', fuzzy=False):
    """Builds a condition for Elasticsearch.

    Args:
        field (str): Condition field.
        value (str): Comparison value.
        kind (str): Query type (e.g. 'match').
        fuzzy (bool): Flag to enable error tolerance (fuzziness).

    Returns:
        dict: Elasticsearch condition.
    """
    if fuzzy and kind == 'match':
        query = {field: {'query': value, 'fuzziness': 1}}
    else:
        query = {field: value}

    return {kind: query}
def main():
    parser = argparse.ArgumentParser(
        description="Export the Kibana dashboards together with"
                    " all used visualizations, searches and index pattern")
    parser.add_argument("--url",
                        help="Elasticsearch URL. By default: http://localhost:9200",
                        default="http://localhost:9200")
    parser.add_argument("--regex",
                        help="Regular expression to match all the dashboards to be exported. For example: metricbeat*",
                        required=True)
    parser.add_argument("--kibana",
                        help="Elasticsearch index where to store the Kibana settings. By default: .kibana ",
                        default=".kibana")
    parser.add_argument("--dir",
                        help="Output directory. By default: output",
                        default="output")

    args = parser.parse_args()

    print("Export {} dashboards to {} directory".format(args.regex, args.dir))
    print("Elasticsearch URL: {}".format(args.url))
    print("Elasticsearch index to store Kibana's"
          " dashboards: {}".format(args.kibana))

    es = Elasticsearch(args.url)
    ExportDashboards(es, args.regex, args.kibana, args.dir)
def setUp(self):
    # try to index some fixtures to elasticsearch
    self.client = Client()
    fixtureA = {"listing_id": 1, "drone": 2, "owner": 2,
                "description": "please rent myseediestdrone!",
                "time_posted": "2016-10-24T04:28:48.932Z", "price_per_day": 10.0}
    fixtureB = {"listing_id": 2, "drone": 3, "owner": 3,
                "description": "please rent myforgeddrone!",
                "time_posted": "2016-10-24T04:28:48.991Z", "price_per_day": 14.0}

    es = Elasticsearch(['es'])
    es.index(index='listing_index', doc_type='listing', id=fixtureA['listing_id'], body=fixtureA)
    es.index(index='listing_index', doc_type='listing', id=fixtureB['listing_id'], body=fixtureB)
    es.indices.refresh(index='listing_index')

    producer.send('new-listings-topic', json.dumps(some_new_listing).encode('utf-8'))

    response = self.client.post(reverse('create-listing'), fixtureA)
    print("test_create_listing POST " + str(response))
    resp = json.loads(response.content.decode('utf8'))
    self.assertEquals(response.status_code, 200)
    print("listing_atts" + str(resp))

# append number to test to get python to run defs in correct order
def get_client():
    global client
    if client is not None:
        return client

    client = Elasticsearch([os.environ.get('TEST_ES_SERVER', {})], timeout=300)

    # wait for yellow status
    for _ in range(100):
        time.sleep(.1)
        try:
            client.cluster.health(wait_for_status='yellow')
            return client
        except ConnectionError:
            continue
    else:
        # timeout
        raise SkipTest("Elasticsearch failed to start.")
def __init__(self, dataset_type, dataset_id):
    """Data Access Object to interact with autocomplete

    The autocomplete is provided by Elasticsearch, and it is not divided by
    datasets; instead it is divided by dataset type.
    """
    # TODO: Generate an index on elasticsearch with allowed fields
    # The entity must be loaded with a dataset

    # Elasticsearch global params
    self.ELASTIC_ENDPOINT = "http://elasticsearch:9200/"
    self.ELASTIC_AUTH = ("elastic", "changeme")

    # Create Elasticsearch object
    self.es = Elasticsearch(self.ELASTIC_ENDPOINT, http_auth=self.ELASTIC_AUTH)
    self.index = "entities"
    self.type = dataset_type
    self.dataset_id = dataset_id

    # Test if index exists, and if not, creates it
    if not self.es.indices.exists(index=self.index):
        self.generate_index(self.index)
def __init__(self, endpoint='http://localhost:9200/', verify=True, limit=10000, date=None):
    self.endpoint = endpoint
    self.verify = verify
    self.es = Elasticsearch(self.endpoint)
    self.limit = int(limit)
    self.indexed = 0
    self.date = date
def __init__(self, endpoint='http://localhost:9200/', source=None, limit=100):
    self.es = Elasticsearch(endpoint)
    self.last_call = time.time()
    self.source = source
    self.limit = limit
def run(self):
    es = Elasticsearch(self.endpoint)
    es.indices.create(index='fcc-comments', body=mappings.MAPPINGS)
    print('created fcc-comments index')
def get_elasticsearch(self):
    """
    Get a connection to the Elasticsearch cluster. Currently only supports a single host.

    :returns: ``elasticsearch.Elasticsearch``
    """
    return Elasticsearch(hosts=self.config['es_host'], timeout=30)
def get_elasticsearch_helper(self):
    """
    Get helpers module for Elasticsearch. Used to bulk index documents.

    :returns: package ``elasticsearch.helpers``
    """
    return helpers
def test_es_instance():
    '''An Elasticsearch instance can be passed to Storage or will be created'''
    storage = Storage()
    assert repr(storage) == "Storage <Elasticsearch([{}])>"

    es = Elasticsearch(['localhost'])
    storage = Storage(es)
    if six.PY2:
        assert repr(storage) == "Storage <Elasticsearch([{u'host': u'localhost'}])>"
    else:
        assert repr(storage) == "Storage <Elasticsearch([{'host': 'localhost'}])>"
def __init__(self, es=None):
    # Use the passed `es` or create a new Elasticsearch instance
    self.__es = es if es is not None else Elasticsearch()
def main(_):
    maybe_download_and_extract()
    create_graph()
    b = Beanstalk.from_url("beanstalk://127.0.0.1:6014")
    b.watch("oio-process")
    while True:
        try:
            job_id, data = b.reserve()
        except ResponseError:
            continue
        meta = json.loads(data)
        url = meta["url"]
        print(url)
        if url["path"].split('.')[len(url["path"].split('.')) - 1] == 'png':
            b.delete(job_id)
            continue

        s = object_storage.ObjectStorageAPI(url["ns"], "http://127.0.0.1:6006")
        meta, stream = s.object_fetch(url["account"], url["user"], url["path"])
        image = (np.frombuffer("".join(stream), np.uint8))
        # image = np.array(image)[:, :, 0:3]
        result = run_inference_on_image(image)
        print(json.dumps(result))
        s.object_update(url["account"], url["user"], url["path"], result)

        # /!\ Change the ip /!\
        es = Elasticsearch(['http://192.168.99.1:9200'])

        # Retrieve the metadata from the object
        meta, stream = s.object_fetch(url["account"], url["user"], url["path"])

        # Create the index in ElasticSearch if it does not exist
        if not es.indices.exists(url["account"].lower()):
            es.indices.create(index=url["account"].lower())

        # Push the metadata to Elasticsearch
        res = es.index(index=url["account"].lower(), doc_type=url["user"].lower(), body=meta)
        es.indices.refresh(index=url["account"].lower())

        b.delete(job_id)
def set_up_elastic(url):
    if not url:
        url = os.getenv("CROSSREF_ES_URL")
    es = Elasticsearch(url,
                       serializer=JSONSerializerPython2(),
                       retry_on_timeout=True,
                       max_retries=100)
    return es