/**
 * Indexes one document into the lookup index and two into the query index, runs a
 * more-like-this search that references the lookup document, and then checks that the
 * recorded requests of type {@code MultiTermVectorsRequest} contain the header.
 */
public void testThatMoreLikeThisQueryMultiTermVectorRequestContainsContextAndHeaders() throws Exception {
    // the document the query is "like"
    transportClient().prepareIndex(lookupIndex, "type", "1")
        .setSource(jsonBuilder().startObject().field("name", "Star Wars - The new republic").endObject())
        .get();

    // the candidate documents that are searched
    transportClient().prepareIndex(queryIndex, "type", "1")
        .setSource(jsonBuilder().startObject().field("name", "Jar Jar Binks - A horrible mistake").endObject())
        .get();
    transportClient().prepareIndex(queryIndex, "type", "2")
        .setSource(jsonBuilder().startObject().field("name", "Star Wars - Return of the jedi").endObject())
        .get();

    transportClient().admin().indices().prepareRefresh(lookupIndex, queryIndex).get();

    Item[] likeItems = new Item[]{new Item(lookupIndex, "type", "1")};
    MoreLikeThisQueryBuilder mltQuery = QueryBuilders.moreLikeThisQuery(new String[]{"name"}, null, likeItems)
        .minTermFreq(1)
        .minDocFreq(1);

    SearchResponse searchResponse = transportClient().prepareSearch(queryIndex).setQuery(mltQuery).get();

    assertNoFailures(searchResponse);
    assertHitCount(searchResponse, 1);
    assertRequestsContainHeader(MultiTermVectorsRequest.class);
}
/**
 * Indexes two documents with a {@code text} field and expects exactly one hit when asking
 * for documents like document "1".
 */
public void testSimpleMoreLikeThis() throws Exception {
    logger.info("Creating index test");
    assertAcked(prepareCreate("test").addMapping("type1",
        jsonBuilder().startObject().startObject("type1").startObject("properties")
            .startObject("text").field("type", "text").endObject()
            .endObject().endObject().endObject()));

    logger.info("Running Cluster Health");
    assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN));

    logger.info("Indexing...");
    client().index(indexRequest("test").type("type1").id("1")
        .source(jsonBuilder().startObject().field("text", "lucene").endObject())).actionGet();
    client().index(indexRequest("test").type("type1").id("2")
        .source(jsonBuilder().startObject().field("text", "lucene release").endObject())).actionGet();
    client().admin().indices().refresh(refreshRequest()).actionGet();

    logger.info("Running moreLikeThis");
    MoreLikeThisQueryBuilder mltQuery =
        new MoreLikeThisQueryBuilder(null, new Item[] {new Item("test", "type1", "1")}).minTermFreq(1).minDocFreq(1);
    SearchResponse response = client().prepareSearch().setQuery(mltQuery).get();
    assertHitCount(response, 1L);
}
/**
 * Indexes three documents that only carry the long field {@code some_long} and expects a
 * more-like-this query on document "1" to return no hits.
 */
public void testSimpleMoreLikeOnLongField() throws Exception {
    logger.info("Creating index test");
    assertAcked(prepareCreate("test").addMapping("type1", "some_long", "type=long"));

    logger.info("Running Cluster Health");
    assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN));

    logger.info("Indexing...");
    client().index(indexRequest("test").type("type1").id("1")
        .source(jsonBuilder().startObject().field("some_long", 1367484649580L).endObject())).actionGet();
    // note: this document is indexed under "type2", unlike the other two
    client().index(indexRequest("test").type("type2").id("2")
        .source(jsonBuilder().startObject().field("some_long", 0).endObject())).actionGet();
    client().index(indexRequest("test").type("type1").id("3")
        .source(jsonBuilder().startObject().field("some_long", -666).endObject())).actionGet();
    client().admin().indices().refresh(refreshRequest()).actionGet();

    logger.info("Running moreLikeThis");
    MoreLikeThisQueryBuilder mltQuery =
        new MoreLikeThisQueryBuilder(null, new Item[] {new Item("test", "type1", "1")}).minTermFreq(1).minDocFreq(1);
    SearchResponse response = client().prepareSearch().setQuery(mltQuery).get();
    assertHitCount(response, 0L);
}
/**
 * Refers to the liked document through an alias ({@code foo_name}) instead of the concrete
 * index name and expects two hits, the first of which is document "3".
 */
public void testMoreLikeThisWithAliasesInLikeDocuments() throws Exception {
    String indexName = "foo";
    String aliasName = "foo_name";
    String typeName = "bar";

    String mapping = XContentFactory.jsonBuilder()
        .startObject().startObject("bar")
        .startObject("properties")
        .endObject()
        .endObject().endObject()
        .string();
    client().admin().indices().prepareCreate(indexName).addMapping(typeName, mapping, XContentType.JSON).get();
    client().admin().indices().prepareAliases().addAlias(indexName, aliasName).get();
    assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN));

    client().index(indexRequest(indexName).type(typeName).id("1")
        .source(jsonBuilder().startObject().field("text", "elasticsearch index").endObject())).actionGet();
    client().index(indexRequest(indexName).type(typeName).id("2")
        .source(jsonBuilder().startObject().field("text", "lucene index").endObject())).actionGet();
    client().index(indexRequest(indexName).type(typeName).id("3")
        .source(jsonBuilder().startObject().field("text", "elasticsearch index").endObject())).actionGet();
    refresh(indexName);

    // the like item is addressed via the alias, not the index name
    MoreLikeThisQueryBuilder mltQuery =
        new MoreLikeThisQueryBuilder(null, new Item[] {new Item(aliasName, typeName, "1")}).minTermFreq(1).minDocFreq(1);
    SearchResponse response = client().prepareSearch().setQuery(mltQuery).get();
    assertHitCount(response, 2L);
    assertThat(response.getHits().getAt(0).getId(), equalTo("3"));
}
/**
 * Indexes a document holding a nested object ({@code foo.bar}) and runs the same
 * more-like-this search twice, asserting each time that the response has no failures.
 */
public void testMoreLikeThisIssue2197() throws Exception {
    // NOTE(review): this local is never read below; kept because client() is still invoked here
    Client client = client();

    String mapping = XContentFactory.jsonBuilder()
        .startObject().startObject("bar")
        .startObject("properties")
        .endObject()
        .endObject().endObject()
        .string();
    client().admin().indices().prepareCreate("foo").addMapping("bar", mapping, XContentType.JSON).execute().actionGet();

    client().prepareIndex("foo", "bar", "1")
        .setSource(jsonBuilder().startObject().startObject("foo").field("bar", "boz").endObject().endObject())
        .execute().actionGet();
    client().admin().indices().prepareRefresh("foo").execute().actionGet();
    assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN));

    // first run
    SearchResponse response = client().prepareSearch()
        .setQuery(new MoreLikeThisQueryBuilder(null, new Item[] {new Item("foo", "bar", "1")}))
        .get();
    assertNoFailures(response);
    assertThat(response, notNullValue());

    // second, identical run
    response = client().prepareSearch()
        .setQuery(new MoreLikeThisQueryBuilder(null, new Item[] {new Item("foo", "bar", "1")}))
        .get();
    assertNoFailures(response);
    assertThat(response, notNullValue());
}
/**
 * Indexes a document with routing "2" and checks that a more-like-this query whose like
 * item carries the same routing completes without failures.
 */
public void testMoreLikeWithCustomRouting() throws Exception {
    String mapping = XContentFactory.jsonBuilder()
        .startObject().startObject("bar")
        .startObject("properties")
        .endObject()
        .endObject().endObject()
        .string();
    client().admin().indices().prepareCreate("foo").addMapping("bar", mapping, XContentType.JSON).execute().actionGet();
    ensureGreen();

    client().prepareIndex("foo", "bar", "1")
        .setSource(jsonBuilder().startObject().startObject("foo").field("bar", "boz").endObject().endObject())
        .setRouting("2")
        .execute().actionGet();
    client().admin().indices().prepareRefresh("foo").execute().actionGet();

    SearchResponse response = client().prepareSearch()
        .setQuery(new MoreLikeThisQueryBuilder(null, new Item[] {new Item("foo", "bar", "1").routing("2")}))
        .get();
    assertNoFailures(response);
    assertThat(response, notNullValue());
}
/**
 * Same scenario as a routed more-like-this lookup, but on an index created with two shards
 * and no replicas, using routing "4000" for both the indexed document and the like item.
 */
public void testMoreLikeThisIssueRoutingNotSerialized() throws Exception {
    String mapping = XContentFactory.jsonBuilder()
        .startObject().startObject("bar")
        .startObject("properties")
        .endObject()
        .endObject().endObject()
        .string();
    assertAcked(
        prepareCreate("foo", 2, Settings.builder().put(SETTING_NUMBER_OF_SHARDS, 2).put(SETTING_NUMBER_OF_REPLICAS, 0))
            .addMapping("bar", mapping, XContentType.JSON));
    ensureGreen();

    client().prepareIndex("foo", "bar", "1")
        .setSource(jsonBuilder().startObject().startObject("foo").field("bar", "boz").endObject().endObject())
        .setRouting("4000")
        .execute().actionGet();
    client().admin().indices().prepareRefresh("foo").execute().actionGet();

    SearchResponse response = client().prepareSearch()
        .setQuery(new MoreLikeThisQueryBuilder(null, new Item[] {new Item("foo", "bar", "1").routing("4000")}))
        .get();
    assertNoFailures(response);
    assertThat(response, notNullValue());
}
public void testMoreLikeThisArtificialDocs() throws Exception { int numFields = randomIntBetween(5, 10); createIndex("test"); ensureGreen(); logger.info("Indexing a single document ..."); XContentBuilder doc = jsonBuilder().startObject(); for (int i = 0; i < numFields; i++) { doc.field("field" + i, generateRandomStringArray(5, 10, false) + "a"); // make sure they are not all empty } doc.endObject(); indexRandom(true, client().prepareIndex("test", "type1", "0").setSource(doc)); logger.info("Checking the document matches ..."); MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery(new Item[] {new Item("test", "type1", doc).routing("0")}) // routing to ensure we hit the shard with the doc .minTermFreq(0) .minDocFreq(0) .maxQueryTerms(100) .minimumShouldMatch("100%"); // strict all terms must match! SearchResponse response = client().prepareSearch("test").setTypes("type1") .setQuery(mltQuery).get(); assertSearchResponse(response); assertHitCount(response, 1); }
/**
 * Reads an {@code Item} from a fixed, Base64-encoded byte sequence using a randomly chosen
 * 5.x version, checks the decoded values, then writes the item back with the same version
 * and compares the produced bytes with the input.
 */
public void testItemSerializationBwc() throws IOException {
    final String encodedItem = "AQVpbmRleAEEdHlwZQEODXsiZm9vIjoiYmFyIn0A/wD//////////QAAAAAAAAAA";
    final byte[] data = Base64.getDecoder().decode(encodedItem);
    final Version version = randomFrom(
        Version.V_5_0_0, Version.V_5_0_1, Version.V_5_0_2, Version.V_5_0_3_UNRELEASED,
        Version.V_5_1_1_UNRELEASED, Version.V_5_1_2_UNRELEASED, Version.V_5_2_0_UNRELEASED);

    try (StreamInput in = StreamInput.wrap(data)) {
        in.setVersion(version);
        Item item = new Item(in);

        // values decoded from the fixed payload
        assertEquals(XContentType.JSON, item.xContentType());
        assertEquals("{\"foo\":\"bar\"}", item.doc().utf8ToString());
        assertEquals("index", item.index());
        assertEquals("type", item.type());

        // round trip: serialising again must give back the input bytes
        try (BytesStreamOutput out = new BytesStreamOutput()) {
            out.setVersion(version);
            item.writeTo(out);
            assertArrayEquals(data, out.bytes().toBytesRef().bytes);
        }
    }
}
private static void setDefaultIndexTypeFields(QueryParseContext parseContext, Item item, List<String> moreLikeFields, boolean useDefaultField) { if (item.index() == null) { item.index(parseContext.index().name()); } if (item.type() == null) { if (parseContext.queryTypes().size() > 1) { throw new QueryParsingException(parseContext, "ambiguous type for item with id: " + item.id() + " and index: " + item.index()); } else { item.type(parseContext.queryTypes().iterator().next()); } } // default fields if not present but don't override for artificial docs if ((item.fields() == null || item.fields().length == 0) && item.doc() == null) { if (useDefaultField) { item.fields("*"); } else { item.fields(moreLikeFields.toArray(new String[moreLikeFields.size()])); } } }
/**
 * Checks the {@code include} flag: with it set, the liked document itself is returned first
 * followed by the other document; without it, only the other document is returned.
 */
public void testSimpleMoreLikeInclude() throws Exception {
    logger.info("Creating index test");
    assertAcked(prepareCreate("test").addMapping("type1",
        jsonBuilder().startObject().startObject("type1").startObject("properties")
            .startObject("text").field("type", "text").endObject()
            .endObject().endObject().endObject()));

    logger.info("Running Cluster Health");
    assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN));

    logger.info("Indexing...");
    client().index(indexRequest("test").type("type1").id("1").source(
        jsonBuilder().startObject()
            .field("text", "Apache Lucene is a free/open source information retrieval software library").endObject()))
        .actionGet();
    client().index(indexRequest("test").type("type1").id("2").source(
        jsonBuilder().startObject()
            .field("text", "Lucene has been ported to other programming languages").endObject()))
        .actionGet();
    client().admin().indices().refresh(refreshRequest()).actionGet();

    logger.info("Running More Like This with include true");
    MoreLikeThisQueryBuilder likeFirstIncluded =
        new MoreLikeThisQueryBuilder(null, new Item[] {new Item("test", "type1", "1")})
            .minTermFreq(1).minDocFreq(1).include(true).minimumShouldMatch("0%");
    SearchResponse response = client().prepareSearch().setQuery(likeFirstIncluded).get();
    assertOrderedSearchHits(response, "1", "2");

    MoreLikeThisQueryBuilder likeSecondIncluded =
        new MoreLikeThisQueryBuilder(null, new Item[] {new Item("test", "type1", "2")})
            .minTermFreq(1).minDocFreq(1).include(true).minimumShouldMatch("0%");
    response = client().prepareSearch().setQuery(likeSecondIncluded).get();
    assertOrderedSearchHits(response, "2", "1");

    logger.info("Running More Like This with include false");
    MoreLikeThisQueryBuilder likeFirstExcluded =
        new MoreLikeThisQueryBuilder(null, new Item[] {new Item("test", "type1", "1")})
            .minTermFreq(1).minDocFreq(1).minimumShouldMatch("0%");
    response = client().prepareSearch().setQuery(likeFirstExcluded).get();
    assertSearchHits(response, "2");
}
/**
 * Creates a random number of types, indexes one document per type, and expects a
 * more-like-this search across all of those types (liked document included) to return
 * one hit per type.
 */
public void testSimpleMoreLikeThisIdsMultipleTypes() throws Exception {
    logger.info("Creating index test");
    int numOfTypes = randomIntBetween(2, 10);

    CreateIndexRequestBuilder createRequestBuilder = prepareCreate("test");
    for (int typeNo = 0; typeNo < numOfTypes; typeNo++) {
        createRequestBuilder.addMapping("type" + typeNo,
            jsonBuilder().startObject().startObject("type" + typeNo).startObject("properties")
                .startObject("text").field("type", "text").endObject()
                .endObject().endObject().endObject());
    }
    assertAcked(createRequestBuilder);

    logger.info("Running Cluster Health");
    assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN));

    logger.info("Indexing...");
    List<IndexRequestBuilder> builders = new ArrayList<>(numOfTypes);
    // the type names are collected here as well; they are only needed for the search below
    String[] types = new String[numOfTypes];
    for (int typeNo = 0; typeNo < numOfTypes; typeNo++) {
        builders.add(client().prepareIndex("test", "type" + typeNo)
            .setSource("text", "lucene" + " " + typeNo)
            .setId(String.valueOf(typeNo)));
        types[typeNo] = "type" + typeNo;
    }
    indexRandom(true, builders);

    logger.info("Running MoreLikeThis");
    MoreLikeThisQueryBuilder queryBuilder = QueryBuilders
        .moreLikeThisQuery(new String[] {"text"}, null, new Item[] {new Item("test", "type0", "0")})
        .include(true)
        .minTermFreq(1)
        .minDocFreq(1);

    SearchResponse mltResponse = client().prepareSearch().setTypes(types).setQuery(queryBuilder).execute().actionGet();
    assertHitCount(mltResponse, numOfTypes);
}
public void testMoreLikeThisMultiValueFields() throws Exception { logger.info("Creating the index ..."); assertAcked(prepareCreate("test") .addMapping("type1", "text", "type=text,analyzer=keyword") .setSettings(SETTING_NUMBER_OF_SHARDS, 1)); ensureGreen(); logger.info("Indexing ..."); String[] values = {"aaaa", "bbbb", "cccc", "dddd", "eeee", "ffff", "gggg", "hhhh", "iiii", "jjjj"}; List<IndexRequestBuilder> builders = new ArrayList<>(values.length + 1); // index one document with all the values builders.add(client().prepareIndex("test", "type1", "0").setSource("text", values)); // index each document with only one of the values for (int i = 0; i < values.length; i++) { builders.add(client().prepareIndex("test", "type1", String.valueOf(i + 1)).setSource("text", values[i])); } indexRandom(true, builders); int maxIters = randomIntBetween(10, 20); for (int i = 0; i < maxIters; i++) { int max_query_terms = randomIntBetween(1, values.length); logger.info("Running More Like This with max_query_terms = {}", max_query_terms); MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery(new String[] {"text"}, null, new Item[] {new Item(null, null, "0")}) .minTermFreq(1).minDocFreq(1) .maxQueryTerms(max_query_terms).minimumShouldMatch("0%"); SearchResponse response = client().prepareSearch("test").setTypes("type1") .setQuery(mltQuery).execute().actionGet(); assertSearchResponse(response); assertHitCount(response, max_query_terms); } }
/**
 * Two documents share the value of {@code text1} but differ in {@code text}. Without an
 * explicit field list both documents are expected; restricted to {@code text} only one is.
 */
public void testSelectFields() throws IOException, ExecutionException, InterruptedException {
    assertAcked(prepareCreate("test")
        .addMapping("type1", "text", "type=text,analyzer=whitespace", "text1", "type=text,analyzer=whitespace"));
    ensureGreen("test");

    indexRandom(true,
        client().prepareIndex("test", "type1", "1").setSource(jsonBuilder()
            .startObject()
            .field("text", "hello world")
            .field("text1", "elasticsearch")
            .endObject()),
        client().prepareIndex("test", "type1", "2").setSource(jsonBuilder()
            .startObject()
            .field("text", "goodby moon")
            .field("text1", "elasticsearch")
            .endObject()));

    // no field list given
    MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery(new Item[] {new Item("test", "type1", "1")})
        .minTermFreq(0)
        .minDocFreq(0)
        .include(true)
        .minimumShouldMatch("1%");
    SearchResponse response = client().prepareSearch("test").setTypes("type1").setQuery(mltQuery).get();
    assertSearchResponse(response);
    assertHitCount(response, 2);

    // restricted to the "text" field
    mltQuery = moreLikeThisQuery(new String[] {"text"}, null, new Item[] {new Item("test", "type1", "1")})
        .minTermFreq(0)
        .minDocFreq(0)
        .include(true)
        .minimumShouldMatch("1%");
    response = client().prepareSearch("test").setTypes("type1").setQuery(mltQuery).get();
    assertSearchResponse(response);
    assertHitCount(response, 1);
}
@Before public void setup() { // MLT only supports string fields, unsupported fields are tested below randomFields = randomStringFields(); // we also preset the item requests randomLikeItems = new Item[randomIntBetween(1, 3)]; for (int i = 0; i < randomLikeItems.length; i++) { randomLikeItems[i] = generateRandomItem(); } // and for the unlike items too randomUnlikeItems = new Item[randomIntBetween(1, 3)]; for (int i = 0; i < randomUnlikeItems.length; i++) { randomUnlikeItems[i] = generateRandomItem(); } }
/**
 * Writes a random {@code Item} to a stream, reads it back and expects the copy to be equal
 * to the original.
 *
 * The output stream is opened in a try-with-resources statement so that it is closed even
 * when an assertion fails, matching how {@code BytesStreamOutput} is handled in
 * {@code testItemSerializationBwc}.
 */
public void testItemSerialization() throws IOException {
    Item expectedItem = generateRandomItem();
    try (BytesStreamOutput output = new BytesStreamOutput()) {
        expectedItem.writeTo(output);
        Item newItem = new Item(output.bytes().streamInput());
        assertEquals(expectedItem, newItem);
    }
}
/**
 * Renders a random {@code Item} as JSON, parses that JSON back into a new item and expects
 * both items to be equal.
 */
public void testItemFromXContent() throws IOException {
    Item expectedItem = generateRandomItem();

    String json = expectedItem
        .toXContent(XContentFactory.jsonBuilder(), ToXContent.EMPTY_PARAMS)
        .string();

    XContentParser parser = createParser(JsonXContent.jsonXContent, json);
    Item parsedItem = Item.parse(parser, new Item());

    assertEquals(expectedItem, parsedItem);
}
/**
 * Builds a more-like-this query from two field names and one like text (no like items) and
 * sets the minimum term frequency and the maximum number of query terms. Nothing is asserted.
 */
public void testMLT() {
    String[] nameFields = {"name.first", "name.last"};
    String[] likeTexts = {"text like this one"};
    Item[] noItems = null;

    moreLikeThisQuery(nameFields, likeTexts, noItems)
        .minTermFreq(1)
        .maxQueryTerms(12);
}
/**
 * Searches for objects whose content is similar to the given text.
 *
 * @param appid     passed through to the search helpers
 * @param type      passed through to the raw search
 * @param filterKey when not blank, the object with this id is excluded from the results
 * @param fields    fields to compare against; when null or empty the query is built from the text alone
 * @param liketext  the text to find similar content for; a blank value yields an empty list
 * @param pager     passed through to the raw search
 * @param <P>       the result object type
 * @return the list produced by {@code searchQuery}, or an empty list when {@code liketext} is blank
 */
@Override
public <P extends ParaObject> List<P> findSimilar(String appid, String type, String filterKey, String[] fields,
        String liketext, Pager... pager) {
    if (StringUtils.isBlank(liketext)) {
        return Collections.emptyList();
    }

    QueryBuilder qb;
    if (fields == null || fields.length == 0) {
        qb = moreLikeThisQuery(new String[]{liketext}).minDocFreq(1).minTermFreq(1);
    } else {
        // does any requested field start with the nested-properties prefix?
        boolean containsNestedProps = false;
        for (String candidate : fields) {
            if (StringUtils.startsWith(candidate, PROPS_PREFIX)) {
                containsNestedProps = true;
                break;
            }
        }

        if (nestedMode() && containsNestedProps) {
            // one optional clause per field: the key must match and the value must be similar
            BoolQueryBuilder perField = boolQuery();
            for (String field : fields) {
                QueryBuilder keyMatches = matchQuery(PROPS_PREFIX + "k", getNestedKey(field));
                QueryBuilder valueSimilar = moreLikeThisQuery(
                        new String[]{PROPS_PREFIX + "v"}, new String[]{liketext}, Item.EMPTY_ARRAY)
                    .minDocFreq(1)
                    .minTermFreq(1);
                perField.should(nestedPropsQuery(boolQuery().must(keyMatches).must(valueSimilar)));
            }
            qb = perField;
        } else {
            qb = moreLikeThisQuery(fields, new String[]{liketext}, Item.EMPTY_ARRAY)
                .minDocFreq(1)
                .minTermFreq(1);
        }
    }

    if (!StringUtils.isBlank(filterKey)) {
        // wrap the similarity query and leave out the object identified by filterKey
        qb = boolQuery().mustNot(termQuery(Config._ID, filterKey)).filter(qb);
    }
    return searchQuery(appid, searchQueryRaw(appid, type, qb, pager));
}
/**
 * Reads the parser's current token as one entry of a 'like' parameter: a value token is
 * added to {@code texts}, an object is parsed into an {@code Item} and added to {@code items}.
 *
 * @param parseContext supplies the parser and the parse field matcher
 * @param texts        receives the text when the current token is a value
 * @param items        receives the parsed item when the current token starts an object
 * @throws IllegalArgumentException if the current token is neither a value nor the start of an object
 */
private static void parseLikeField(QueryParseContext parseContext, List<String> texts, List<Item> items)
        throws IOException {
    XContentParser parser = parseContext.parser();
    XContentParser.Token token = parser.currentToken();

    if (token.isValue()) {
        texts.add(parser.text());
        return;
    }
    if (token == XContentParser.Token.START_OBJECT) {
        items.add(Item.parse(parser, parseContext.parseFieldMatcher(), new Item()));
        return;
    }
    throw new IllegalArgumentException("Content of 'like' parameter should either be a string or an object");
}
private static void handleExclude(BooleanQuery boolQuery, List<Item> likeItems) { // artificial docs get assigned a random id and should be disregarded List<BytesRef> uids = new ArrayList<>(); for (Item item : likeItems) { if (item.doc() != null) { continue; } uids.add(createUidAsBytes(item.type(), item.id())); } if (!uids.isEmpty()) { TermsQuery query = new TermsQuery(UidFieldMapper.NAME, uids.toArray(new BytesRef[0])); boolQuery.add(query, BooleanClause.Occur.MUST_NOT); } }
/**
 * Sends one multi term vectors request covering all given items and waits for the response.
 *
 * @param items         the items; each contributes one term vectors request
 * @param searchContext the context whose context and headers are copied onto the request
 * @return the response returned by the client
 */
public MultiTermVectorsResponse fetchResponse(List<Item> items, SearchContext searchContext) throws IOException {
    MultiTermVectorsRequest request = new MultiTermVectorsRequest();
    for (int i = 0; i < items.size(); i++) {
        request.add(items.get(i).toTermVectorsRequest());
    }
    request.copyContextAndHeadersFrom(searchContext);

    MultiTermVectorsResponse response = client.multiTermVectors(request).actionGet();
    return response;
}
/**
 * Defines two filtered aliases ("release" and "beta") over one index and checks that a
 * more-like-this search returns two hits on the index and one hit through each alias,
 * including when the search goes through a coordinating-only node client.
 */
public void testMoreLikeThisWithAliases() throws Exception {
    logger.info("Creating index test");
    assertAcked(prepareCreate("test").addMapping("type1",
        jsonBuilder().startObject().startObject("type1").startObject("properties")
            .startObject("text").field("type", "text").endObject()
            .endObject().endObject().endObject()));

    logger.info("Creating aliases alias release");
    client().admin().indices().prepareAliases()
        .addAlias("test", "release", termQuery("text", "release"))
        .addAlias("test", "beta", termQuery("text", "beta"))
        .get();

    logger.info("Running Cluster Health");
    assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN));

    logger.info("Indexing...");
    client().index(indexRequest("test").type("type1").id("1")
        .source(jsonBuilder().startObject().field("text", "lucene beta").endObject())).actionGet();
    client().index(indexRequest("test").type("type1").id("2")
        .source(jsonBuilder().startObject().field("text", "lucene release").endObject())).actionGet();
    client().index(indexRequest("test").type("type1").id("3")
        .source(jsonBuilder().startObject().field("text", "elasticsearch beta").endObject())).actionGet();
    client().index(indexRequest("test").type("type1").id("4")
        .source(jsonBuilder().startObject().field("text", "elasticsearch release").endObject())).actionGet();
    client().admin().indices().refresh(refreshRequest()).actionGet();

    logger.info("Running moreLikeThis on index");
    MoreLikeThisQueryBuilder onIndex =
        new MoreLikeThisQueryBuilder(null, new Item[] {new Item("test", "type1", "1")}).minTermFreq(1).minDocFreq(1);
    SearchResponse response = client().prepareSearch().setQuery(onIndex).get();
    assertHitCount(response, 2L);

    logger.info("Running moreLikeThis on beta shard");
    MoreLikeThisQueryBuilder onBeta =
        new MoreLikeThisQueryBuilder(null, new Item[] {new Item("test", "type1", "1")}).minTermFreq(1).minDocFreq(1);
    response = client().prepareSearch("beta").setQuery(onBeta).get();
    assertHitCount(response, 1L);
    assertThat(response.getHits().getAt(0).getId(), equalTo("3"));

    logger.info("Running moreLikeThis on release shard");
    MoreLikeThisQueryBuilder onRelease =
        new MoreLikeThisQueryBuilder(null, new Item[] {new Item("test", "type1", "1")}).minTermFreq(1).minDocFreq(1);
    response = client().prepareSearch("release").setQuery(onRelease).get();
    assertHitCount(response, 1L);
    assertThat(response.getHits().getAt(0).getId(), equalTo("2"));

    logger.info("Running moreLikeThis on alias with node client");
    MoreLikeThisQueryBuilder viaNodeClient =
        new MoreLikeThisQueryBuilder(null, new Item[] {new Item("test", "type1", "1")}).minTermFreq(1).minDocFreq(1);
    response = internalCluster().coordOnlyNodeClient().prepareSearch("beta").setQuery(viaNodeClient).get();
    assertHitCount(response, 1L);
    assertThat(response.getHits().getAt(0).getId(), equalTo("3"));
}
public void testNumericField() throws Exception { final String[] numericTypes = new String[]{"byte", "short", "integer", "long"}; prepareCreate("test").addMapping("type", jsonBuilder() .startObject().startObject("type") .startObject("properties") .startObject("int_value").field("type", randomFrom(numericTypes)).endObject() .startObject("string_value").field("type", "text").endObject() .endObject() .endObject().endObject()).execute().actionGet(); ensureGreen(); client().prepareIndex("test", "type", "1") .setSource(jsonBuilder().startObject().field("string_value", "lucene index").field("int_value", 1).endObject()) .execute().actionGet(); client().prepareIndex("test", "type", "2") .setSource(jsonBuilder().startObject().field("string_value", "elasticsearch index").field("int_value", 42).endObject()) .execute().actionGet(); refresh(); // Implicit list of fields -> ignore numeric fields SearchResponse searchResponse = client().prepareSearch().setQuery( new MoreLikeThisQueryBuilder(null, new Item[] {new Item("test", "type", "1")}).minTermFreq(1).minDocFreq(1)).get(); assertHitCount(searchResponse, 1L); // Explicit list of fields including numeric fields -> fail assertThrows(client().prepareSearch().setQuery( new MoreLikeThisQueryBuilder(new String[] {"string_value", "int_value"}, null, new Item[] {new Item("test", "type", "1")}).minTermFreq(1).minDocFreq(1)), SearchPhaseExecutionException.class); // mlt query with no field -> No results (because _all is not enabled) searchResponse = client().prepareSearch().setQuery(moreLikeThisQuery(new String[] {"index"}).minTermFreq(1).minDocFreq(1)).execute().actionGet(); assertHitCount(searchResponse, 0L); // mlt query with string fields searchResponse = client().prepareSearch().setQuery(moreLikeThisQuery(new String[]{"string_value"}, new String[] {"index"}, null).minTermFreq(1).minDocFreq(1)).execute().actionGet(); assertHitCount(searchResponse, 2L); // mlt query with at least a numeric field -> fail by default 
assertThrows(client().prepareSearch().setQuery(moreLikeThisQuery(new String[] {"string_value", "int_value"}, new String[] {"index"}, null)), SearchPhaseExecutionException.class); // mlt query with at least a numeric field -> fail by command assertThrows(client().prepareSearch().setQuery(moreLikeThisQuery(new String[] {"string_value", "int_value"}, new String[] {"index"}, null).failOnUnsupportedField(true)), SearchPhaseExecutionException.class); // mlt query with at least a numeric field but fail_on_unsupported_field set to false searchResponse = client().prepareSearch().setQuery(moreLikeThisQuery(new String[] {"string_value", "int_value"}, new String[] {"index"}, null).minTermFreq(1).minDocFreq(1).failOnUnsupportedField(false)).get(); assertHitCount(searchResponse, 2L); // mlt field query on a numeric field -> failure by default assertThrows(client().prepareSearch().setQuery(moreLikeThisQuery(new String[] {"int_value"}, new String[] {"42"}, null).minTermFreq(1).minDocFreq(1)), SearchPhaseExecutionException.class); // mlt field query on a numeric field -> failure by command assertThrows(client().prepareSearch().setQuery(moreLikeThisQuery(new String[] {"int_value"}, new String[] {"42"}, null).minTermFreq(1).minDocFreq(1).failOnUnsupportedField(true)), SearchPhaseExecutionException.class); // mlt field query on a numeric field but fail_on_unsupported_field set to false searchResponse = client().prepareSearch().setQuery(moreLikeThisQuery(new String[] {"int_value"}, new String[] {"42"}, null).minTermFreq(1).minDocFreq(1).failOnUnsupportedField(false)).execute().actionGet(); assertHitCount(searchResponse, 0L); }
public void testMoreLikeThisMalformedArtificialDocs() throws Exception { logger.info("Creating the index ..."); assertAcked(prepareCreate("test") .addMapping("type1", "text", "type=text,analyzer=whitespace", "date", "type=date")); ensureGreen("test"); logger.info("Creating an index with a single document ..."); indexRandom(true, client().prepareIndex("test", "type1", "1").setSource(jsonBuilder() .startObject() .field("text", "Hello World!") .field("date", "2009-01-01") .endObject())); logger.info("Checking with a malformed field value ..."); XContentBuilder malformedFieldDoc = jsonBuilder() .startObject() .field("text", "Hello World!") .field("date", "this is not a date!") .endObject(); MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery(new Item[] {new Item("test", "type1", malformedFieldDoc)}) .minTermFreq(0) .minDocFreq(0) .minimumShouldMatch("0%"); SearchResponse response = client().prepareSearch("test").setTypes("type1") .setQuery(mltQuery).get(); assertSearchResponse(response); assertHitCount(response, 0); logger.info("Checking with an empty document ..."); XContentBuilder emptyDoc = jsonBuilder().startObject().endObject(); mltQuery = moreLikeThisQuery(null, new Item[] {new Item("test", "type1", emptyDoc)}) .minTermFreq(0) .minDocFreq(0) .minimumShouldMatch("0%"); response = client().prepareSearch("test").setTypes("type1") .setQuery(mltQuery).get(); assertSearchResponse(response); assertHitCount(response, 0); logger.info("Checking the document matches otherwise ..."); XContentBuilder normalDoc = jsonBuilder() .startObject() .field("text", "Hello World!") .field("date", "1000-01-01") // should be properly parsed but ignored ... 
.endObject(); mltQuery = moreLikeThisQuery(null, new Item[] {new Item("test", "type1", normalDoc)}) .minTermFreq(0) .minDocFreq(0) .minimumShouldMatch("100%"); // strict all terms must match but date is ignored response = client().prepareSearch("test").setTypes("type1") .setQuery(mltQuery).get(); assertSearchResponse(response); assertHitCount(response, 1); }
/**
 * Builds an artificial document with {@code numFields} fields and indexes each field value
 * as its own document. Liking the artificial document is expected to match all of them;
 * every indexed document added to the unlike list is expected to reduce the hit count by one.
 */
public void testMoreLikeThisUnlike() throws ExecutionException, InterruptedException, IOException {
    createIndex("test");
    ensureGreen();
    int numFields = randomIntBetween(5, 10);

    logger.info("Create a document that has all the fields.");
    XContentBuilder doc = jsonBuilder().startObject();
    for (int fieldNo = 0; fieldNo < numFields; fieldNo++) {
        doc.field("field" + fieldNo, String.valueOf(fieldNo));
    }
    doc.endObject();

    logger.info("Indexing each field value of this document as a single document.");
    List<IndexRequestBuilder> builders = new ArrayList<>();
    for (int fieldNo = 0; fieldNo < numFields; fieldNo++) {
        String idAndValue = String.valueOf(fieldNo);
        builders.add(client().prepareIndex("test", "type1", idAndValue).setSource("field" + fieldNo, idAndValue));
    }
    indexRandom(true, builders);

    logger.info("First check the document matches all indexed docs.");
    MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery(new Item[] {new Item("test", "type1", doc)})
        .minTermFreq(0)
        .minDocFreq(0)
        .maxQueryTerms(100)
        .minimumShouldMatch("0%");
    SearchResponse response = client().prepareSearch("test").setTypes("type1").setQuery(mltQuery).get();
    assertSearchResponse(response);
    assertHitCount(response, numFields);

    logger.info("Now check like this doc, but ignore one doc in the index, then two and so on...");
    List<Item> docs = new ArrayList<>(numFields);
    for (int unliked = 0; unliked < numFields; unliked++) {
        docs.add(new Item("test", "type1", String.valueOf(unliked)));
        Item[] unlikeItems = docs.toArray(new Item[docs.size()]);

        mltQuery = moreLikeThisQuery(null, new Item[] {new Item("test", "type1", doc)})
            .unlike(unlikeItems)
            .minTermFreq(0)
            .minDocFreq(0)
            .maxQueryTerms(100)
            .include(true)
            .minimumShouldMatch("0%");

        response = client().prepareSearch("test").setTypes("type1").setQuery(mltQuery).get();
        assertSearchResponse(response);
        assertHitCount(response, numFields - (unliked + 1));
    }
}
@Override protected MoreLikeThisQueryBuilder doCreateTestQueryBuilder() { MoreLikeThisQueryBuilder queryBuilder; String[] likeTexts = null; Item[] likeItems = null; // like field is required if (randomBoolean()) { likeTexts = generateRandomStringArray(5, 5, false, false); } else { likeItems = randomLikeItems; } if (randomBoolean()) { // for the default field queryBuilder = new MoreLikeThisQueryBuilder(likeTexts, likeItems); } else { queryBuilder = new MoreLikeThisQueryBuilder(randomFields, likeTexts, likeItems); } if (randomBoolean()) { queryBuilder.unlike(generateRandomStringArray(5, 5, false, false)); } if (randomBoolean()) { queryBuilder.unlike(randomUnlikeItems); } if (randomBoolean()) { queryBuilder.maxQueryTerms(randomInt(25)); } if (randomBoolean()) { queryBuilder.minTermFreq(randomInt(5)); } if (randomBoolean()) { queryBuilder.minDocFreq(randomInt(5)); } if (randomBoolean()) { queryBuilder.maxDocFreq(randomInt(100)); } if (randomBoolean()) { queryBuilder.minWordLength(randomInt(5)); } if (randomBoolean()) { queryBuilder.maxWordLength(randomInt(25)); } if (randomBoolean()) { queryBuilder.stopWords(generateRandomStringArray(5, 5, false, false)); } if (randomBoolean()) { queryBuilder.analyzer(randomAnalyzer()); // fix the analyzer? } if (randomBoolean()) { queryBuilder.minimumShouldMatch(randomMinimumShouldMatch()); } if (randomBoolean()) { queryBuilder.boostTerms(randomFloat() * 10); } if (randomBoolean()) { queryBuilder.include(randomBoolean()); } if (randomBoolean()) { queryBuilder.failOnUnsupportedField(randomBoolean()); } return queryBuilder; }
@Override protected Set<String> getObjectsHoldingArbitraryContent() { //doc contains arbitrary content, anything can be added to it and no exception will be thrown return Collections.singleton(MoreLikeThisQueryBuilder.Item.Field.DOC.getPreferredName()); }
/**
 * Constructing a builder with like texts and like items that are each either null or empty
 * must fail with an {@code IllegalArgumentException} carrying the expected message.
 */
public void testValidateEmptyLike() {
    final String[] likeTexts;
    if (randomBoolean()) {
        likeTexts = null;
    } else {
        likeTexts = new String[0];
    }

    final Item[] likeItems;
    if (randomBoolean()) {
        likeItems = null;
    } else {
        likeItems = new Item[0];
    }

    IllegalArgumentException e = expectThrows(
        IllegalArgumentException.class,
        () -> new MoreLikeThisQueryBuilder(likeTexts, likeItems));
    assertThat(e.getMessage(), containsString("requires either 'like' texts or items to be specified"));
}
/**
 * Fetches the response for the given items using the current search context and converts
 * it with {@code getFieldsFor}.
 *
 * @param items the items to fetch
 * @return the fields extracted from the fetched response
 */
public Fields[] fetch(List<Item> items) throws IOException {
    final SearchContext currentContext = SearchContext.current();
    return getFieldsFor(fetchResponse(items, currentContext));
}
/**
 * Creates a more like this query that finds documents "like" the provided texts or
 * documents, compared on the given fields.
 *
 * @param fields    the field names that will be used when generating the 'More Like This' query.
 * @param likeTexts the text to use when generating the 'More Like This' query.
 * @param likeItems the documents to use when generating the 'More Like This' query.
 * @return a new builder constructed from the three arguments
 */
public static MoreLikeThisQueryBuilder moreLikeThisQuery(String[] fields, String[] likeTexts, Item[] likeItems) {
    MoreLikeThisQueryBuilder builder = new MoreLikeThisQueryBuilder(fields, likeTexts, likeItems);
    return builder;
}
/**
 * Creates a more like this query that finds documents "like" the provided texts or
 * documents without naming any fields: {@code null} is passed as the field list to the
 * three-argument variant.
 *
 * @param likeTexts the text to use when generating the 'More Like This' query.
 * @param likeItems the documents to use when generating the 'More Like This' query.
 * @return the builder created by the three-argument variant
 */
public static MoreLikeThisQueryBuilder moreLikeThisQuery(String[] likeTexts, Item[] likeItems) {
    final String[] noFields = null;
    return moreLikeThisQuery(noFields, likeTexts, likeItems);
}
/**
 * Creates a more like this query that finds documents "like" the provided documents
 * without naming any fields or texts: {@code null} is passed for both to the
 * three-argument variant.
 *
 * @param likeItems the documents to use when generating the 'More Like This' query.
 * @return the builder created by the three-argument variant
 */
public static MoreLikeThisQueryBuilder moreLikeThisQuery(Item[] likeItems) {
    final String[] noFields = null;
    final String[] noTexts = null;
    return moreLikeThisQuery(noFields, noTexts, likeItems);
}