public static void loadIndex(String indexName, String indexFile, Path unzipDir, Path bwcPath, Logger logger, Path... paths) throws Exception {
    Path unzipDataDir = unzipDir.resolve("data");
    Path backwardsIndex = bwcPath.resolve(indexFile);
    // decompress the index
    try (InputStream stream = Files.newInputStream(backwardsIndex)) {
        TestUtil.unzip(stream, unzipDir);
    }
    // check it is unique
    assertTrue(Files.exists(unzipDataDir));
    Path[] list = FileSystemUtils.files(unzipDataDir);
    if (list.length != 1) {
        throw new IllegalStateException("Backwards index must contain exactly one cluster");
    }
    final Path src = getIndexDir(logger, indexName, indexFile, list[0]);
    copyIndex(logger, src, src.getFileName().toString(), paths);
}
public RandomizingClient(Client client, Random random) {
    super(client);
    // we don't use the QUERY_AND_FETCH types that break quite a lot of tests
    // given that they return `size*num_shards` hits instead of `size`
    defaultSearchType = RandomPicks.randomFrom(random, Arrays.asList(
        SearchType.DFS_QUERY_THEN_FETCH,
        SearchType.QUERY_THEN_FETCH));
    if (random.nextInt(10) == 0) {
        defaultPreference = RandomPicks.randomFrom(random, EnumSet.of(Preference.PRIMARY_FIRST, Preference.LOCAL)).type();
    } else if (random.nextInt(10) == 0) {
        String s = TestUtil.randomRealisticUnicodeString(random, 1, 10);
        defaultPreference = s.startsWith("_") ? null : s; // '_' is a reserved character
    } else {
        defaultPreference = null;
    }
    this.batchedReduceSize = 2 + random.nextInt(10);
}
public void testBasics() {
    final int iters = scaledRandomIntBetween(5, 25);
    for (int j = 0; j < iters; j++) {
        String[] fields = new String[1 + random().nextInt(10)];
        for (int i = 0; i < fields.length; i++) {
            fields[i] = TestUtil.randomRealisticUnicodeString(random(), 1, 10);
        }
        String term = TestUtil.randomRealisticUnicodeString(random(), 1, 10);
        Term[] terms = toTerms(fields, term);
        boolean disableCoord = random().nextBoolean();
        boolean useBoolean = random().nextBoolean();
        float tieBreaker = random().nextFloat();
        BlendedTermQuery query = useBoolean
            ? BlendedTermQuery.booleanBlendedQuery(terms, disableCoord)
            : BlendedTermQuery.dismaxBlendedQuery(terms, tieBreaker);
        QueryUtils.check(query);
        terms = toTerms(fields, term);
        BlendedTermQuery query2 = useBoolean
            ? BlendedTermQuery.booleanBlendedQuery(terms, disableCoord)
            : BlendedTermQuery.dismaxBlendedQuery(terms, tieBreaker);
        assertEquals(query, query2);
    }
}
@Override
protected InternalHistogram createTestInstance(String name, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) {
    final boolean keyed = randomBoolean();
    final DocValueFormat format = DocValueFormat.RAW;
    final int base = randomInt(50) - 30;
    final int numBuckets = randomInt(10);
    final int interval = randomIntBetween(1, 3);
    List<InternalHistogram.Bucket> buckets = new ArrayList<>();
    for (int i = 0; i < numBuckets; ++i) {
        final int docCount = TestUtil.nextInt(random(), 1, 50);
        buckets.add(new InternalHistogram.Bucket(base + i * interval, docCount, keyed, format, InternalAggregations.EMPTY));
    }
    return new InternalHistogram(name, buckets, (InternalOrder) InternalHistogram.Order.KEY_ASC, 1, null, format, keyed,
        pipelineAggregators, metaData);
}
@BeforeClass
public static void setup() throws IOException {
    dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    final int numDocs = TestUtil.nextInt(random(), 1, 20);
    for (int i = 0; i < numDocs; ++i) {
        final int numHoles = random().nextInt(5);
        for (int j = 0; j < numHoles; ++j) {
            w.addDocument(new Document());
        }
        Document doc = new Document();
        doc.add(new StringField("foo", "bar", Store.NO));
        w.addDocument(doc);
    }
    reader = w.getReader();
    w.close();
    Engine.Searcher engineSearcher = new Engine.Searcher("test", new IndexSearcher(reader));
    searcher = new ContextIndexSearcher(engineSearcher, IndexSearcher.getDefaultQueryCache(), MAYBE_CACHE_POLICY);
}
public void testSingleValued() throws IOException {
    Directory dir = newDirectory();
    // we need the default codec to check for singletons
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null).setCodec(TestUtil.getDefaultCodec()));
    Document doc = new Document();
    for (IndexableField f : NumberFieldMapper.NumberType.HALF_FLOAT.createFields("half_float", 3f, false, true, false)) {
        doc.add(f);
    }
    w.addDocument(doc);
    final DirectoryReader dirReader = DirectoryReader.open(w);
    LeafReader reader = getOnlyLeafReader(dirReader);
    SortedNumericDoubleValues values = new SortedNumericDVIndexFieldData.SortedNumericHalfFloatFieldData(
        reader, "half_float").getDoubleValues();
    assertNotNull(FieldData.unwrapSingleton(values));
    values.setDocument(0);
    assertEquals(1, values.count());
    assertEquals(3f, values.valueAt(0), 0f);
    IOUtils.close(dirReader, w, dir);
}
public void doTestDocValueRangeQueries(NumberType type, Supplier<Number> valueSupplier) throws Exception {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
    final int numDocs = TestUtil.nextInt(random(), 100, 500);
    for (int i = 0; i < numDocs; ++i) {
        w.addDocument(type.createFields("foo", valueSupplier.get(), true, true, false));
    }
    DirectoryReader reader = DirectoryReader.open(w);
    IndexSearcher searcher = newSearcher(reader);
    w.close();
    final int iters = 10;
    for (int iter = 0; iter < iters; ++iter) {
        Query query = type.rangeQuery("foo",
            random().nextBoolean() ? null : valueSupplier.get(),
            random().nextBoolean() ? null : valueSupplier.get(),
            randomBoolean(), randomBoolean(), true);
        assertThat(query, Matchers.instanceOf(IndexOrDocValuesQuery.class));
        IndexOrDocValuesQuery indexOrDvQuery = (IndexOrDocValuesQuery) query;
        assertEquals(
            searcher.count(indexOrDvQuery.getIndexQuery()),
            searcher.count(indexOrDvQuery.getRandomAccessQuery()));
    }
    reader.close();
    dir.close();
}
public void testCheckIntegrity() throws IOException {
    Directory dir = newDirectory();
    long luceneFileLength = 0;
    try (IndexOutput output = dir.createOutput("lucene_checksum.bin", IOContext.DEFAULT)) {
        int iters = scaledRandomIntBetween(10, 100);
        for (int i = 0; i < iters; i++) {
            BytesRef bytesRef = new BytesRef(TestUtil.randomRealisticUnicodeString(random(), 10, 1024));
            output.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
            luceneFileLength += bytesRef.length;
        }
        CodecUtil.writeFooter(output);
        luceneFileLength += CodecUtil.footerLength();
    }
    final long luceneChecksum;
    try (IndexInput indexInput = dir.openInput("lucene_checksum.bin", IOContext.DEFAULT)) {
        assertEquals(luceneFileLength, indexInput.length());
        luceneChecksum = CodecUtil.retrieveChecksum(indexInput);
    }
    dir.close();
}
public void testRepetitionsL() throws IOException {
    Random r = random();
    for (int i = 0; i < 10; i++) {
        int numLongs = TestUtil.nextInt(r, 1, 10000);
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        long theValue = r.nextLong();
        for (int j = 0; j < numLongs; j++) {
            if (r.nextInt(10) == 0) {
                theValue = r.nextLong();
            }
            // encode the long big-endian, one byte per write
            bos.write((byte) (theValue >>> 56));
            bos.write((byte) (theValue >>> 48));
            bos.write((byte) (theValue >>> 40));
            bos.write((byte) (theValue >>> 32));
            bos.write((byte) (theValue >>> 24));
            bos.write((byte) (theValue >>> 16));
            bos.write((byte) (theValue >>> 8));
            bos.write((byte) theValue);
        }
        doTest(bos.toByteArray());
    }
}
public void testRepetitionsI() throws IOException {
    Random r = random();
    for (int i = 0; i < 10; i++) {
        int numInts = TestUtil.nextInt(r, 1, 20000);
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        int theValue = r.nextInt();
        for (int j = 0; j < numInts; j++) {
            if (r.nextInt(10) == 0) {
                theValue = r.nextInt();
            }
            // encode the int big-endian, one byte per write
            bos.write((byte) (theValue >>> 24));
            bos.write((byte) (theValue >>> 16));
            bos.write((byte) (theValue >>> 8));
            bos.write((byte) theValue);
        }
        doTest(bos.toByteArray());
    }
}
public void testRepetitionsS() throws IOException {
    Random r = random();
    for (int i = 0; i < 10; i++) {
        int numShorts = TestUtil.nextInt(r, 1, 40000);
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        short theValue = (short) r.nextInt(65535);
        for (int j = 0; j < numShorts; j++) {
            if (r.nextInt(10) == 0) {
                theValue = (short) r.nextInt(65535);
            }
            // encode the short big-endian, one byte per write
            bos.write((byte) (theValue >>> 8));
            bos.write((byte) theValue);
        }
        doTest(bos.toByteArray());
    }
}
/**
 * Test method for {@link org.apache.lucene.util.BytesRefHash#size()}.
 */
public void testSize() {
    BytesRefBuilder ref = new BytesRefBuilder();
    int num = scaledRandomIntBetween(2, 20);
    for (int j = 0; j < num; j++) {
        final int mod = 1 + randomInt(40);
        for (int i = 0; i < 797; i++) {
            String str;
            do {
                str = TestUtil.randomRealisticUnicodeString(random(), 1000);
            } while (str.length() == 0);
            ref.copyChars(str);
            long count = hash.size();
            long key = hash.add(ref.get());
            if (key < 0) {
                assertEquals(hash.size(), count);
            } else {
                assertEquals(hash.size(), count + 1);
            }
            if (i % mod == 0) {
                newHash();
            }
        }
    }
    hash.close();
}
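// testSize() relies on a `hash` field and a `newHash()` helper from the
// enclosing fixture, neither of which is shown in this excerpt. A minimal
// hypothetical sketch of such a fixture follows (the real harness may pool
// allocators differently); it hinges on BytesRefHash.add() returning the new
// entry's id, or a negative value when the bytes were already present, which
// is exactly what the test branches on.
private BytesRefHash hash;

private void newHash() {
    if (hash != null) {
        hash.close();
    }
    hash = new BytesRefHash();
}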
public void testFullEviction() throws IOException {
    int pass = TestUtil.nextInt(random(), 10, 100);
    CachedFeatureStore cachedFeatureStore = new CachedFeatureStore(memStore, caches);
    while (pass-- > 0) {
        StoredFeature feat = LtrTestUtils.randomFeature();
        memStore.add(feat);
        cachedFeatureStore.load(feat.name());

        StoredFeatureSet set = LtrTestUtils.randomFeatureSet();
        memStore.add(set);
        cachedFeatureStore.loadSet(set.name());

        CompiledLtrModel model = LtrTestUtils.buildRandomModel();
        memStore.add(model);
        cachedFeatureStore.loadModel(model.name());
    }
    caches.evict(memStore.getStoreName());
    assertEquals(0, cachedFeatureStore.totalWeight());
    assertTrue(caches.getCachedStoreNames().isEmpty());
    assertEquals(0, caches.getPerStoreStats(memStore.getStoreName()).modelRam());
    assertEquals(0, caches.getPerStoreStats(memStore.getStoreName()).totalCount());
}
public void testCacheStatsIsolation() throws IOException {
    MemStore one = new MemStore("one");
    MemStore two = new MemStore("two");
    CachedFeatureStore onefs = new CachedFeatureStore(one, caches);
    CachedFeatureStore twofs = new CachedFeatureStore(two, caches);
    int pass = TestUtil.nextInt(random(), 10, 20);
    while (pass-- > 0) {
        StoredFeature feat = LtrTestUtils.randomFeature();
        one.add(feat);
        two.add(feat);
        onefs.load(feat.name());
        twofs.load(feat.name());
    }
    assertEquals(2, caches.getCachedStoreNames().size());
    caches.evict(one.getStoreName());
    assertEquals(1, caches.getCachedStoreNames().size());
    caches.evict(two.getStoreName());
    assertTrue(caches.getCachedStoreNames().isEmpty());
}
@BeforeClass
public static void init() throws Exception {
    directory = newDirectory(random());
    try (IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(new StandardAnalyzer()))) {
        int nDoc = TestUtil.nextInt(random(), 20, 100);
        docs = new HashMap<>();
        for (int i = 0; i < nDoc; i++) {
            Document d = buildDoc(random().nextBoolean() ? "foo" : "bar", random().nextFloat());
            writer.addDocument(d);
            if (random().nextInt(4) == 0) {
                writer.commit();
            }
            docs.put(d.get("id"), d);
        }
        writer.commit();
    }
    IndexReader reader = closeAfterSuite(DirectoryReader.open(directory));
    searcher = new IndexSearcher(reader);
}
public Map<String, Doc> buildIndex() {
    client().admin().indices().prepareCreate("test_index")
        .addMapping("test", "{\"properties\":{\"scorefield1\": {\"type\": \"float\"}}}", XContentType.JSON)
        .get();

    int numDocs = TestUtil.nextInt(random(), 20, 100);
    Map<String, Doc> docs = new HashMap<>();
    for (int i = 0; i < numDocs; i++) {
        boolean field1IsFound = random().nextBoolean();
        Doc d = new Doc(
            field1IsFound ? "found" : "notfound",
            field1IsFound ? "notfound" : "found",
            Math.abs(random().nextFloat()));
        indexDoc(d);
        docs.put(d.id, d);
    }
    client().admin().indices().prepareRefresh("test_index").get();
    return docs;
}
public void testPerfAndRobustness() {
    SimpleCountRandomTreeGeneratorStatsCollector counts = new SimpleCountRandomTreeGeneratorStatsCollector();
    NaiveAdditiveDecisionTree ranker = generateRandomDecTree(100, 1000, 100, 1000, 5, 50, counts);
    DenseFeatureVector vector = ranker.newFeatureVector(null);
    int nPass = TestUtil.nextInt(random(), 10, 8916);
    LinearRankerTests.fillRandomWeights(vector.scores);
    ranker.score(vector); // warmup
    long time = -System.currentTimeMillis();
    for (int i = 0; i < nPass; i++) {
        vector = ranker.newFeatureVector(vector);
        LinearRankerTests.fillRandomWeights(vector.scores);
        ranker.score(vector);
    }
    time += System.currentTimeMillis();
    LOG.info("Scored {} docs with {} trees/{} features within {}ms ({} ms/doc), " +
            "{} nodes ({} splits & {} leaves)",
        nPass, counts.trees.get(), ranker.size(), time, (float) time / (float) nPass,
        counts.nodes.get(), counts.splits.get(), counts.leaves.get());
}
public void testPerfAndRobustness() {
    LinearRanker ranker = generateRandomRanker(10, 1000);
    DenseFeatureVector vector = ranker.newFeatureVector(null);
    int nPass = TestUtil.nextInt(random(), 10, 8916);
    LinearRankerTests.fillRandomWeights(vector.scores);
    ranker.score(vector); // warmup
    long time = -System.currentTimeMillis();
    for (int i = 0; i < nPass; i++) {
        vector = ranker.newFeatureVector(vector);
        LinearRankerTests.fillRandomWeights(vector.scores);
        ranker.score(vector);
    }
    time += System.currentTimeMillis();
    LOG.info("Scored {} docs with {} features within {}ms ({} ms/doc)",
        nPass, ranker.size(), time, (float) time / (float) nPass);
}
public void testTermInfoComparisonConsistency() {
    TermInfo a = new TermInfo(TestUtil.randomUnicodeString(random()), 0, 0, 0, 1);
    TermInfo b = new TermInfo(TestUtil.randomUnicodeString(random()), 0, 0, 1, 1);
    TermInfo c = new TermInfo(TestUtil.randomUnicodeString(random()), 0, 0, 2, 1);
    TermInfo d = new TermInfo(TestUtil.randomUnicodeString(random()), 0, 0, 0, 1);

    assertConsistentEquals(a, a);
    assertConsistentEquals(b, b);
    assertConsistentEquals(c, c);
    assertConsistentEquals(d, d);
    assertConsistentEquals(a, d);
    assertConsistentLessThan(a, b);
    assertConsistentLessThan(b, c);
    assertConsistentLessThan(a, c);
    assertConsistentLessThan(d, b);
    assertConsistentLessThan(d, c);
}
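// The two helpers above are not shown in this excerpt. A minimal sketch of
// what they plausibly verify, assuming TermInfo implements
// Comparable<TermInfo> (hypothetical; the real helpers may check more):
private static <T extends Comparable<T>> void assertConsistentEquals(T a, T b) {
    // equals must be symmetric and agree with hashCode and compareTo
    assertEquals(a, b);
    assertEquals(b, a);
    assertEquals(a.hashCode(), b.hashCode());
    assertEquals(0, a.compareTo(b));
}

private static <T extends Comparable<T>> void assertConsistentLessThan(T a, T b) {
    // compareTo must be antisymmetric and consistent with equals
    assertTrue(a.compareTo(b) < 0);
    assertTrue(b.compareTo(a) > 0);
    assertFalse(a.equals(b));
    assertFalse(b.equals(a));
}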
/**
 * returns a new random sentence, up to maxSentenceLength "words" in length.
 * each word is a single character (a-z). The first one is capitalized.
 */
private String newSentence(Random r, int maxSentenceLength) {
    StringBuilder sb = new StringBuilder();
    int numElements = TestUtil.nextInt(r, 1, maxSentenceLength);
    for (int i = 0; i < numElements; i++) {
        if (sb.length() > 0) {
            sb.append(' ');
            sb.append((char) TestUtil.nextInt(r, 'a', 'z'));
        } else {
            // capitalize the first word to help breakiterator
            sb.append((char) TestUtil.nextInt(r, 'A', 'Z'));
        }
    }
    sb.append(". "); // finalize sentence
    return sb.toString();
}
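// For illustration: with maxSentenceLength >= 3 this can produce output such
// as "A b c. " -- one capitalized single-letter word followed by lowercase
// ones, terminated with ". " so a sentence BreakIterator can find the boundary.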
@Override
public void setUp() throws Exception {
    super.setUp();
    dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
        newIndexWriterConfig(new MockAnalyzer(random()))
            .setMaxBufferedDocs(TestUtil.nextInt(random(), 50, 1000)));
    Document doc = new Document();
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.setOmitNorms(true);
    Field field = newField("field", "", customType);
    doc.add(field);
    NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT));
    for (int i = 0; i < 1000; i++) {
        field.setStringValue(df.format(i));
        writer.addDocument(doc);
    }
    reader = writer.getReader();
    writer.close();
    searcher = newSearcher(reader);
}
public void testAttributeReflection() throws Exception {
    PackedTokenAttributeImpl t = new PackedTokenAttributeImpl();
    t.append("foobar");
    t.setOffset(6, 22);
    t.setPositionIncrement(3);
    t.setPositionLength(11);
    t.setType("foobar");
    TestUtil.assertAttributeReflection(t, new HashMap<String, Object>() {{
        put(CharTermAttribute.class.getName() + "#term", "foobar");
        put(TermToBytesRefAttribute.class.getName() + "#bytes", new BytesRef("foobar"));
        put(OffsetAttribute.class.getName() + "#startOffset", 6);
        put(OffsetAttribute.class.getName() + "#endOffset", 22);
        put(PositionIncrementAttribute.class.getName() + "#positionIncrement", 3);
        put(PositionLengthAttribute.class.getName() + "#positionLength", 11);
        put(TypeAttribute.class.getName() + "#type", "foobar");
    }});
}
public void testDocsEnumStart() throws Exception {
    Analyzer analyzer = new MockAnalyzer(random());
    MemoryIndex memory = new MemoryIndex(random().nextBoolean(), random().nextInt(50) * 1024 * 1024);
    memory.addField("foo", "bar", analyzer);
    AtomicReader reader = (AtomicReader) memory.createSearcher().getIndexReader();
    DocsEnum disi = TestUtil.docs(random(), reader, "foo", new BytesRef("bar"), null, null, DocsEnum.FLAG_NONE);
    int docid = disi.docID();
    assertEquals(-1, docid);
    assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);

    // now reuse and check again
    TermsEnum te = reader.terms("foo").iterator(null);
    assertTrue(te.seekExact(new BytesRef("bar")));
    disi = te.docs(null, disi, DocsEnum.FLAG_NONE);
    docid = disi.docID();
    assertEquals(-1, docid);
    assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    reader.close();
}
public void testHashCodeAndEquals() {
    int num = atLeast(100);
    final boolean singleField = random().nextBoolean();
    List<Term> terms = new ArrayList<>();
    Set<Term> uniqueTerms = new HashSet<>();
    for (int i = 0; i < num; i++) {
        String field = "field" + (singleField ? "1" : random().nextInt(100));
        String string = TestUtil.randomRealisticUnicodeString(random());
        terms.add(new Term(field, string));
        uniqueTerms.add(new Term(field, string));
        TermsFilter left = termsFilter(singleField ? random().nextBoolean() : false, uniqueTerms);
        Collections.shuffle(terms, random());
        TermsFilter right = termsFilter(singleField ? random().nextBoolean() : false, terms);
        assertEquals(right, left);
        assertEquals(right.hashCode(), left.hashCode());
        if (uniqueTerms.size() > 1) {
            List<Term> asList = new ArrayList<>(uniqueTerms);
            asList.remove(0);
            TermsFilter notEqual = termsFilter(singleField ? random().nextBoolean() : false, asList);
            assertFalse(left.equals(notEqual));
            assertFalse(right.equals(notEqual));
        }
    }
}
void assertQuery(Query query, Filter filter) throws Exception {
    for (int i = 0; i < 10; i++) {
        boolean reversed = random().nextBoolean();
        SortField[] fields = new SortField[] {
            new SortField("int", SortField.Type.INT, reversed),
            new SortField("long", SortField.Type.LONG, reversed),
            new SortField("float", SortField.Type.FLOAT, reversed),
            new SortField("double", SortField.Type.DOUBLE, reversed),
            new SortField("intdocvalues", SortField.Type.INT, reversed),
            new SortField("floatdocvalues", SortField.Type.FLOAT, reversed),
            new SortField("score", SortField.Type.SCORE)
        };
        Collections.shuffle(Arrays.asList(fields), random());
        int numSorts = TestUtil.nextInt(random(), 1, fields.length);
        assertQuery(query, filter, new Sort(Arrays.copyOfRange(fields, 0, numSorts)));
    }
}
public void testAfterClose() throws Exception {
    Directory dir1 = getAssertNoDeletesDirectory(newDirectory());
    IndexWriter writer = new IndexWriter(dir1, newIndexWriterConfig(new MockAnalyzer(random())));

    // create the index
    createIndexNoClose(false, "test", writer);

    DirectoryReader r = writer.getReader();
    writer.close();

    TestUtil.checkIndex(dir1);

    // reader should remain usable even after IndexWriter is closed:
    assertEquals(100, r.numDocs());
    Query q = new TermQuery(new Term("indexname", "test"));
    IndexSearcher searcher = newSearcher(r);
    assertEquals(100, searcher.search(q, 10).totalHits);
    try {
        DirectoryReader.openIfChanged(r);
        fail("failed to hit AlreadyClosedException");
    } catch (AlreadyClosedException ace) {
        // expected
    }
    r.close();
    dir1.close();
}
private void write(final FieldInfos fieldInfos, final Directory dir, final FieldData[] fields, boolean allowPreFlex) throws Throwable {
    final int termIndexInterval = TestUtil.nextInt(random(), 13, 27);
    final Codec codec = Codec.getDefault();
    final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, SEGMENT, 10000, false, codec, null);
    final SegmentWriteState state = new SegmentWriteState(InfoStream.getDefault(), dir, si, fieldInfos,
        termIndexInterval, null, newIOContext(random()));

    final FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(state);
    Arrays.sort(fields);
    for (final FieldData field : fields) {
        if (!allowPreFlex && codec instanceof Lucene3xCodec) {
            // code below expects unicode sort order
            continue;
        }
        field.write(consumer);
    }
    consumer.close();
}
public void testLetterAsciiHuge() throws Exception {
    Random random = random();
    int maxLength = 8192; // CharTokenizer.IO_BUFFER_SIZE*2
    MockAnalyzer left = new MockAnalyzer(random, jvmLetter, false);
    left.setMaxTokenLength(255); // match CharTokenizer's max token length
    Analyzer right = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory(), reader);
            return new TokenStreamComponents(tokenizer, tokenizer);
        }
    };
    int numIterations = atLeast(50);
    for (int i = 0; i < numIterations; i++) {
        String s = TestUtil.randomSimpleString(random, maxLength);
        assertEquals(s, left.tokenStream("foo", newStringReader(s)),
            right.tokenStream("foo", newStringReader(s)));
    }
}
public NodeState(Random random, int nodeID, int numNodes) throws IOException {
    myNodeID = nodeID;
    dir = newFSDirectory(createTempDir("ShardSearchingTestBase"));
    // TODO: set warmer
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    if (VERBOSE) {
        iwc.setInfoStream(new PrintStreamInfoStream(System.out));
    }
    writer = new IndexWriter(dir, iwc);
    mgr = new SearcherManager(writer, true, null);
    searchers = new SearcherLifetimeManager();

    // Init w/ 0s... caller above will do initial
    // "broadcast" by calling initSearcher:
    currentNodeVersions = new long[numNodes];
}
@Ignore
public void testPagedGrowableWriterOverflow() {
    final long size = TestUtil.nextLong(random(), 2 * (long) Integer.MAX_VALUE, 3 * (long) Integer.MAX_VALUE);
    final int pageSize = 1 << (TestUtil.nextInt(random(), 16, 30));
    final PagedGrowableWriter writer = new PagedGrowableWriter(size, pageSize, 1, random().nextFloat());
    final long index = TestUtil.nextLong(random(), (long) Integer.MAX_VALUE, size - 1);
    writer.set(index, 2);
    assertEquals(2, writer.get(index));
    for (int i = 0; i < 1000000; ++i) {
        // TestUtil.nextLong is inclusive on both ends, so cap at size - 1 to
        // stay within the writer's bounds
        final long idx = TestUtil.nextLong(random(), 0, size - 1);
        if (idx == index) {
            assertEquals(2, writer.get(idx));
        } else {
            assertEquals(0, writer.get(idx));
        }
    }
}
@Override
public Object create(Random random) {
    int num = random.nextInt(10);
    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(random.nextBoolean());
    for (int i = 0; i < num; i++) {
        String input = "";
        do {
            input = TestUtil.randomRealisticUnicodeString(random);
        } while (input.isEmpty());
        String out = "";
        do {
            out = TestUtil.randomRealisticUnicodeString(random);
        } while (out.isEmpty());
        builder.add(input, out);
    }
    try {
        return builder.build();
    } catch (Exception ex) {
        Rethrow.rethrow(ex);
        return null; // unreachable code
    }
}
Bits randomLiveDocs(int maxDoc) {
    if (rarely()) {
        if (random().nextBoolean()) {
            return null;
        } else {
            return new Bits.MatchNoBits(maxDoc);
        }
    }
    final FixedBitSet bits = new FixedBitSet(maxDoc);
    final int bitsSet = TestUtil.nextInt(random(), 1, maxDoc - 1);
    for (int i = 0; i < bitsSet; ++i) {
        while (true) {
            final int index = random().nextInt(maxDoc);
            if (!bits.get(index)) {
                bits.set(index);
                break;
            }
        }
    }
    return bits;
}
public void testDocsEnumStart() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    doc.add(newStringField("foo", "bar", Field.Store.NO));
    writer.addDocument(doc);
    DirectoryReader reader = writer.getReader();
    AtomicReader r = getOnlySegmentReader(reader);
    DocsEnum disi = TestUtil.docs(random(), r, "foo", new BytesRef("bar"), null, null, DocsEnum.FLAG_NONE);
    int docid = disi.docID();
    assertEquals(-1, docid);
    assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);

    // now reuse and check again
    TermsEnum te = r.terms("foo").iterator(null);
    assertTrue(te.seekExact(new BytesRef("bar")));
    disi = TestUtil.docs(random(), te, null, disi, DocsEnum.FLAG_NONE);
    docid = disi.docID();
    assertEquals(-1, docid);
    assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    writer.close();
    r.close();
    dir.close();
}
public void testSupplementaryCharacters() throws IOException {
    final String s = TestUtil.randomUnicodeString(random(), 10);
    final int codePointCount = s.codePointCount(0, s.length());
    final int minGram = TestUtil.nextInt(random(), 1, 3);
    final int maxGram = TestUtil.nextInt(random(), minGram, 10);
    TokenStream tk = new KeywordTokenizer(new StringReader(s));
    tk = new NGramTokenFilter(tk, minGram, maxGram);
    final CharTermAttribute termAtt = tk.addAttribute(CharTermAttribute.class);
    final OffsetAttribute offsetAtt = tk.addAttribute(OffsetAttribute.class);
    tk.reset();
    for (int start = 0; start < codePointCount; ++start) {
        for (int end = start + minGram; end <= Math.min(codePointCount, start + maxGram); ++end) {
            assertTrue(tk.incrementToken());
            assertEquals(0, offsetAtt.startOffset());
            assertEquals(s.length(), offsetAtt.endOffset());
            final int startIndex = Character.offsetByCodePoints(s, 0, start);
            final int endIndex = Character.offsetByCodePoints(s, 0, end);
            assertEquals(s.substring(startIndex, endIndex), termAtt.toString());
        }
    }
    assertFalse(tk.incrementToken());
}
public SeedPostings(long seed, int minDocFreq, int maxDocFreq, Bits liveDocs, IndexOptions options) {
    random = new Random(seed);
    docRandom = new Random(random.nextLong());
    docFreq = TestUtil.nextInt(random, minDocFreq, maxDocFreq);
    this.liveDocs = liveDocs;

    // TODO: more realistic to inversely tie this to numDocs:
    maxDocSpacing = TestUtil.nextInt(random, 1, 100);

    if (random.nextInt(10) == 7) {
        // 10% of the time create big payloads:
        payloadSize = 1 + random.nextInt(3);
    } else {
        payloadSize = 1 + random.nextInt(1);
    }
    fixedPayloads = random.nextBoolean();
    byte[] payloadBytes = new byte[payloadSize];
    payload = new BytesRef(payloadBytes);
    this.options = options;
    doPositions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS.compareTo(options) <= 0;
}
public void testLetterUnicodeHuge() throws Exception {
    Random random = random();
    int maxLength = 4300; // CharTokenizer.IO_BUFFER_SIZE + fudge
    MockAnalyzer left = new MockAnalyzer(random, jvmLetter, false);
    left.setMaxTokenLength(255); // match CharTokenizer's max token length
    Analyzer right = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory(), reader);
            return new TokenStreamComponents(tokenizer, tokenizer);
        }
    };
    int numIterations = atLeast(50);
    for (int i = 0; i < numIterations; i++) {
        String s = TestUtil.randomUnicodeString(random, maxLength);
        assertEquals(s, left.tokenStream("foo", newStringReader(s)),
            right.tokenStream("foo", newStringReader(s)));
    }
}
private void verifyCount(IndexReader ir) throws Exception {
    Fields fields = MultiFields.getFields(ir);
    if (fields == null) {
        return;
    }
    for (String field : fields) {
        Terms terms = fields.terms(field);
        if (terms == null) {
            continue;
        }
        int docCount = terms.getDocCount();
        FixedBitSet visited = new FixedBitSet(ir.maxDoc());
        TermsEnum te = terms.iterator(null);
        while (te.next() != null) {
            DocsEnum de = TestUtil.docs(random(), te, null, null, DocsEnum.FLAG_NONE);
            while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                visited.set(de.docID());
            }
        }
        assertEquals(visited.cardinality(), docCount);
    }
}
public void testBasic() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
    iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    Document doc = new Document();
    doc.add(makeIDField("id0", 100));
    w.addDocument(doc);
    doc = new Document();
    doc.add(makeIDField("id1", 110));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    IDVersionSegmentTermsEnum termsEnum = (IDVersionSegmentTermsEnum) r.leaves().get(0).reader().fields().terms("id").iterator(null);
    assertTrue(termsEnum.seekExact(new BytesRef("id0"), 50));
    assertTrue(termsEnum.seekExact(new BytesRef("id0"), 100));
    assertFalse(termsEnum.seekExact(new BytesRef("id0"), 101));
    assertTrue(termsEnum.seekExact(new BytesRef("id1"), 50));
    assertTrue(termsEnum.seekExact(new BytesRef("id1"), 110));
    assertFalse(termsEnum.seekExact(new BytesRef("id1"), 111));
    r.close();
    w.close();
    dir.close();
}
public void testMoreThanOneDocPerIDWithUpdates() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
    iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    Document doc = new Document();
    doc.add(makeIDField("id", 17));
    w.addDocument(doc);
    doc = new Document();
    doc.add(makeIDField("id", 17));
    // Replaces the doc we just indexed:
    w.updateDocument(new Term("id", "id"), doc);
    w.commit();
    w.close();
    dir.close();
}
public void testInvalidPayload() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
    iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    Document doc = new Document();
    doc.add(new StringAndPayloadField("id", "id", new BytesRef("foo")));
    try {
        w.addDocument(doc);
        w.commit();
        fail("didn't hit expected exception");
    } catch (IllegalArgumentException iae) {
        // expected
    }
    w.close();
    dir.close();
}