public void testRandom2GraphAfter() throws Exception {
  final int numIters = atLeast(3);
  Random random = random();
  for (int i = 0; i < numIters; i++) {
    b = new SynonymMap.Builder(random.nextBoolean());
    final int numEntries = atLeast(10);
    for (int j = 0; j < numEntries; j++) {
      add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
    }
    final SynonymMap map = b.build();
    final boolean ignoreCase = random.nextBoolean();

    final Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        TokenStream syns = new SynonymFilter(tokenizer, map, ignoreCase);
        TokenStream graph = new MockGraphTokenFilter(random(), syns);
        return new TokenStreamComponents(tokenizer, graph);
      }
    };

    checkRandomData(random, analyzer, 100);
  }
}
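// The test above depends on helpers declared elsewhere in the test class:
// a SynonymMap.Builder field "b", add(), and randomNonEmptyString(). A
// minimal sketch of those helpers follows; the exact bodies are an
// assumption for illustration, not necessarily the original code.
private SynonymMap.Builder b;

// SynonymMap separates the words of a multi-token synonym with \u0000,
// so spaces in the input/output strings are mapped to that separator:
private void add(String input, String output, boolean keepOrig) {
  b.add(new CharsRef(input.replaceAll(" +", "\u0000")),
        new CharsRef(output.replaceAll(" +", "\u0000")),
        keepOrig);
}

// Keeps drawing random unicode strings until one is non-empty and free
// of the \u0000 separator:
private String randomNonEmptyString() {
  while (true) {
    final String s = TestUtil.randomUnicodeString(random()).trim();
    if (s.length() != 0 && s.indexOf('\u0000') == -1) {
      return s;
    }
  }
}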
public void testRandomHugeStringsMockGraphAfter() throws Exception {
  // Randomly inject graph tokens after JapaneseTokenizer:
  Random random = random();
  checkRandomData(random,
                  new Analyzer() {
                    @Override
                    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
                      Tokenizer tokenizer = new JapaneseTokenizer(newAttributeFactory(), reader, readDict(), false, Mode.SEARCH);
                      TokenStream graph = new MockGraphTokenFilter(random(), tokenizer);
                      return new TokenStreamComponents(tokenizer, graph);
                    }
                  },
                  100 * RANDOM_MULTIPLIER, 8192);
}
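// readDict() is assumed to load the user dictionary bundled with the
// Kuromoji tests. A sketch of what such a helper could look like; the
// resource name and error handling here are assumptions:
public static UserDictionary readDict() {
  InputStream is = TestJapaneseTokenizer.class.getResourceAsStream("userdict.txt");
  if (is == null) {
    throw new RuntimeException("Cannot find userdict.txt in test classpath!");
  }
  try (Reader reader = new InputStreamReader(is, StandardCharsets.UTF_8)) {
    return new UserDictionary(reader);
  } catch (IOException ioe) {
    throw new RuntimeException(ioe);
  }
}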
public void testRandomHugeStringsGraphAfter() throws Exception {
  Random random = random();
  checkRandomData(random,
                  new Analyzer() {
                    @Override
                    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
                      Tokenizer tokenizer = new StandardTokenizer(newAttributeFactory(), reader);
                      TokenStream tokenStream = new MockGraphTokenFilter(random(), tokenizer);
                      return new TokenStreamComponents(tokenizer, tokenStream);
                    }
                  },
                  100 * RANDOM_MULTIPLIER, 8192);
}
public void testRandomHugeStringsMockGraphAfter() throws Exception {
  // Randomly inject graph tokens after JapaneseTokenizer:
  Random random = random();
  checkRandomData(random,
                  new Analyzer() {
                    @Override
                    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
                      Tokenizer tokenizer = new JapaneseTokenizer(reader, readDict(), false, Mode.SEARCH);
                      TokenStream graph = new MockGraphTokenFilter(random(), tokenizer);
                      return new TokenStreamComponents(tokenizer, graph);
                    }
                  },
                  100 * RANDOM_MULTIPLIER, 8192);
}
public void testRandomHugeStringsGraphAfter() throws Exception {
  Random random = random();
  checkRandomData(random,
                  new Analyzer() {
                    @Override
                    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
                      Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
                      TokenStream tokenStream = new MockGraphTokenFilter(random(), tokenizer);
                      return new TokenStreamComponents(tokenizer, tokenStream);
                    }
                  },
                  100 * RANDOM_MULTIPLIER, 8192);
}
private TokenFilterSpec newFilterChain(Random random, Tokenizer tokenizer, boolean offsetsAreCorrect) {
  TokenFilterSpec spec = new TokenFilterSpec();
  spec.offsetsAreCorrect = offsetsAreCorrect;
  spec.stream = tokenizer;
  StringBuilder descr = new StringBuilder();
  int numFilters = random.nextInt(5);
  for (int i = 0; i < numFilters; i++) {

    // Insert ValidatingTF after each stage so we can
    // catch problems right after the TF that "caused"
    // them:
    spec.stream = new ValidatingTokenFilter(spec.stream, "stage " + i, spec.offsetsAreCorrect);

    while (true) {
      final Constructor<? extends TokenFilter> ctor = tokenfilters.get(random.nextInt(tokenfilters.size()));

      // hack: MockGraph/MockLookahead have assertions that will trip if they follow
      // an offsets violator, so we can't use them after e.g. WikipediaTokenizer
      if (!spec.offsetsAreCorrect &&
          (ctor.getDeclaringClass().equals(MockGraphTokenFilter.class)
           || ctor.getDeclaringClass().equals(MockRandomLookaheadTokenFilter.class))) {
        continue;
      }

      final Object[] args = newFilterArgs(random, spec.stream, ctor.getParameterTypes());
      if (broken(ctor, args)) {
        continue;
      }
      final TokenFilter flt = createComponent(ctor, args, descr);
      if (flt != null) {
        spec.offsetsAreCorrect &= !brokenOffsets(ctor, args);
        spec.stream = flt;
        break;
      }
    }
  }

  // Insert ValidatingTF after each stage so we can
  // catch problems right after the TF that "caused"
  // them:
  spec.stream = new ValidatingTokenFilter(spec.stream, "last stage", spec.offsetsAreCorrect);

  spec.toString = descr.toString();
  return spec;
}
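// TokenFilterSpec is a small holder consumed by the random-chain test; a
// plausible sketch, with the field names inferred from the method above:
static class TokenFilterSpec {
  TokenStream stream;        // the fully wrapped analysis chain
  String toString;           // human-readable description of the chosen filters
  boolean offsetsAreCorrect; // false once an offsets-violating filter is in the chain
}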