public void testMaxTermLength2() throws Exception {
  ClassicAnalyzer sa = new ClassicAnalyzer();
  assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "toolong", "xy", "z"});
  sa.setMaxTokenLength(5);
  assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "xy", "z"}, new int[]{1, 1, 2, 1});
}
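// Not part of the original tests: a minimal sketch of what assertAnalyzesTo verifies above,
// consuming the TokenStream directly. With maxTokenLength = 5, "toolong" is discarded and the
// next token's position increment is bumped to 2 (matching the {1, 1, 2, 1} expectation).
// The method name is hypothetical; imports for TokenStream and the attribute classes are assumed.
public void demoMaxTokenLengthTokenStream() throws IOException {
  ClassicAnalyzer sa = new ClassicAnalyzer();
  sa.setMaxTokenLength(5);
  try (TokenStream ts = sa.tokenStream("content", "ab cd toolong xy z")) {
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncr = ts.addAttribute(PositionIncrementAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      // expected output: ab(1), cd(1), xy(2), z(1)
      System.out.println(term.toString() + " posIncr=" + posIncr.getPositionIncrement());
    }
    ts.end();
  }
}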
public void testLucene1140() throws Exception {
  try {
    ClassicAnalyzer analyzer = new ClassicAnalyzer();
    assertAnalyzesTo(analyzer, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
  } catch (NullPointerException e) {
    fail("Should not throw an NPE and it did");
  }
}
public void testMaxTermLength2() throws Exception {
  ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);
  assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "toolong", "xy", "z"});
  sa.setMaxTokenLength(5);
  assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "xy", "z"}, new int[]{1, 1, 2, 1});
}
public void testLucene1140() throws Exception {
  try {
    ClassicAnalyzer analyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT);
    assertAnalyzesTo(analyzer, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
  } catch (NullPointerException e) {
    fail("Should not throw an NPE and it did");
  }
}
public void testMaxTermLength() throws Exception {
  ClassicAnalyzer sa = new ClassicAnalyzer();
  sa.setMaxTokenLength(5);
  assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "xy", "z"});
}
public void testJava14BWCompatibility() throws Exception {
  ClassicAnalyzer sa = new ClassicAnalyzer(Version.LUCENE_3_0);
  assertAnalyzesTo(sa, "test\u02C6test", new String[] { "test", "test" });
}
/**
 * Make sure we skip wicked long terms.
 */
public void testWickedLongTerm() throws IOException {
  RAMDirectory dir = new RAMDirectory();
  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new ClassicAnalyzer()));

  char[] chars = new char[IndexWriter.MAX_TERM_LENGTH];
  Arrays.fill(chars, 'x');
  Document doc = new Document();
  final String bigTerm = new String(chars);

  // This produces a too-long term:
  String contents = "abc xyz x" + bigTerm + " another term";
  doc.add(new TextField("content", contents, Field.Store.NO));
  writer.addDocument(doc);

  // Make sure we can add another normal document
  doc = new Document();
  doc.add(new TextField("content", "abc bbb ccc", Field.Store.NO));
  writer.addDocument(doc);
  writer.close();

  IndexReader reader = IndexReader.open(dir);

  // Make sure all terms < max size were indexed
  assertEquals(2, reader.docFreq(new Term("content", "abc")));
  assertEquals(1, reader.docFreq(new Term("content", "bbb")));
  assertEquals(1, reader.docFreq(new Term("content", "term")));
  assertEquals(1, reader.docFreq(new Term("content", "another")));

  // Make sure position is still incremented when
  // massive term is skipped:
  DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader,
      MultiFields.getLiveDocs(reader), "content", new BytesRef("another"));
  assertTrue(tps.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  assertEquals(1, tps.freq());
  assertEquals(3, tps.nextPosition());

  // Make sure the doc that has the massive term is in
  // the index:
  assertEquals("document with wicked long term is not in the index!", 2, reader.numDocs());

  reader.close();

  // Make sure we can add a document with exactly the
  // maximum length term, and search on that term:
  doc = new Document();
  doc.add(new TextField("content", bigTerm, Field.Store.NO));
  ClassicAnalyzer sa = new ClassicAnalyzer();
  sa.setMaxTokenLength(100000);
  writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa));
  writer.addDocument(doc);
  writer.close();
  reader = IndexReader.open(dir);
  assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
  reader.close();

  dir.close();
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
  checkRandomData(random(), new ClassicAnalyzer(), 1000*RANDOM_MULTIPLIER);
}
/** blast some random large strings through the analyzer */
public void testRandomHugeStrings() throws Exception {
  Random random = random();
  checkRandomData(random, new ClassicAnalyzer(), 100*RANDOM_MULTIPLIER, 8192);
}
@Test
public void testGetClassic() {
  Analyzer analyzer = PreBuiltAnalyzers.CLASSIC.get();
  Assert.assertEquals(ClassicAnalyzer.class, analyzer.getClass());
}
public void testMaxTermLength() throws Exception {
  ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);
  sa.setMaxTokenLength(5);
  assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "xy", "z"});
}
public void testJava14BWCompatibility() throws Exception {
  ClassicAnalyzer sa = new ClassicAnalyzer(Version.LUCENE_30);
  assertAnalyzesTo(sa, "test\u02C6test", new String[] { "test", "test" });
}
/**
 * Make sure we skip wicked long terms.
 */
public void testWickedLongTerm() throws IOException {
  RAMDirectory dir = new RAMDirectory();
  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
      TEST_VERSION_CURRENT, new ClassicAnalyzer(TEST_VERSION_CURRENT)));

  char[] chars = new char[IndexWriter.MAX_TERM_LENGTH];
  Arrays.fill(chars, 'x');
  Document doc = new Document();
  final String bigTerm = new String(chars);

  // This produces a too-long term:
  String contents = "abc xyz x" + bigTerm + " another term";
  doc.add(new TextField("content", contents, Field.Store.NO));
  writer.addDocument(doc);

  // Make sure we can add another normal document
  doc = new Document();
  doc.add(new TextField("content", "abc bbb ccc", Field.Store.NO));
  writer.addDocument(doc);
  writer.close();

  IndexReader reader = IndexReader.open(dir);

  // Make sure all terms < max size were indexed
  assertEquals(2, reader.docFreq(new Term("content", "abc")));
  assertEquals(1, reader.docFreq(new Term("content", "bbb")));
  assertEquals(1, reader.docFreq(new Term("content", "term")));
  assertEquals(1, reader.docFreq(new Term("content", "another")));

  // Make sure position is still incremented when
  // massive term is skipped:
  DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader,
      MultiFields.getLiveDocs(reader), "content", new BytesRef("another"));
  assertTrue(tps.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  assertEquals(1, tps.freq());
  assertEquals(3, tps.nextPosition());

  // Make sure the doc that has the massive term is in
  // the index:
  assertEquals("document with wicked long term is not in the index!", 2, reader.numDocs());

  reader.close();

  // Make sure we can add a document with exactly the
  // maximum length term, and search on that term:
  doc = new Document();
  doc.add(new TextField("content", bigTerm, Field.Store.NO));
  ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);
  sa.setMaxTokenLength(100000);
  writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa));
  writer.addDocument(doc);
  writer.close();
  reader = IndexReader.open(dir);
  assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
  reader.close();

  dir.close();
}
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
  checkRandomData(random(), new ClassicAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
}
/** blast some random large strings through the analyzer */
public void testRandomHugeStrings() throws Exception {
  Random random = random();
  checkRandomData(random, new ClassicAnalyzer(TEST_VERSION_CURRENT), 100*RANDOM_MULTIPLIER, 8192);
}