public void testMMapDirectory() throws IOException {
    long start = System.currentTimeMillis();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer())
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    FSDirectory open = FSDirectory.open(Paths.get("E:/testlucene"));
    IndexWriter indexWriter = new IndexWriter(open, indexWriterConfig);
    for (int i = 0; i < 10000000; i++) {
        indexWriter.addDocument(addDocument(i));
    }
    indexWriter.commit();
    indexWriter.close();
    long end = System.currentTimeMillis();
    log.error("MMapDirectory consumes {}s!", (end - start) / 1000);

    start = System.currentTimeMillis();
    IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(open));
    int total = 0;
    for (int i = 0; i < 10000000; i++) {
        TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
        TopDocs search = indexSearcher.search(key1, 10);
        total += search.totalHits;
    }
    System.out.println(total);
    end = System.currentTimeMillis();
    log.error("MMapDirectory search consumes {}ms!", (end - start));
}
public void testRamDirectory() throws IOException {
    long start = System.currentTimeMillis();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer())
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    RAMDirectory ramDirectory = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDirectory, indexWriterConfig);
    for (int i = 0; i < 10000000; i++) {
        indexWriter.addDocument(addDocument(i));
    }
    indexWriter.commit();
    indexWriter.close();
    long end = System.currentTimeMillis();
    log.error("RamDirectory consumes {}s!", (end - start) / 1000);

    start = System.currentTimeMillis();
    IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(ramDirectory));
    int total = 0;
    for (int i = 0; i < 10000000; i++) {
        TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
        TopDocs search = indexSearcher.search(key1, 10);
        total += search.totalHits;
    }
    System.out.println(total);
    end = System.currentTimeMillis();
    log.error("RamDirectory search consumes {}ms!", (end - start));
}
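// Neither benchmark above shows the addDocument(int) helper it calls. A minimal
// sketch of what it might look like, assuming one single-token "key1" field per
// document (the field name matches the TermQuery lookups above; the field type
// is an assumption, not the original author's code):
private Document addDocument(int i) {
    Document doc = new Document();
    // StringField indexes the value as one untokenized term, so the
    // TermQuery(new Term("key1", "key" + i)) searches above can match it
    doc.add(new StringField("key1", "key" + i, Field.Store.NO));
    return doc;
}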
public void testCreateMultiDocumentSearcher() throws Exception {
    int numDocs = randomIntBetween(2, 8);
    List<ParseContext.Document> docs = new ArrayList<>(numDocs);
    for (int i = 0; i < numDocs; i++) {
        docs.add(new ParseContext.Document());
    }
    Analyzer analyzer = new WhitespaceAnalyzer();
    ParsedDocument parsedDocument = new ParsedDocument(null, null, "_id", "_type", null, docs, null, null, null);
    IndexSearcher indexSearcher = PercolateQueryBuilder.createMultiDocumentSearcher(analyzer, parsedDocument);
    assertThat(indexSearcher.getIndexReader().numDocs(), equalTo(numDocs));

    // ensure that any query gets modified so that the nested docs are never included as hits:
    Query query = new MatchAllDocsQuery();
    BooleanQuery result = (BooleanQuery) indexSearcher.createNormalizedWeight(query, true).getQuery();
    assertThat(result.clauses().size(), equalTo(2));
    assertThat(result.clauses().get(0).getQuery(), sameInstance(query));
    assertThat(result.clauses().get(0).getOccur(), equalTo(BooleanClause.Occur.MUST));
    assertThat(result.clauses().get(1).getOccur(), equalTo(BooleanClause.Occur.MUST_NOT));
}
/**
 * Test the WordScorer emitted by the smoothing model
 */
public void testBuildWordScorer() throws IOException {
    SmoothingModel testModel = createTestModel();
    Map<String, Analyzer> mapping = new HashMap<>();
    mapping.put("field", new WhitespaceAnalyzer());
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(), mapping);
    IndexWriter writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(wrapper));
    Document doc = new Document();
    doc.add(new Field("field", "someText", TextField.TYPE_NOT_STORED));
    writer.addDocument(doc);
    DirectoryReader ir = DirectoryReader.open(writer);

    WordScorer wordScorer = testModel.buildWordScorerFactory()
            .newScorer(ir, MultiFields.getTerms(ir, "field"), "field", 0.9d, BytesRefs.toBytesRef(" "));
    assertWordScorer(wordScorer, testModel);
}
@Override
protected void loadAnalyzerFactory(Map<String, AnalyzerInfo> analyzerFactoryMap) {
    // treats the entire input as a single token
    registerAnalyzer(analyzerFactoryMap, "keyword", "Keyword Analyzer", new DefaultAnalyzerFactory(KeywordAnalyzer.class));
    // Lucene StandardAnalyzer
    registerAnalyzer(analyzerFactoryMap, "standard", "Standard Analyzer", new DefaultAnalyzerFactory(StandardAnalyzer.class));
    registerAnalyzer(analyzerFactoryMap, "ngram", "NGram Analyzer", new DefaultAnalyzerFactory(NGramWordAnalyzer.class));
    registerAnalyzer(analyzerFactoryMap, "primary", "Primary Word Analyzer", new DefaultAnalyzerFactory(PrimaryWordAnalyzer.class));
    registerAnalyzer(analyzerFactoryMap, "whitespace", "Whitespace Analyzer", new DefaultAnalyzerFactory(WhitespaceAnalyzer.class));
    registerAnalyzer(analyzerFactoryMap, "csv", "Comma Separated Value Analyzer", new DefaultAnalyzerFactory(CSVAnalyzer.class));
    registerAnalyzer(analyzerFactoryMap, "autocomplete", "Autocomplete Analyzer", new DefaultAnalyzerFactory(AutocompleteAnalyzer.class));
}
/**
 * Index a picture
 * @param source
 * @param picture_id
 * @param conf
 * @throws IOException
 */
public static void index(byte[] source, UUID picture_id, IndexWriterConfig conf) throws IOException {
    ByteArrayInputStream in = new ByteArrayInputStream(source);
    BufferedImage image = ImageIO.read(in);

    // Creating a Lucene IndexWriter config if none was provided
    log.debug("No IndexWriterConfig supplied: " + (conf == null));
    if (conf == null) {
        conf = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
        conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }

    luceneIndexer(image, picture_id, FeatureEnumerate.AutoColorCorrelogram.getText(),
            DocumentBuilderFactory.getAutoColorCorrelogramDocumentBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.CEDD.getText(),
            DocumentBuilderFactory.getCEDDDocumentBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.ColorLayout.getText(),
            DocumentBuilderFactory.getColorLayoutBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.EdgeHistogram.getText(),
            DocumentBuilderFactory.getEdgeHistogramBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.ColorHistogram.getText(),
            DocumentBuilderFactory.getColorHistogramDocumentBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.PHOG.getText(),
            DocumentBuilderFactory.getPHOGDocumentBuilder(), conf);
}
private static void deleteFromFeature(UUID pictureId, Term term, String prefix, IndexWriterConfig conf) throws IOException {
    File file = getPath(prefix);

    // Creating a Lucene IndexWriter config if none was provided
    log.debug("No IndexWriterConfig supplied: " + (conf == null));
    if (conf == null) {
        conf = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
        conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }
    IndexWriter iw = new IndexWriter(FSDirectory.open(file), conf);
    iw.deleteDocuments(term);
    iw.close();
}
private Document createTestNormsDocument(boolean setNormsProp, boolean normsPropVal,
        boolean setBodyNormsProp, boolean bodyNormsVal) throws Exception {
    Properties props = new Properties();

    // Indexing configuration.
    props.setProperty("analyzer", WhitespaceAnalyzer.class.getName());
    props.setProperty("directory", "RAMDirectory");
    if (setNormsProp) {
        props.setProperty("doc.tokenized.norms", Boolean.toString(normsPropVal));
    }
    if (setBodyNormsProp) {
        props.setProperty("doc.body.tokenized.norms", Boolean.toString(bodyNormsVal));
    }

    // Create PerfRunData
    Config config = new Config(props);
    DocMaker dm = new DocMaker();
    dm.setConfig(config, new OneDocSource());
    return dm.makeDocument();
}
/** Build the example index. */
private void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir,
            new IndexWriterConfig(FacetExamples.EXAMPLES_VER, new WhitespaceAnalyzer()));

    // Writes facet ords to a separate directory from the main index
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    Document doc = new Document();
    doc.add(new TextField("c", "foo bar", Store.NO));
    doc.add(new NumericDocValuesField("popularity", 5L));
    doc.add(new FacetField("A", "B"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new TextField("c", "foo foo bar", Store.NO));
    doc.add(new NumericDocValuesField("popularity", 3L));
    doc.add(new FacetField("A", "C"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    indexWriter.close();
    taxoWriter.close();
}
/** Build the example index. */
public void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir,
            new IndexWriterConfig(FacetExamples.EXAMPLES_VER, new WhitespaceAnalyzer()));

    // Add documents with a fake timestamp, 1000 sec before
    // "now", 2000 sec before "now", ...:
    for (int i = 0; i < 100; i++) {
        Document doc = new Document();
        long then = nowSec - i * 1000;
        // Add as doc values field, so we can compute range facets:
        doc.add(new NumericDocValuesField("timestamp", then));
        // Add as numeric field so we can drill-down:
        doc.add(new LongField("timestamp", then, Field.Store.NO));
        indexWriter.addDocument(doc);
    }

    // Open near-real-time searcher
    searcher = new IndexSearcher(DirectoryReader.open(indexWriter, true));
    indexWriter.close();
}
@Test
public void testUnicode() {
    SpellingQueryConverter converter = new SpellingQueryConverter();
    converter.init(new NamedList());
    converter.setAnalyzer(new WhitespaceAnalyzer());

    // Chinese text value
    Collection<Token> tokens = converter.convert("text_field:我购买了道具和服装。");
    assertTrue("tokens is null and it shouldn't be", tokens != null);
    assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());

    tokens = converter.convert("text_购field:我购买了道具和服装。");
    assertTrue("tokens is null and it shouldn't be", tokens != null);
    assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());

    tokens = converter.convert("text_field:我购xyz买了道具和服装。");
    assertTrue("tokens is null and it shouldn't be", tokens != null);
    assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
}
@Test
public void testMultipleClauses() {
    SpellingQueryConverter converter = new SpellingQueryConverter();
    converter.init(new NamedList());
    converter.setAnalyzer(new WhitespaceAnalyzer());

    // two field:value pairs should give two tokens
    Collection<Token> tokens = converter.convert("买text_field:我购买了道具和服装。 field2:bar");
    assertTrue("tokens is null and it shouldn't be", tokens != null);
    assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());

    // a field:value pair and a search term should give two tokens
    tokens = converter.convert("text_field:我购买了道具和服装。 bar");
    assertTrue("tokens is null and it shouldn't be", tokens != null);
    assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
}
@Test
public void testTermOffsetsTokenStream() throws Exception {
    String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" };
    Analyzer a1 = new WhitespaceAnalyzer();
    TokenStream tokenStream = a1.tokenStream("", "a b c d e f g h i j k l m n");
    tokenStream.reset();
    TermOffsetsTokenStream tots = new TermOffsetsTokenStream(tokenStream);
    for (String v : multivalued) {
        TokenStream ts1 = tots.getMultiValuedTokenStream(v.length());
        Analyzer a2 = new WhitespaceAnalyzer();
        TokenStream ts2 = a2.tokenStream("", v);
        ts2.reset();
        while (ts1.incrementToken()) {
            assertTrue(ts2.incrementToken());
            assertEquals(ts1, ts2);
        }
        assertFalse(ts2.incrementToken());
    }
}
public void generateIndex(String path, List<AnnotatedEntailmentPair> aps) throws Exception {
    log.info("Rules extraction started.");
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_47, new WhitespaceAnalyzer(Version.LUCENE_47));
    conf.setOpenMode(OpenMode.CREATE);
    writer = new IndexWriter(FSDirectory.open(new File(path)), conf);

    Document doc = new Document();
    doc.add(new StringField(IndexRulesSource.TERMDOC_FIELD, "true", Store.YES));
    for (String u : rulesSource.uses()) {
        doc.add(new StringField(IndexRulesSource.USES_FIELD, u, Store.YES));
    }
    writer.addDocument(doc);

    start(aps.iterator());
    writer.waitForMerges();
    writer.close(true);
    log.info(cache.size() + " rules extracted!");
}
public KeywordFinder(File inputFile) throws IOException {
    RAMDirectory ramdir = new RAMDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(Version.LATEST, new WhitespaceAnalyzer());
    IndexWriter writer = new IndexWriter(ramdir, conf);
    BufferedReader reader = new BufferedReader(new FileReader(inputFile));
    while (reader.ready()) {
        String keyword = reader.readLine().toLowerCase().trim();
        if (keyword.length() > 0) {
            Document doc = new Document();
            doc.add(new TextField("keyword",
                    keyword.replace("-", " ").replace("_", " ").replace("\\", " ").replace("/", " "),
                    Field.Store.YES));
            writer.addDocument(doc);
        }
    }
    reader.close();
    writer.close();
    searcher = new IndexSearcher(DirectoryReader.open(ramdir));
}
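// How the searcher built above is used is not shown. A hypothetical, minimal
// lookup (method name is an assumption; it handles single-token keywords only,
// since the TextField is split on whitespace at index time):
public boolean isKeyword(String word) throws IOException {
    // lowercase to match the normalization applied in the constructor
    TopDocs hits = searcher.search(new TermQuery(new Term("keyword", word.toLowerCase().trim())), 1);
    return hits.totalHits > 0;
}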
@Override
public List<String> getTokens(Reader reader) throws IOException {
    List<String> tokens = new ArrayList<>();
    Analyzer analyzer = new WhitespaceAnalyzer();
    TokenStream tokenStream = analyzer.tokenStream("text", reader);
    tokenStream.reset();
    CharTermAttribute cattr = tokenStream.addAttribute(CharTermAttribute.class);
    while (tokenStream.incrementToken()) {
        String token = cattr.toString();
        tokens.add(token);
    }
    tokenStream.end();
    if (finder != null) {
        return finder.process(tokens);
    } else {
        return tokens;
    }
}
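// Example use of getTokens above (a minimal sketch; the surrounding class and
// its optional "finder" post-processor are omitted):
List<String> tokens = getTokens(new StringReader("Foo bar-baz qux"));
// WhitespaceAnalyzer splits only on whitespace, so this yields
// ["Foo", "bar-baz", "qux"]: no lowercasing and no punctuation stripping.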
public void run() {
    // do it ...
    try {
        // IndexWriter indexWriter = LuceneUtils.createIndexWriter(indexPath, overwriteIndex, LuceneUtils.AnalyzerType.WhitespaceAnalyzer);
        IndexWriterConfig config = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION,
                new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        config.setCodec(new LireCustomCodec());
        IndexWriter indexWriter = new IndexWriter(FSDirectory.open(new File(indexPath)), config);
        for (Iterator<File> iterator = inputFiles.iterator(); iterator.hasNext(); ) {
            File inputFile = iterator.next();
            if (verbose) System.out.println("Processing " + inputFile.getPath() + ".");
            readFile(indexWriter, inputFile);
            if (verbose) System.out.println("Indexing finished.");
        }
        indexWriter.commit();
        indexWriter.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
public void testIndexingAndSearchSift() throws IOException {
    // Creating a Lucene IndexWriter
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, new WhitespaceAnalyzer(Version.LUCENE_40));
    IndexWriter iw = new IndexWriter(FSDirectory.open(indexPath), conf);
    long ms = System.currentTimeMillis();
    int count = 0;
    ArrayList<File> files = FileUtils.getAllImageFiles(new File("testdata\\ferrari"), true);
    for (Iterator<File> i = files.iterator(); i.hasNext(); ) {
        File imgFile = i.next();
        iw.addDocument(siftBuilder.createDocument(ImageIO.read(imgFile), imgFile.getPath()));
        count++;
        if (count > 100 && count % 500 == 0) {
            System.out.println(count + " files indexed. " + (System.currentTimeMillis() - ms) / count + " ms per file");
        }
    }
    iw.close();

    IndexReader ir = DirectoryReader.open(FSDirectory.open(indexPath));
    SiftFeatureHistogramBuilder sfh = new SiftFeatureHistogramBuilder(ir, 1000, 500);
    sfh.index();
}
public void testExtendedIndexMSER() throws IOException {
    MSERDocumentBuilder builder = new MSERDocumentBuilder();
    IndexWriterConfig conf = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION,
            new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    IndexWriter iw = new IndexWriter(FSDirectory.open(indexPath), conf);
    long ms = System.currentTimeMillis();
    int count = 0;
    ArrayList<File> files = FileUtils.getAllImageFiles(new File("D:\\DataSets\\WIPO\\CA\\sample"), true);
    for (Iterator<File> i = files.iterator(); i.hasNext(); ) {
        File imgFile = i.next();
        BufferedImage img = ImageIO.read(imgFile);
        if (Math.max(img.getWidth(), img.getHeight()) < 800) {
            // scale image ...
            img = ImageUtils.scaleImage(img, 800);
        }
        iw.addDocument(builder.createDocument(img, imgFile.getPath()));
        count++;
        if (count > 2 && count % 25 == 0) {
            System.out.println(count + " files indexed. " + (System.currentTimeMillis() - ms) / count + " ms per file");
        }
    }
    iw.close();
}
@Test
public void shouldTurnAnalysisOn() throws SyntaxError, IOException {
    when(localParamsMock.get("command")).thenReturn("smembers");
    when(localParamsMock.get("key")).thenReturn("simpleKey");
    when(localParamsMock.getBool("useAnalyzer", false)).thenReturn(true);
    when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
    when(requestMock.getSchema()).thenReturn(schema);
    when(schema.getQueryAnalyzer()).thenReturn(new WhitespaceAnalyzer());
    when(jedisMock.smembers(anyString())).thenReturn(new HashSet<>(Arrays.asList("123 124", "321")));

    redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
    final Query query = redisQParser.parse();
    verify(jedisMock).smembers("simpleKey");

    IndexSearcher searcher = new IndexSearcher(new MultiReader());
    final Set<Term> terms = extractTerms(searcher, query);
    Assert.assertEquals(3, terms.size());
}
@Test
public void shouldRetryWhenRedisFailed() throws SyntaxError, IOException {
    when(localParamsMock.get("command")).thenReturn("smembers");
    when(localParamsMock.get("key")).thenReturn("simpleKey");
    when(localParamsMock.getBool("useAnalyzer", false)).thenReturn(false);
    when(localParamsMock.get("retries")).thenReturn("2");
    when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
    when(requestMock.getSchema()).thenReturn(schema);
    when(schema.getQueryAnalyzer()).thenReturn(new WhitespaceAnalyzer());
    when(jedisPoolMock.getResource()).thenReturn(jedisFailingMock).thenReturn(jedisMock);
    when(jedisFailingMock.smembers("simpleKey")).thenThrow(new JedisException("Synthetic exception"));
    when(jedisMock.smembers("simpleKey")).thenReturn(new HashSet<String>(Collections.singletonList("value")));

    redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock,
            new RetryingCommandHandler(jedisPoolMock, 1));
    final Query query = redisQParser.parse();

    IndexSearcher searcher = new IndexSearcher(new MultiReader());
    final Set<Term> terms = extractTerms(searcher, query);
    Assert.assertEquals(1, terms.size());
}
@Test
public void shouldUseTermsQuery() throws SyntaxError, IOException {
    when(localParamsMock.get("command")).thenReturn("smembers");
    when(localParamsMock.get("key")).thenReturn("simpleKey");
    when(localParamsMock.get("ignoreScore")).thenReturn("true");
    when(localParamsMock.getBool("useAnalyzer", false)).thenReturn(true);
    when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
    when(requestMock.getSchema()).thenReturn(schema);
    when(schema.getQueryAnalyzer()).thenReturn(new WhitespaceAnalyzer());
    when(jedisMock.smembers(anyString())).thenReturn(new HashSet<>(Arrays.asList("123 124", "321", "322", "323",
            "324", "325", "326", "327", "328", "329", "330", "331", "332", "333", "334", "335", "336", "337", "338")));

    redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
    final Query query = redisQParser.parse();
    verify(jedisMock).smembers("simpleKey");

    IndexSearcher searcher = new IndexSearcher(new MultiReader());
    Query rewrittenQuery = searcher.rewrite(query);
    assertTrue(rewrittenQuery instanceof TermsQuery);
}
protected Query doToQuery(QueryShardContext context) throws IOException {
    // Analyzer analyzer = context.getMapperService().searchAnalyzer();
    Analyzer analyzer = new WhitespaceAnalyzer();
    try (TokenStream source = analyzer.tokenStream(fieldName, value.toString())) {
        CachingTokenFilter stream = new CachingTokenFilter(new LowerCaseFilter(source));
        TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
        if (termAtt == null) {
            return null;
        }
        List<CustomSpanTermQuery> clauses = new ArrayList<>();
        stream.reset();
        while (stream.incrementToken()) {
            Term term = new Term(fieldName, termAtt.getBytesRef());
            clauses.add(new CustomSpanTermQuery(term));
        }
        return new PhraseCountQuery(clauses.toArray(new CustomSpanTermQuery[clauses.size()]), slop, inOrder, weightedCount);
    } catch (IOException e) {
        throw new RuntimeException("Error analyzing query text", e);
    }
}
/** Build the example index. */
private void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir,
            new IndexWriterConfig(FacetExamples.EXAMPLES_VER, new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER)));

    // Writes facet ords to a separate directory from the main index
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);

    // Reused across documents, to add the necessary facet fields
    FacetFields facetFields = new FacetFields(taxoWriter, indexingParams);

    add(indexWriter, facetFields, "Author/Bob", "Publish Date/2010/10/15");
    add(indexWriter, facetFields, "Author/Lisa", "Publish Date/2010/10/20");
    add(indexWriter, facetFields, "Author/Lisa", "Publish Date/2012/1/1");
    add(indexWriter, facetFields, "Author/Susan", "Publish Date/2012/1/7");
    add(indexWriter, facetFields, "Author/Frank", "Publish Date/1999/5/5");

    indexWriter.close();
    taxoWriter.close();
}
/** Build the example index. */
private void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir,
            new IndexWriterConfig(FacetExamples.EXAMPLES_VER, new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER)));

    // Writes facet ords to a separate directory from the main index
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);

    // Reused across documents, to add the necessary facet fields
    FacetFields facetFields = new FacetFields(taxoWriter);

    add(indexWriter, facetFields, "Author/Bob", "Publish Date/2010/10/15");
    add(indexWriter, facetFields, "Author/Lisa", "Publish Date/2010/10/20");
    add(indexWriter, facetFields, "Author/Lisa", "Publish Date/2012/1/1");
    add(indexWriter, facetFields, "Author/Susan", "Publish Date/2012/1/7");
    add(indexWriter, facetFields, "Author/Frank", "Publish Date/1999/5/5");

    indexWriter.close();
    taxoWriter.close();
}
/** Build the example index. */
private void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir,
            new IndexWriterConfig(FacetExamples.EXAMPLES_VER, new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER)));

    // Writes facet ords to a separate directory from the main index
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);

    // Reused across documents, to add the necessary facet fields
    FacetFields facetFields = new AssociationsFacetFields(taxoWriter);

    for (int i = 0; i < CATEGORIES.length; i++) {
        Document doc = new Document();
        CategoryAssociationsContainer associations = new CategoryAssociationsContainer();
        for (int j = 0; j < CATEGORIES[i].length; j++) {
            associations.setAssociation(CATEGORIES[i][j], ASSOCIATIONS[i][j]);
        }
        facetFields.addFields(doc, associations);
        indexWriter.addDocument(doc);
    }

    indexWriter.close();
    taxoWriter.close();
}
@Test
public void testUnicode() {
    SpellingQueryConverter converter = new SpellingQueryConverter();
    converter.init(new NamedList());
    converter.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));

    // Chinese text value
    Collection<Token> tokens = converter.convert("text_field:我购买了道具和服装。");
    assertTrue("tokens is null and it shouldn't be", tokens != null);
    assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());

    tokens = converter.convert("text_购field:我购买了道具和服装。");
    assertTrue("tokens is null and it shouldn't be", tokens != null);
    assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());

    tokens = converter.convert("text_field:我购xyz买了道具和服装。");
    assertTrue("tokens is null and it shouldn't be", tokens != null);
    assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
}
@Test
public void testMultipleClauses() {
    SpellingQueryConverter converter = new SpellingQueryConverter();
    converter.init(new NamedList());
    converter.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));

    // two field:value pairs should give two tokens
    Collection<Token> tokens = converter.convert("买text_field:我购买了道具和服装。 field2:bar");
    assertTrue("tokens is null and it shouldn't be", tokens != null);
    assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());

    // a field:value pair and a search term should give two tokens
    tokens = converter.convert("text_field:我购买了道具和服装。 bar");
    assertTrue("tokens is null and it shouldn't be", tokens != null);
    assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
}
@Test
public void testTermOffsetsTokenStream() throws Exception {
    String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" };
    Analyzer a1 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
    TokenStream tokenStream = a1.tokenStream("", new StringReader("a b c d e f g h i j k l m n"));
    tokenStream.reset();
    TermOffsetsTokenStream tots = new TermOffsetsTokenStream(tokenStream);
    for (String v : multivalued) {
        TokenStream ts1 = tots.getMultiValuedTokenStream(v.length());
        Analyzer a2 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
        TokenStream ts2 = a2.tokenStream("", new StringReader(v));
        ts2.reset();
        while (ts1.incrementToken()) {
            assertTrue(ts2.incrementToken());
            assertEquals(ts1, ts2);
        }
        assertFalse(ts2.incrementToken());
    }
}
@Test
public void testColumnVisibilityPayload() throws Exception {
    String visibility = "U";
    String value = "value";
    RestrictedField restrictedField = new RestrictedField(
            new StringField("field", value, Field.Store.NO), new FieldVisibility(visibility));
    try (TokenStream tokenStream = restrictedField.tokenStream(new WhitespaceAnalyzer(), null)) {
        CharTermAttribute charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class);
        PayloadAttribute payloadAttribute = tokenStream.getAttribute(PayloadAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            assertEquals(value, new String(charTermAttribute.buffer(), 0, charTermAttribute.length()));
            assertEquals(visibility, new String(payloadAttribute.getPayload().bytes));
        }
    }
}
@Test
public void testDocValuesFormat() throws IOException {
    RAMDirectory directory = new RAMDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
    conf.setCodec(new Blur024Codec());
    IndexWriter writer = new IndexWriter(directory, conf);

    Document doc = new Document();
    doc.add(new StringField("f", "v", Store.YES));
    doc.add(new SortedDocValuesField("f", new BytesRef("v")));
    writer.addDocument(doc);
    writer.close();

    DirectoryReader reader = DirectoryReader.open(directory);
    AtomicReaderContext context = reader.leaves().get(0);
    AtomicReader atomicReader = context.reader();
    SortedDocValues sortedDocValues = atomicReader.getSortedDocValues("f");
    assertTrue(sortedDocValues.getClass().getName().startsWith(DiskDocValuesProducer.class.getName()));
    reader.close();
}
@Test
public void test5() throws ParseException, IOException {
    parser = new SuperParser(LUCENE_VERSION, getFieldManager(new WhitespaceAnalyzer(LUCENE_VERSION)),
            true, null, ScoreType.SUPER, new Term("_primedoc_"));
    Query query = parser.parse("<a.a:a a.d:{e TO f} a.b:b a.test:hello\\<> -<g.c:c g.d:d>");

    BooleanQuery booleanQuery1 = new BooleanQuery();
    booleanQuery1.add(new TermQuery(new Term("a.a", "a")), Occur.SHOULD);
    booleanQuery1.add(new TermRangeQuery("a.d", new BytesRef("e"), new BytesRef("f"), false, false), Occur.SHOULD);
    booleanQuery1.add(new TermQuery(new Term("a.b", "b")), Occur.SHOULD);
    // std analyzer took the "<" out
    booleanQuery1.add(new TermQuery(new Term("a.test", "hello<")), Occur.SHOULD);

    BooleanQuery booleanQuery2 = new BooleanQuery();
    booleanQuery2.add(new TermQuery(new Term("g.c", "c")), Occur.SHOULD);
    booleanQuery2.add(new TermQuery(new Term("g.d", "d")), Occur.SHOULD);

    SuperQuery superQuery1 = new SuperQuery(booleanQuery1, ScoreType.SUPER, new Term("_primedoc_"));
    SuperQuery superQuery2 = new SuperQuery(booleanQuery2, ScoreType.SUPER, new Term("_primedoc_"));

    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.add(superQuery1, Occur.SHOULD);
    booleanQuery.add(superQuery2, Occur.MUST_NOT);

    assertQuery(booleanQuery, query);
}
private static Analyzer getAnalyzer(JsonObject analyzer) {
    switch (analyzer.getString("type")) {
        case "MerescoDutchStemmingAnalyzer":
            JsonArray jsonFields = analyzer.getJsonArray("stemmingFields");
            String[] fields = new String[jsonFields.size()];
            for (int i = 0; i < jsonFields.size(); i++) {
                fields[i] = jsonFields.getString(i);
            }
            return new MerescoDutchStemmingAnalyzer(fields);
        case "MerescoStandardAnalyzer":
            return new MerescoStandardAnalyzer();
        case "WhitespaceAnalyzer":
            return new WhitespaceAnalyzer();
    }
    return null;
}
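// Example of driving getAnalyzer from a JSON config, assuming the javax.json
// API that the JsonObject/JsonArray types above come from (the config shape is
// inferred from the switch cases):
JsonObject config = Json.createObjectBuilder()
        .add("type", "WhitespaceAnalyzer")
        .build();
Analyzer analyzer = getAnalyzer(config); // returns a WhitespaceAnalyzer
// Unrecognized types fall through to the trailing "return null", so callers
// must be prepared to handle a null analyzer.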
@Test
public void testCascadeCombo() throws IOException {
    ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT,
            new ComboAnalyzer(TEST_VERSION_CURRENT,
                    new WhitespaceAnalyzer(TEST_VERSION_CURRENT),
                    new KeywordAnalyzer()
            ),
            new StandardAnalyzer(TEST_VERSION_CURRENT),
            new KeywordAnalyzer()
    );
    for (int i = 0; i < 3; i++) {
        assertTokenStreamContents(cb.tokenStream("field", new StringReader("just a little test " + i)),
                new String[]{"just", "just", "just a little test " + i, "just a little test " + i,
                        "a", "little", "little", "test", "test", Integer.toString(i), Integer.toString(i)},
                new int[]{ 0,  0,  0,  0,  5,  7,  7, 14, 14, 19, 19},
                new int[]{ 4,  4, 20, 20,  6, 13, 13, 18, 18, 20, 20},
                new int[]{ 1,  0,  0,  0,  1,  1,  0,  1,  0,  1,  0});
    }
}