Example source code for the Java class org.apache.lucene.analysis.core.WhitespaceAnalyzer
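WhitespaceAnalyzer splits text on whitespace only; it performs no lowercasing, stop-word removal, or punctuation stripping. Before the project snippets, here is a minimal, self-contained sketch of what it emits (the class name is illustrative; it assumes a Lucene release, roughly 4.4 or later, whose WhitespaceAnalyzer constructor takes no Version argument, whereas several older snippets below still pass one):

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class WhitespaceAnalyzerDemo {
    public static void main(String[] args) throws Exception {
        try (WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
             TokenStream stream = analyzer.tokenStream("field", "Foo-Bar  baz QUX")) {
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            stream.reset();                          // required before incrementToken()
            while (stream.incrementToken()) {
                System.out.println(term.toString()); // prints Foo-Bar, baz, QUX on separate lines
            }
            stream.end();                            // required before close()
        }
    }
}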

Project: RedisDirectory    File: TestLucene.java
public void testMMapDirectory() throws IOException {
    long start = System.currentTimeMillis();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(IndexWriterConfig
            .OpenMode.CREATE);
    FSDirectory open = FSDirectory.open(Paths.get("E:/testlucene"));
    IndexWriter indexWriter = new IndexWriter(open, indexWriterConfig);
    for (int i = 0; i < 10000000; i++) {
        indexWriter.addDocument(addDocument(i));
    }
    indexWriter.commit();
    indexWriter.close();
    long end = System.currentTimeMillis();
    log.error("MMapDirectory consumes {}s!", (end - start) / 1000);
    start = System.currentTimeMillis();
    IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(open));
    int total = 0;
    for (int i = 0; i < 10000000; i++) {
        TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
        TopDocs search = indexSearcher.search(key1, 10);
        total += search.totalHits;
    }
    System.out.println(total);
    end = System.currentTimeMillis();
    log.error("MMapDirectory search consumes {}ms!", (end - start));
}
Project: RedisDirectory    File: TestLucene.java
public void testRamDirectory() throws IOException {
    long start = System.currentTimeMillis();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(IndexWriterConfig
            .OpenMode.CREATE);
    RAMDirectory ramDirectory = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDirectory, indexWriterConfig);
    for (int i = 0; i < 10000000; i++) {
        indexWriter.addDocument(addDocument(i));
    }
    indexWriter.commit();
    indexWriter.close();
    long end = System.currentTimeMillis();
    log.error("RamDirectory consumes {}s!", (end - start) / 1000);
    start = System.currentTimeMillis();
    IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(ramDirectory));
    int total = 0;
    for (int i = 0; i < 10000000; i++) {
        TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
        TopDocs search = indexSearcher.search(key1, 10);
        total += search.totalHits;
    }
    System.out.println(total);
    end = System.currentTimeMillis();
    log.error("RamDirectory search consumes {}ms!", (end - start));
}
Project: elasticsearch_my    File: PercolateQueryBuilderTests.java
public void testCreateMultiDocumentSearcher() throws Exception {
    int numDocs = randomIntBetween(2, 8);
    List<ParseContext.Document> docs = new ArrayList<>(numDocs);
    for (int i = 0; i < numDocs; i++) {
        docs.add(new ParseContext.Document());
    }

    Analyzer analyzer = new WhitespaceAnalyzer();
    ParsedDocument parsedDocument = new ParsedDocument(null, null, "_id", "_type", null, docs, null, null, null);
    IndexSearcher indexSearcher = PercolateQueryBuilder.createMultiDocumentSearcher(analyzer, parsedDocument);
    assertThat(indexSearcher.getIndexReader().numDocs(), equalTo(numDocs));

    // ensure that any query gets modified so that the nested docs are never included as hits:
    Query query = new MatchAllDocsQuery();
    BooleanQuery result = (BooleanQuery) indexSearcher.createNormalizedWeight(query, true).getQuery();
    assertThat(result.clauses().size(), equalTo(2));
    assertThat(result.clauses().get(0).getQuery(), sameInstance(query));
    assertThat(result.clauses().get(0).getOccur(), equalTo(BooleanClause.Occur.MUST));
    assertThat(result.clauses().get(1).getOccur(), equalTo(BooleanClause.Occur.MUST_NOT));
}
Project: elasticsearch_my    File: SmoothingModelTestCase.java
/**
 * Test the WordScorer emitted by the smoothing model
 */
public void testBuildWordScorer() throws IOException {
    SmoothingModel testModel = createTestModel();
    Map<String, Analyzer> mapping = new HashMap<>();
    mapping.put("field", new WhitespaceAnalyzer());
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(), mapping);
    IndexWriter writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(wrapper));
    Document doc = new Document();
    doc.add(new Field("field", "someText", TextField.TYPE_NOT_STORED));
    writer.addDocument(doc);
    DirectoryReader ir = DirectoryReader.open(writer);

    WordScorer wordScorer = testModel.buildWordScorerFactory().newScorer(ir, MultiFields.getTerms(ir, "field"), "field", 0.9d,
            BytesRefs.toBytesRef(" "));
    assertWordScorer(wordScorer, testModel);
}
Project: fastcatsearch3    File: BasicAnalysisPlugin.java
@Override
protected void loadAnalyzerFactory(Map<String, AnalyzerInfo> analyzerFactoryMap) {
    // keyword: indexes the entire input as a single token
    registerAnalyzer(analyzerFactoryMap, "keyword", "Keyword Analyzer", new DefaultAnalyzerFactory(KeywordAnalyzer.class));
    // Lucene StandardAnalyzer
    registerAnalyzer(analyzerFactoryMap, "standard", "Standard Analyzer", new DefaultAnalyzerFactory(StandardAnalyzer.class));

    registerAnalyzer(analyzerFactoryMap, "ngram", "NGram Analyzer", new DefaultAnalyzerFactory(NGramWordAnalyzer.class));

    registerAnalyzer(analyzerFactoryMap, "primary", "Primary Word Analyzer", new DefaultAnalyzerFactory(PrimaryWordAnalyzer.class));

    registerAnalyzer(analyzerFactoryMap, "whitespace", "Whitespace Analyzer", new DefaultAnalyzerFactory(WhitespaceAnalyzer.class));

    registerAnalyzer(analyzerFactoryMap, "csv", "Comma separated value Analyzer", new DefaultAnalyzerFactory(CSVAnalyzer.class));

       registerAnalyzer(analyzerFactoryMap, "autocomplete", "Autocomplete Analyzer", new DefaultAnalyzerFactory(AutocompleteAnalyzer.class));
}
Project: flipper-reverse-image-search    File: LireBuilder.java
/**
 * Index a picture: extract its global features and add one Lucene document per feature.
 * @param source raw bytes of the picture
 * @param picture_id unique id of the picture
 * @param conf Lucene index writer configuration; if null, a default one is created
 * @throws IOException if the image cannot be read or indexed
 */
public static void index(byte[] source, UUID picture_id, IndexWriterConfig conf) throws IOException
{
    ByteArrayInputStream in = new ByteArrayInputStream(source);
    BufferedImage image = ImageIO.read(in);

    // Create a default Lucene IndexWriterConfig if none was supplied
    log.debug("Is Lucene configured? " + (conf != null));
    if(conf == null) {
        conf = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
        conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }

    luceneIndexer(image, picture_id, FeatureEnumerate.AutoColorCorrelogram.getText(), DocumentBuilderFactory.getAutoColorCorrelogramDocumentBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.CEDD.getText(), DocumentBuilderFactory.getCEDDDocumentBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.ColorLayout.getText(), DocumentBuilderFactory.getColorLayoutBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.EdgeHistogram.getText(), DocumentBuilderFactory.getEdgeHistogramBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.ColorHistogram.getText(), DocumentBuilderFactory.getColorHistogramDocumentBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.PHOG.getText(), DocumentBuilderFactory.getPHOGDocumentBuilder(), conf);

}
Project: flipper-reverse-image-search    File: LireBuilder.java
private static void deleteFromFeature(UUID pictureId, Term term, String prefix, IndexWriterConfig conf) throws IOException {

    File file = getPath(prefix);

    // Create a default Lucene IndexWriterConfig if none was supplied
    log.debug("Is Lucene configured? " + (conf != null));
    if (conf == null) {
        conf = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
        conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }
    IndexWriter iw = new IndexWriter(FSDirectory.open(file), conf);

    iw.deleteDocuments(term);

    iw.close();
}
Project: search    File: DocMakerTest.java
private Document createTestNormsDocument(boolean setNormsProp,
    boolean normsPropVal, boolean setBodyNormsProp, boolean bodyNormsVal)
    throws Exception {
  Properties props = new Properties();

  // Indexing configuration.
  props.setProperty("analyzer", WhitespaceAnalyzer.class.getName());
  props.setProperty("directory", "RAMDirectory");
  if (setNormsProp) {
    props.setProperty("doc.tokenized.norms", Boolean.toString(normsPropVal));
  }
  if (setBodyNormsProp) {
    props.setProperty("doc.body.tokenized.norms", Boolean.toString(bodyNormsVal));
  }

  // Create PerfRunData
  Config config = new Config(props);

  DocMaker dm = new DocMaker();
  dm.setConfig(config, new OneDocSource());
  return dm.makeDocument();
}
Project: search    File: ExpressionAggregationFacetsExample.java
/** Build the example index. */
private void index() throws IOException {
  IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, 
      new WhitespaceAnalyzer()));

  // Writes facet ords to a separate directory from the main index
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

  Document doc = new Document();
  doc.add(new TextField("c", "foo bar", Store.NO));
  doc.add(new NumericDocValuesField("popularity", 5L));
  doc.add(new FacetField("A", "B"));
  indexWriter.addDocument(config.build(taxoWriter, doc));

  doc = new Document();
  doc.add(new TextField("c", "foo foo bar", Store.NO));
  doc.add(new NumericDocValuesField("popularity", 3L));
  doc.add(new FacetField("A", "C"));
  indexWriter.addDocument(config.build(taxoWriter, doc));

  indexWriter.close();
  taxoWriter.close();
}
Project: search    File: RangeFacetsExample.java
/** Build the example index. */
public void index() throws IOException {
  IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, 
      new WhitespaceAnalyzer()));

  // Add documents with a fake timestamp, 1000 sec before
  // "now", 2000 sec before "now", ...:
  for(int i=0;i<100;i++) {
    Document doc = new Document();
    long then = nowSec - i * 1000;
    // Add as doc values field, so we can compute range facets:
    doc.add(new NumericDocValuesField("timestamp", then));
    // Add as numeric field so we can drill-down:
    doc.add(new LongField("timestamp", then, Field.Store.NO));
    indexWriter.addDocument(doc);
  }

  // Open near-real-time searcher
  searcher = new IndexSearcher(DirectoryReader.open(indexWriter, true));
  indexWriter.close();
}
Project: search    File: SpellingQueryConverterTest.java
@Test
public void testUnicode() {
  SpellingQueryConverter converter = new SpellingQueryConverter();
  converter.init(new NamedList());
  converter.setAnalyzer(new WhitespaceAnalyzer());

  // chinese text value
  Collection<Token> tokens = converter.convert("text_field:我购买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());

  tokens = converter.convert("text_购field:我购买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());

  tokens = converter.convert("text_field:我购xyz买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
}
Project: search    File: SpellingQueryConverterTest.java
@Test
public void testMultipleClauses() {
  SpellingQueryConverter converter = new SpellingQueryConverter();
  converter.init(new NamedList());
  converter.setAnalyzer(new WhitespaceAnalyzer());

  // two field:value pairs should give two tokens
  Collection<Token> tokens = converter.convert("买text_field:我购买了道具和服装。 field2:bar");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());

  // a field:value pair and a search term should give two tokens
  tokens = converter.convert("text_field:我购买了道具和服装。 bar");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
}
Project: search    File: HighlighterTest.java
@Test
public void testTermOffsetsTokenStream() throws Exception {
  String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" };
  Analyzer a1 = new WhitespaceAnalyzer();
  TokenStream tokenStream = a1.tokenStream("", "a b c d e f g h i j k l m n");
  tokenStream.reset();

  TermOffsetsTokenStream tots = new TermOffsetsTokenStream(
      tokenStream);
  for( String v : multivalued ){
    TokenStream ts1 = tots.getMultiValuedTokenStream( v.length() );
    Analyzer a2 = new WhitespaceAnalyzer();
    TokenStream ts2 = a2.tokenStream("", v);
    ts2.reset();

    while (ts1.incrementToken()) {
      assertTrue(ts2.incrementToken());
      assertEquals(ts1, ts2);
    }
    assertFalse(ts2.incrementToken());
  }
}
Project: edits    File: RulesIndexGenerator.java
public void generateIndex(String path, List<AnnotatedEntailmentPair> aps) throws Exception {
    log.info("Rules extraction started.");
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_47, new WhitespaceAnalyzer(Version.LUCENE_47));
    conf.setOpenMode(OpenMode.CREATE);
    writer = new IndexWriter(FSDirectory.open(new File(path)), conf);
    Document doc = new Document();
    doc.add(new StringField(IndexRulesSource.TERMDOC_FIELD, "true", Store.YES));
    for (String u : rulesSource.uses())
        doc.add(new StringField(IndexRulesSource.USES_FIELD, u, Store.YES));
    writer.addDocument(doc);
    start(aps.iterator());
    writer.waitForMerges();
    writer.close(true);
    log.info(cache.size() + " rules extracted!");
}
Project: tri    File: KeywordFinder.java
public KeywordFinder(File inputFile) throws IOException {
    RAMDirectory ramdir = new RAMDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(Version.LATEST, new WhitespaceAnalyzer());
    IndexWriter writer = new IndexWriter(ramdir, conf);
    BufferedReader reader = new BufferedReader(new FileReader(inputFile));
    while (reader.ready()) {
        String keyword = reader.readLine().toLowerCase().trim();
        if (keyword.length() > 0) {
            Document doc = new Document();
            doc.add(new TextField("keyword", keyword.replace("-", " ").replace("_", " ").replace("\\", " ").replace("/", " "), Field.Store.YES));
            writer.addDocument(doc);
        }
    }
    reader.close();
    writer.close();
    searcher = new IndexSearcher(DirectoryReader.open(ramdir));
}
Project: tri    File: TriWhitespaceTokenizer.java
@Override
public List<String> getTokens(Reader reader) throws IOException {
    List<String> tokens = new ArrayList<>();
    Analyzer analyzer = new WhitespaceAnalyzer();
    TokenStream tokenStream = analyzer.tokenStream("text", reader);
    tokenStream.reset();
    CharTermAttribute cattr = tokenStream.addAttribute(CharTermAttribute.class);
    while (tokenStream.incrementToken()) {
        String token = cattr.toString();
        tokens.add(token);
    }
    tokenStream.end();
    tokenStream.close();
    if (finder != null) {
        return finder.process(tokens);
    } else {
        return tokens;
    }
}
Project: lire    File: Indexor.java
public void run() {
    // do it ...
    try {
        // IndexWriter indexWriter = LuceneUtils.createIndexWriter(indexPath, overwriteIndex, LuceneUtils.AnalyzerType.WhitespaceAnalyzer);
        IndexWriterConfig config = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        config.setCodec(new LireCustomCodec());
        IndexWriter indexWriter = new IndexWriter(FSDirectory.open(new File(indexPath)), config);
        for (Iterator<File> iterator = inputFiles.iterator(); iterator.hasNext(); ) {
            File inputFile = iterator.next();
            if (verbose) System.out.println("Processing " + inputFile.getPath() + ".");
            readFile(indexWriter, inputFile);
            if (verbose) System.out.println("Indexing finished.");
        }
        indexWriter.commit();
        indexWriter.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Project: lire    File: VisualWordsTest.java
public void testIndexingAndSearchSift() throws IOException {
    // Creating a Lucene IndexWriter
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, new WhitespaceAnalyzer(Version.LUCENE_40));
    IndexWriter iw = new IndexWriter(FSDirectory.open(indexPath), conf);
    long ms = System.currentTimeMillis();
    int count = 0;
    ArrayList<File> files = FileUtils.getAllImageFiles(new File("testdata\\ferrari"), true);
    for (Iterator<File> i = files.iterator(); i.hasNext(); ) {
        File imgFile = i.next();
        iw.addDocument(siftBuilder.createDocument(
                ImageIO.read(imgFile), imgFile.getPath()));
        count++;
        if (count > 100 && count % 500 == 0) {
            System.out.println(count + " files indexed. " + (System.currentTimeMillis() - ms) / (count) + " ms per file");
        }

    }
    iw.close();
    IndexReader ir = DirectoryReader.open(FSDirectory.open(indexPath));
    SiftFeatureHistogramBuilder sfh = new SiftFeatureHistogramBuilder(ir, 1000, 500);
    sfh.index();
}
Project: lire    File: MserTest.java
public void testExtendedIndexMSER() throws IOException {
    MSERDocumentBuilder builder = new MSERDocumentBuilder();
    IndexWriterConfig conf = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION,
            new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    IndexWriter iw = new IndexWriter(FSDirectory.open(indexPath), conf);
    long ms = System.currentTimeMillis();
    int count = 0;
    ArrayList<File> files = FileUtils.getAllImageFiles(new File("D:\\DataSets\\WIPO\\CA\\sample"), true);
    for (Iterator<File> i = files.iterator(); i.hasNext(); ) {
        File imgFile = i.next();
        BufferedImage img = ImageIO.read(imgFile);
        if (Math.max(img.getWidth(), img.getHeight()) < 800) {
            // scale image ...
            img = ImageUtils.scaleImage(img, 800);
        }
        iw.addDocument(builder.createDocument(img, imgFile.getPath()));
        count++;
        if (count > 2 && count % 25 == 0) {
            System.out.println(count + " files indexed. " + (System.currentTimeMillis() - ms) / (count) + " ms per file");
        }

    }
    iw.close();
}
Project: solr-redis    File: TestRedisQParser.java
@Test
public void shouldTurnAnalysisOn() throws SyntaxError, IOException {
  when(localParamsMock.get("command")).thenReturn("smembers");
  when(localParamsMock.get("key")).thenReturn("simpleKey");
  when(localParamsMock.getBool("useAnalyzer", false)).thenReturn(true);
  when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
  when(requestMock.getSchema()).thenReturn(schema);
  when(schema.getQueryAnalyzer()).thenReturn(new WhitespaceAnalyzer());
  when(jedisMock.smembers(anyString())).thenReturn(new HashSet<>(Arrays.asList("123 124", "321")));
  redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
  final Query query = redisQParser.parse();
  verify(jedisMock).smembers("simpleKey");
  IndexSearcher searcher = new IndexSearcher(new MultiReader());
  final Set<Term> terms = extractTerms(searcher, query);
  Assert.assertEquals(3, terms.size());
}
Project: solr-redis    File: TestRedisQParser.java
@Test
public void shouldRetryWhenRedisFailed() throws SyntaxError, IOException {
  when(localParamsMock.get("command")).thenReturn("smembers");
  when(localParamsMock.get("key")).thenReturn("simpleKey");
  when(localParamsMock.getBool("useAnalyzer", false)).thenReturn(false);
  when(localParamsMock.get("retries")).thenReturn("2");
  when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
  when(requestMock.getSchema()).thenReturn(schema);
  when(schema.getQueryAnalyzer()).thenReturn(new WhitespaceAnalyzer());
  when(jedisPoolMock.getResource()).thenReturn(jedisFailingMock).thenReturn(jedisMock);
  when(jedisFailingMock.smembers("simpleKey")).thenThrow(new JedisException("Synthetic exception"));
  when(jedisMock.smembers("simpleKey")).thenReturn(new HashSet<String>(Collections.singletonList("value")));
  redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock,
          new RetryingCommandHandler(jedisPoolMock, 1));
  final Query query = redisQParser.parse();
  IndexSearcher searcher = new IndexSearcher(new MultiReader());
  final Set<Term> terms = extractTerms(searcher, query);
  Assert.assertEquals(1, terms.size());
}
Project: solr-redis    File: TestRedisQParser.java
@Test
public void shouldUseTermsQuery() throws SyntaxError, IOException {
  when(localParamsMock.get("command")).thenReturn("smembers");
  when(localParamsMock.get("key")).thenReturn("simpleKey");
  when(localParamsMock.get("ignoreScore")).thenReturn("true");
  when(localParamsMock.getBool("useAnalyzer", false)).thenReturn(true);
  when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
  when(requestMock.getSchema()).thenReturn(schema);
  when(schema.getQueryAnalyzer()).thenReturn(new WhitespaceAnalyzer());
  when(jedisMock.smembers(anyString())).thenReturn(new HashSet<>(Arrays.asList("123 124", "321", "322", "323", "324",
          "325", "326", "327", "328", "329", "330", "331", "332", "333", "334", "335", "336", "337", "338")));
  redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
  final Query query = redisQParser.parse();
  verify(jedisMock).smembers("simpleKey");
  IndexSearcher searcher = new IndexSearcher(new MultiReader());
  Query rewrittenQuery = searcher.rewrite(query);
  assertTrue(rewrittenQuery instanceof TermsQuery);
}
Project: pyramid    File: PhraseCountQueryBuilder.java
protected Query doToQuery(QueryShardContext context) throws IOException {
    // Analyzer analyzer = context.getMapperService().searchAnalyzer();
    Analyzer analyzer = new WhitespaceAnalyzer();
    try (TokenStream source = analyzer.tokenStream(fieldName, value.toString())) {
        CachingTokenFilter stream = new CachingTokenFilter(new LowerCaseFilter(source));
        TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
        if (termAtt == null) {
            return null;
        }
        List<CustomSpanTermQuery> clauses = new ArrayList<>();
        stream.reset();
        while (stream.incrementToken()) {
            Term term = new Term(fieldName, termAtt.getBytesRef());
            clauses.add(new CustomSpanTermQuery(term));
        }
        return new PhraseCountQuery(clauses.toArray(new CustomSpanTermQuery[clauses.size()]), slop, inOrder, weightedCount);
    } catch (IOException e) {
        throw new RuntimeException("Error analyzing query text", e);
    }
}
Project: NYBC    File: DocMakerTest.java
private Document createTestNormsDocument(boolean setNormsProp,
    boolean normsPropVal, boolean setBodyNormsProp, boolean bodyNormsVal)
    throws Exception {
  Properties props = new Properties();

  // Indexing configuration.
  props.setProperty("analyzer", WhitespaceAnalyzer.class.getName());
  props.setProperty("directory", "RAMDirectory");
  if (setNormsProp) {
    props.setProperty("doc.tokenized.norms", Boolean.toString(normsPropVal));
  }
  if (setBodyNormsProp) {
    props.setProperty("doc.body.tokenized.norms", Boolean.toString(bodyNormsVal));
  }

  // Create PerfRunData
  Config config = new Config(props);

  DocMaker dm = new DocMaker();
  dm.setConfig(config, new OneDocSource());
  return dm.makeDocument();
}
Project: NYBC    File: MultiCategoryListsFacetsExample.java
/** Build the example index. */
private void index() throws IOException {
  IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, 
      new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER)));

  // Writes facet ords to a separate directory from the main index
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);

  // Reused across documents, to add the necessary facet fields
  FacetFields facetFields = new FacetFields(taxoWriter, indexingParams);

  add(indexWriter, facetFields, "Author/Bob", "Publish Date/2010/10/15");
  add(indexWriter, facetFields, "Author/Lisa", "Publish Date/2010/10/20");
  add(indexWriter, facetFields, "Author/Lisa", "Publish Date/2012/1/1");
  add(indexWriter, facetFields, "Author/Susan", "Publish Date/2012/1/7");
  add(indexWriter, facetFields, "Author/Frank", "Publish Date/1999/5/5");

  indexWriter.close();
  taxoWriter.close();
}
Project: NYBC    File: SimpleFacetsExample.java
/** Build the example index. */
private void index() throws IOException {
  IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, 
      new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER)));

  // Writes facet ords to a separate directory from the main index
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);

  // Reused across documents, to add the necessary facet fields
  FacetFields facetFields = new FacetFields(taxoWriter);

  add(indexWriter, facetFields, "Author/Bob", "Publish Date/2010/10/15");
  add(indexWriter, facetFields, "Author/Lisa", "Publish Date/2010/10/20");
  add(indexWriter, facetFields, "Author/Lisa", "Publish Date/2012/1/1");
  add(indexWriter, facetFields, "Author/Susan", "Publish Date/2012/1/7");
  add(indexWriter, facetFields, "Author/Frank", "Publish Date/1999/5/5");

  indexWriter.close();
  taxoWriter.close();
}
Project: NYBC    File: AssociationsFacetsExample.java
/** Build the example index. */
private void index() throws IOException {
  IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, 
      new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER)));

  // Writes facet ords to a separate directory from the main index
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);

  // Reused across documents, to add the necessary facet fields
  FacetFields facetFields = new AssociationsFacetFields(taxoWriter);

  for (int i = 0; i < CATEGORIES.length; i++) {
    Document doc = new Document();
    CategoryAssociationsContainer associations = new CategoryAssociationsContainer();
    for (int j = 0; j < CATEGORIES[i].length; j++) {
      associations.setAssociation(CATEGORIES[i][j], ASSOCIATIONS[i][j]);
    }
    facetFields.addFields(doc, associations);
    indexWriter.addDocument(doc);
  }

  indexWriter.close();
  taxoWriter.close();
}
Project: NYBC    File: SpellingQueryConverterTest.java
@Test
public void testUnicode() {
  SpellingQueryConverter converter = new SpellingQueryConverter();
  converter.init(new NamedList());
  converter.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));

  // chinese text value
  Collection<Token> tokens = converter.convert("text_field:我购买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());

  tokens = converter.convert("text_购field:我购买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());

  tokens = converter.convert("text_field:我购xyz买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
}
Project: NYBC    File: SpellingQueryConverterTest.java
@Test
public void testMultipleClauses() {
  SpellingQueryConverter converter = new SpellingQueryConverter();
  converter.init(new NamedList());
  converter.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));

  // two field:value pairs should give two tokens
  Collection<Token> tokens = converter.convert("买text_field:我购买了道具和服装。 field2:bar");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());

  // a field:value pair and a search term should give two tokens
  tokens = converter.convert("text_field:我购买了道具和服装。 bar");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
}
Project: NYBC    File: HighlighterTest.java
@Test
public void testTermOffsetsTokenStream() throws Exception {
  String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" };
  Analyzer a1 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
  TokenStream tokenStream = a1.tokenStream("", new StringReader("a b c d e f g h i j k l m n"));
  tokenStream.reset();

  TermOffsetsTokenStream tots = new TermOffsetsTokenStream(
      tokenStream);
  for( String v : multivalued ){
    TokenStream ts1 = tots.getMultiValuedTokenStream( v.length() );
    Analyzer a2 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
    TokenStream ts2 = a2.tokenStream("", new StringReader(v));
    ts2.reset();

    while (ts1.incrementToken()) {
      assertTrue(ts2.incrementToken());
      assertEquals(ts1, ts2);
    }
    assertFalse(ts2.incrementToken());
  }
}
Project: lucure-core    File: RestrictedFieldTest.java
@Test
public void testColumnVisibilityPayload() throws Exception {
    String visibility = "U";
    String value = "value";
    RestrictedField restrictedField = new RestrictedField(new StringField(
      "field", value, Field.Store.NO), new FieldVisibility(visibility));
    try(TokenStream tokenStream = restrictedField.tokenStream(
      new WhitespaceAnalyzer(), null)) {
        CharTermAttribute charTermAttribute = tokenStream
          .getAttribute(CharTermAttribute.class);
        PayloadAttribute payloadAttribute = tokenStream
          .getAttribute(PayloadAttribute.class);

        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            assertEquals(value, new String(charTermAttribute.buffer(), 0, charTermAttribute.length()));
            assertEquals(visibility, new String(payloadAttribute.getPayload().bytes));
        }
    }
}
Project: incubator-blur    File: Blur024CodecTest.java
@Test
public void testDocValuesFormat() throws IOException {
  RAMDirectory directory = new RAMDirectory();
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
  conf.setCodec(new Blur024Codec());
  IndexWriter writer = new IndexWriter(directory, conf);

  Document doc = new Document();
  doc.add(new StringField("f", "v", Store.YES));
  doc.add(new SortedDocValuesField("f", new BytesRef("v")));
  writer.addDocument(doc);

  writer.close();

  DirectoryReader reader = DirectoryReader.open(directory);
  AtomicReaderContext context = reader.leaves().get(0);
  AtomicReader atomicReader = context.reader();
  SortedDocValues sortedDocValues = atomicReader.getSortedDocValues("f");
  assertTrue(sortedDocValues.getClass().getName().startsWith(DiskDocValuesProducer.class.getName()));

  reader.close();
}
Project: incubator-blur    File: SuperParserTest.java
@Test
public void test5() throws ParseException, IOException {
  parser = new SuperParser(LUCENE_VERSION, getFieldManager(new WhitespaceAnalyzer(LUCENE_VERSION)), true, null,
      ScoreType.SUPER, new Term("_primedoc_"));
  Query query = parser.parse("<a.a:a a.d:{e TO f} a.b:b a.test:hello\\<> -<g.c:c g.d:d>");

  BooleanQuery booleanQuery1 = new BooleanQuery();
  booleanQuery1.add(new TermQuery(new Term("a.a", "a")), Occur.SHOULD);
  booleanQuery1.add(new TermRangeQuery("a.d", new BytesRef("e"), new BytesRef("f"), false, false), Occur.SHOULD);
  booleanQuery1.add(new TermQuery(new Term("a.b", "b")), Occur.SHOULD);
  // std analyzer took the "<" out
  booleanQuery1.add(new TermQuery(new Term("a.test", "hello<")), Occur.SHOULD);

  BooleanQuery booleanQuery2 = new BooleanQuery();
  booleanQuery2.add(new TermQuery(new Term("g.c", "c")), Occur.SHOULD);
  booleanQuery2.add(new TermQuery(new Term("g.d", "d")), Occur.SHOULD);

  SuperQuery superQuery1 = new SuperQuery(booleanQuery1, ScoreType.SUPER, new Term("_primedoc_"));
  SuperQuery superQuery2 = new SuperQuery(booleanQuery2, ScoreType.SUPER, new Term("_primedoc_"));

  BooleanQuery booleanQuery = new BooleanQuery();
  booleanQuery.add(superQuery1, Occur.SHOULD);
  booleanQuery.add(superQuery2, Occur.MUST_NOT);

  assertQuery(booleanQuery, query);
}
Project: search-core    File: SpellingQueryConverterTest.java
@Test
public void testUnicode() {
  SpellingQueryConverter converter = new SpellingQueryConverter();
  converter.init(new NamedList());
  converter.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));

  // chinese text value
  Collection<Token> tokens = converter.convert("text_field:我购买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());

  tokens = converter.convert("text_购field:我购买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());

  tokens = converter.convert("text_field:我购xyz买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
}
Project: search-core    File: SpellingQueryConverterTest.java
@Test
public void testMultipleClauses() {
  SpellingQueryConverter converter = new SpellingQueryConverter();
  converter.init(new NamedList());
  converter.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));

  // two field:value pairs should give two tokens
  Collection<Token> tokens = converter.convert("买text_field:我购买了道具和服装。 field2:bar");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());

  // a field:value pair and a search term should give two tokens
  tokens = converter.convert("text_field:我购买了道具和服装。 bar");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
}
Project: search-core    File: HighlighterTest.java
@Test
public void testTermOffsetsTokenStream() throws Exception {
  String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" };
  Analyzer a1 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
  TokenStream tokenStream = a1.tokenStream("", new StringReader("a b c d e f g h i j k l m n"));
  tokenStream.reset();

  TermOffsetsTokenStream tots = new TermOffsetsTokenStream(
      tokenStream);
  for( String v : multivalued ){
    TokenStream ts1 = tots.getMultiValuedTokenStream( v.length() );
    Analyzer a2 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
    TokenStream ts2 = a2.tokenStream("", new StringReader(v));
    ts2.reset();

    while (ts1.incrementToken()) {
      assertTrue(ts2.incrementToken());
      assertEquals(ts1, ts2);
    }
    assertFalse(ts2.incrementToken());
  }
}
Project: meresco-lucene    File: LuceneSettings.java
private static Analyzer getAnalyzer(JsonObject analyzer) {
    switch (analyzer.getString("type")) {
    case "MerescoDutchStemmingAnalyzer":
        JsonArray jsonFields = analyzer.getJsonArray("stemmingFields");
        String[] fields = new String[jsonFields.size()];
        for (int i = 0; i < jsonFields.size(); i++) {
            fields[i] = jsonFields.getString(i);
        }
        return new MerescoDutchStemmingAnalyzer(fields);
    case "MerescoStandardAnalyzer":
        return new MerescoStandardAnalyzer();
    case "WhitespaceAnalyzer":
        return new WhitespaceAnalyzer();
    }
    return null;
}
Project: elasticsearch-analysis-german    File: ComboAnalyzerTests.java
@Test
public void testCascadeCombo() throws IOException {
    ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT,
            new ComboAnalyzer(TEST_VERSION_CURRENT,
                    new WhitespaceAnalyzer(TEST_VERSION_CURRENT),
                    new KeywordAnalyzer()
            ),
            new StandardAnalyzer(TEST_VERSION_CURRENT),
            new KeywordAnalyzer()
    );
    for (int i = 0 ; i < 3 ; i++)
        assertTokenStreamContents(cb.tokenStream("field", new StringReader("just a little test "+i)),
                new String[]{"just", "just", "just a little test "+i, "just a little test "+i, "a", "little", "little", "test", "test", Integer.toString(i), Integer.toString(i)},
                new int[]{ 0,  0,  0,  0,  5,  7,  7, 14, 14, 19, 19},
                new int[]{ 4,  4, 20, 20,  6, 13, 13, 18, 18, 20, 20},
                new int[]{ 1,  0,  0,  0,  1,  1,  0,  1,  0,  1,  0});
}