Example source code for the Java class org.apache.lucene.analysis.Analyzer.TokenStreamComponents
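Every example on this page follows the same pattern: an Analyzer subclass assembles its analysis chain inside createComponents and returns the source Tokenizer together with the end of the filter chain as a TokenStreamComponents pair. A minimal sketch of that pattern (imports omitted, as in the snippets below), assuming the Lucene 4.x API in which createComponents still receives a Reader:

Analyzer analyzer = new Analyzer() {
  @Override
  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    // Source: splits the raw input on whitespace.
    Tokenizer source = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
    // Sink: the last filter in the chain; here a single lower-casing step.
    TokenStream sink = new LowerCaseFilter(Version.LUCENE_CURRENT, source);
    // Keeping both ends lets Lucene reset the source and read from the sink on reuse.
    return new TokenStreamComponents(source, sink);
  }
};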

Project: lams    File: FSTSynonymFilterFactory.java
@Override
public void inform(ResourceLoader loader) throws IOException {
  final TokenizerFactory factory = tokenizerFactory == null ? null : loadTokenizerFactory(loader, tokenizerFactory);

  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader) : factory.create(reader);
      TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_CURRENT, tokenizer) : tokenizer;
      return new TokenStreamComponents(tokenizer, stream);
    }
  };

  try {
    String formatClass = format;
    if (format == null || format.equals("solr")) {
      formatClass = SolrSynonymParser.class.getName();
    } else if (format.equals("wordnet")) {
      formatClass = WordnetSynonymParser.class.getName();
    }
    // TODO: expose dedup as a parameter?
    map = loadSynonyms(loader, formatClass, true, analyzer);
  } catch (ParseException e) {
    throw new IOException("Error parsing synonyms file:", e);
  }
}
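The anonymous Analyzer above is throwaway: it only tells the synonym parser how entries in the rule file should be tokenized, and the parsed rules land in the factory's map field (a SynonymMap). As a hypothetical sketch of how that map is consumed afterwards (the field setup and the ignoreCase value are assumptions, not part of this factory):

// Hypothetical downstream use of the SynonymMap built in inform().
Analyzer indexAnalyzer = new Analyzer() {
  @Override
  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    Tokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
    // SynonymFilter injects mapped terms at the same positions as the originals.
    TokenStream stream = new SynonymFilter(tokenizer, map, true);
    return new TokenStreamComponents(tokenizer, stream);
  }
};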
Project: search    File: FSTSynonymFilterFactory.java
@Override
public void inform(ResourceLoader loader) throws IOException {
  final TokenizerFactory factory = tokenizerFactory == null ? null : loadTokenizerFactory(loader, tokenizerFactory);

  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader) : factory.create(reader);
      TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_CURRENT, tokenizer) : tokenizer;
      return new TokenStreamComponents(tokenizer, stream);
    }
  };

  try {
    String formatClass = format;
    if (format == null || format.equals("solr")) {
      formatClass = SolrSynonymParser.class.getName();
    } else if (format.equals("wordnet")) {
      formatClass = WordnetSynonymParser.class.getName();
    }
    // TODO: expose dedup as a parameter?
    map = loadSynonyms(loader, formatClass, true, analyzer);
  } catch (ParseException e) {
    throw new IOException("Error parsing synonyms file:", e);
  }
}
Project: search    File: TestWordDelimiterFilter.java
/** concat numbers + words + all */
public void testLotsOfConcatenating() throws Exception {
  final int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_WORDS | CATENATE_NUMBERS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE;    

  /* analyzer that uses whitespace + wdf */
  Analyzer a = new Analyzer() {
    @Override
    public TokenStreamComponents createComponents(String field, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, null));
    }
  };

  assertAnalyzesTo(a, "abc-def-123-456",
      new String[] { "abc", "abcdef", "abcdef123456", "def", "123", "123456", "456" }, // expected terms
      new int[] { 0, 0, 0, 4, 8, 8, 12 },    // start offsets
      new int[] { 3, 7, 15, 7, 11, 15, 15 }, // end offsets
      new int[] { 1, 0, 0, 1, 1, 0, 1 });    // position increments
}
Project: search    File: TestWordDelimiterFilter.java
/** concat numbers + words + all + preserve original */
public void testLotsOfConcatenating2() throws Exception {
  final int flags = PRESERVE_ORIGINAL | GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_WORDS | CATENATE_NUMBERS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE;    

  /* analyzer that uses whitespace + wdf */
  Analyzer a = new Analyzer() {
    @Override
    public TokenStreamComponents createComponents(String field, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, null));
    }
  };

  assertAnalyzesTo(a, "abc-def-123-456",
      new String[] { "abc-def-123-456", "abc", "abcdef", "abcdef123456", "def", "123", "123456", "456" }, // expected terms, preserved original first
      new int[] { 0, 0, 0, 0, 4, 8, 8, 12 },     // start offsets
      new int[] { 15, 3, 7, 15, 7, 11, 15, 15 }, // end offsets
      new int[] { 1, 0, 0, 0, 1, 1, 0, 1 });     // position increments
}
Project: search    File: TestWordDelimiterFilter.java
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
  int numIterations = atLeast(5);
  for (int i = 0; i < numIterations; i++) {
    final int flags = random().nextInt(512); // a random combination of the nine flag bits
    final CharArraySet protectedWords;
    if (random().nextBoolean()) {
      protectedWords = new CharArraySet(new HashSet<>(Arrays.asList("a", "b", "cd")), false);
    } else {
      protectedWords = null;
    }

    Analyzer a = new Analyzer() {

      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, protectedWords));
      }
    };
    // TODO: properly support positionLengthAttribute
    checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER, 20, false, false);
  }
}
Project: search    File: TestWordDelimiterFilter.java
/** blast some enormous random strings through the analyzer */
public void testRandomHugeStrings() throws Exception {
  int numIterations = atLeast(5);
  for (int i = 0; i < numIterations; i++) {
    final int flags = random().nextInt(512);
    final CharArraySet protectedWords;
    if (random().nextBoolean()) {
      protectedWords = new CharArraySet(new HashSet<>(Arrays.asList("a", "b", "cd")), false);
    } else {
      protectedWords = null;
    }

    Analyzer a = new Analyzer() {

      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, protectedWords));
      }
    };
    // TODO: properly support positionLengthAttribute
    checkRandomData(random(), a, 100*RANDOM_MULTIPLIER, 8192, false, false);
  }
}
Project: search    File: TestWordDelimiterFilter.java
public void testEmptyTerm() throws IOException {
  Random random = random();
  for (int i = 0; i < 512; i++) { // every combination of the nine flag bits
    final int flags = i;
    final CharArraySet protectedWords;
    if (random.nextBoolean()) {
      protectedWords = new CharArraySet(new HashSet<>(Arrays.asList("a", "b", "cd")), false);
    } else {
      protectedWords = null;
    }

    Analyzer a = new Analyzer() { 
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, protectedWords));
      }
    };
    // depending upon options, this thing may or may not preserve the empty term
    checkAnalysisConsistency(random, a, random.nextBoolean(), "");
  }
}
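The 512 in these WordDelimiterFilter tests is not arbitrary: this Lucene version defines exactly nine flag bits, so random().nextInt(512) draws a random flag combination and the loop above over i < 512 exhaustively covers every one. The nine flags, OR-ed together (all named in the tests on this page):

// All nine WordDelimiterFilter flag bits; 512 == 1 << 9, so the
// values 0..511 enumerate every possible combination.
final int ALL_FLAGS = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS
    | CATENATE_WORDS | CATENATE_NUMBERS | CATENATE_ALL
    | PRESERVE_ORIGINAL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS
    | STEM_ENGLISH_POSSESSIVE;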
Project: NYBC    File: TestWordDelimiterFilter.java
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
  int numIterations = atLeast(5);
  for (int i = 0; i < numIterations; i++) {
    final int flags = random().nextInt(512);
    final CharArraySet protectedWords;
    if (random().nextBoolean()) {
      protectedWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<String>(Arrays.asList("a", "b", "cd")), false);
    } else {
      protectedWords = null;
    }

    Analyzer a = new Analyzer() {

      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, protectedWords));
      }
    };
    checkRandomData(random(), a, 200, 20, false, false);
  }
}
Project: NYBC    File: TestWordDelimiterFilter.java
public void testEmptyTerm() throws IOException {
  Random random = random();
  for (int i = 0; i < 512; i++) {
    final int flags = i;
    final CharArraySet protectedWords;
    if (random.nextBoolean()) {
      protectedWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<String>(Arrays.asList("a", "b", "cd")), false);
    } else {
      protectedWords = null;
    }

    Analyzer a = new Analyzer() { 
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, protectedWords));
      }
    };
    // depending upon options, this thing may or may not preserve the empty term
    checkAnalysisConsistency(random, a, random.nextBoolean(), "");
  }
}
Project: read-open-source-code    File: FSTSynonymFilterFactory.java
@Override
public void inform(ResourceLoader loader) throws IOException {
  final TokenizerFactory factory = tokenizerFactory == null ? null : loadTokenizerFactory(loader, tokenizerFactory);

  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader) : factory.create(reader);
      TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_CURRENT, tokenizer) : tokenizer;
      return new TokenStreamComponents(tokenizer, stream);
    }
  };

  try {
    String formatClass = format;
    if (format == null || format.equals("solr")) {
      formatClass = SolrSynonymParser.class.getName();
    } else if (format.equals("wordnet")) {
      formatClass = WordnetSynonymParser.class.getName();
    }
    // TODO: expose dedup as a parameter?
    map = loadSynonyms(loader, formatClass, true, analyzer);
  } catch (ParseException e) {
    throw new IOException("Error parsing synonyms file:", e);
  }
}
Project: Maskana-Gestor-de-Conocimiento    File: FSTSynonymFilterFactory.java
@Override
public void inform(ResourceLoader loader) throws IOException {
  final TokenizerFactory factory = tokenizerFactory == null ? null : loadTokenizerFactory(loader, tokenizerFactory);

  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_43, reader) : factory.create(reader);
      TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_43, tokenizer) : tokenizer;
      return new TokenStreamComponents(tokenizer, stream);
    }
  };

  try {
    String formatClass = format;
    if (format == null || format.equals("solr")) {
      formatClass = SolrSynonymParser.class.getName();
    } else if (format.equals("wordnet")) {
      formatClass = WordnetSynonymParser.class.getName();
    }
    // TODO: expose dedup as a parameter?
    map = loadSynonyms(loader, formatClass, true, analyzer);
  } catch (ParseException e) {
    throw new IOException("Error parsing synonyms file:", e);
  }
}
Project: Maskana-Gestor-de-Conocimiento    File: TestWordDelimiterFilter.java
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
  int numIterations = atLeast(5);
  for (int i = 0; i < numIterations; i++) {
    final int flags = random().nextInt(512);
    final CharArraySet protectedWords;
    if (random().nextBoolean()) {
      protectedWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<String>(Arrays.asList("a", "b", "cd")), false);
    } else {
      protectedWords = null;
    }

    Analyzer a = new Analyzer() {

      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, protectedWords));
      }
    };
    checkRandomData(random(), a, 200, 20, false, false);
  }
}
Project: Maskana-Gestor-de-Conocimiento    File: TestWordDelimiterFilter.java
public void testEmptyTerm() throws IOException {
  Random random = random();
  for (int i = 0; i < 512; i++) {
    final int flags = i;
    final CharArraySet protectedWords;
    if (random.nextBoolean()) {
      protectedWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<String>(Arrays.asList("a", "b", "cd")), false);
    } else {
      protectedWords = null;
    }

    Analyzer a = new Analyzer() { 
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, protectedWords));
      }
    };
    // depending upon options, this thing may or may not preserve the empty term
    checkAnalysisConsistency(random, a, random.nextBoolean(), "");
  }
}
Project: search    File: TestIDVersionPostingsFormat.java
public void testMissingPayload() throws Exception {
  Directory dir = newDirectory();

  // Like MockAnalyzer, but without the maybePayload step, which can randomly attach an 8-byte payload.
  Analyzer a = new Analyzer() {
      @Override
      public TokenStreamComponents createComponents(String fieldName, Reader reader) {
        MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true, 100);
        tokenizer.setEnableChecks(true);
        MockTokenFilter filt = new MockTokenFilter(tokenizer, MockTokenFilter.EMPTY_STOPSET);
        return new TokenStreamComponents(tokenizer, filt);
      }
    };
  IndexWriterConfig iwc = newIndexWriterConfig(a);
  iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
  Document doc = new Document();
  doc.add(newTextField("id", "id", Field.Store.NO)); // tokenized "id" field; no version payload attached
  try {
    w.addDocument(doc);
    w.commit();
    fail("didn't hit expected exception");
  } catch (IllegalArgumentException iae) {
    // expected
  }

  w.close();
  dir.close();
}
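The test passes because IDVersionPostingsFormat requires every "id" token to carry a version payload, and the stripped-down analyzer above never attaches one. For illustration only, a hypothetical TokenFilter that attaches an 8-byte payload to each token; the big-endian encoding here is an assumption for the sketch, not the format's documented contract:

// Hypothetical filter: attaches a fixed 8-byte version payload to every token.
final class VersionPayloadFilter extends TokenFilter {
  private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
  private final long version;

  VersionPayloadFilter(TokenStream in, long version) {
    super(in);
    this.version = version;
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false;
    }
    byte[] bytes = new byte[8];
    for (int i = 0; i < 8; i++) {
      bytes[i] = (byte) (version >>> (56 - 8 * i)); // big-endian layout (assumed)
    }
    payloadAtt.setPayload(new BytesRef(bytes));
    return true;
  }
}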
Project: search    File: TestReversePathHierarchyTokenizer.java
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new ReversePathHierarchyTokenizer(newAttributeFactory(), reader, DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP);
      return new TokenStreamComponents(tokenizer, tokenizer);
    }    
  };
  // TODO: properly support positionLengthAttribute
  checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER, 20, false, false);
}
Project: search    File: TestReversePathHierarchyTokenizer.java
/** blast some random large strings through the analyzer */
public void testRandomHugeStrings() throws Exception {
  Random random = random();
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new ReversePathHierarchyTokenizer(newAttributeFactory(), reader, DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP);
      return new TokenStreamComponents(tokenizer, tokenizer);
    }    
  };
  // TODO: properly support positionLengthAttribute
  checkRandomData(random, a, 100*RANDOM_MULTIPLIER, 1027, false, false);
}
Project: search    File: TestSnowball.java
public void testEmptyTerm() throws IOException {
  for (final String lang : SNOWBALL_LANGS) {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new SnowballFilter(tokenizer, lang));
      }
    };
    checkOneTerm(a, "", "");
  }
}
Project: search    File: TestSnowball.java
public void checkRandomStrings(final String snowballLanguage) throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer t = new MockTokenizer(reader);
      return new TokenStreamComponents(t, new SnowballFilter(t, snowballLanguage));
    }  
  };
  checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
}
Project: SciGraph    File: EntityAnalyzer.java
@Override
protected TokenStreamComponents createComponents(String fieldName) {
  Tokenizer tokenizer = new WhitespaceTokenizer();
  TokenStream result =
      new PatternReplaceFilter(tokenizer,
          Pattern.compile("^([\\.!\\?,:;\"'\\(\\)]*)(.*?)([\\.!\\?,:;\"'\\(\\)]*)$"), "$2", true);
  result = new PatternReplaceFilter(result, Pattern.compile("'s"), "s", true);
  return new TokenStreamComponents(tokenizer, result);
}
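Unlike every other snippet on this page, EntityAnalyzer targets the newer Analyzer API (Lucene 5.x and later), where createComponents no longer receives a Reader; the framework feeds input to the Tokenizer via setReader when the analyzer is used. A minimal sketch of consuming such an analyzer (the field name and sample input are arbitrary):

// Run the analyzer over one string and print the resulting terms.
try (TokenStream ts = new EntityAnalyzer().tokenStream("body", "(Hello) world's text!")) {
  CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
  ts.reset();                       // required before the first incrementToken()
  while (ts.incrementToken()) {
    System.out.println(term);       // each term after both PatternReplaceFilters
  }
  ts.end();                         // required after the last incrementToken()
}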
Project: NYBC    File: TestReversePathHierarchyTokenizer.java
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new ReversePathHierarchyTokenizer(reader);
      return new TokenStreamComponents(tokenizer, tokenizer);
    }    
  };
  checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
}
Project: NYBC    File: TestReversePathHierarchyTokenizer.java
/** blast some random large strings through the analyzer */
public void testRandomHugeStrings() throws Exception {
  Random random = random();
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new ReversePathHierarchyTokenizer(reader);
      return new TokenStreamComponents(tokenizer, tokenizer);
    }    
  };
  checkRandomData(random, a, 100*RANDOM_MULTIPLIER, 1027);
}
Project: NYBC    File: TestSnowball.java
public void testEmptyTerm() throws IOException {
  for (final String lang : SNOWBALL_LANGS) {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new SnowballFilter(tokenizer, lang));
      }
    };
    checkOneTermReuse(a, "", "");
  }
}
Project: NYBC    File: TestSnowball.java
public void checkRandomStrings(final String snowballLanguage) throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer t = new MockTokenizer(reader);
      return new TokenStreamComponents(t, new SnowballFilter(t, snowballLanguage));
    }  
  };
  checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
}
Project: NYBC    File: TestPortugueseLightStemFilter.java
@Override
protected TokenStreamComponents createComponents(String fieldName,
    Reader reader) {
  Tokenizer source = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
  TokenStream result = new LowerCaseFilter(TEST_VERSION_CURRENT, source);
  return new TokenStreamComponents(source, new PortugueseLightStemFilter(result));
}
Project: NYBC    File: TestPortugueseLightStemFilter.java
public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("quilométricas"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new KeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new PortugueseLightStemFilter(sink));
    }
  };
  checkOneTerm(a, "quilométricas", "quilométricas");
}
Project: NYBC    File: TestPortugueseLightStemFilter.java
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new KeywordTokenizer(reader);
      return new TokenStreamComponents(tokenizer, new PortugueseLightStemFilter(tokenizer));
    }
  };
  checkOneTermReuse(a, "", "");
}
Project: NYBC    File: TestPortugueseMinimalStemFilter.java
@Override
protected TokenStreamComponents createComponents(String fieldName,
    Reader reader) {
  Tokenizer source = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
  TokenStream result = new LowerCaseFilter(TEST_VERSION_CURRENT, source);
  return new TokenStreamComponents(source, new PortugueseMinimalStemFilter(result));
}
Project: NYBC    File: TestPortugueseMinimalStemFilter.java
public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("quilométricas"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new KeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new PortugueseMinimalStemFilter(sink));
    }
  };
  checkOneTerm(a, "quilométricas", "quilométricas");
}
Project: NYBC    File: TestPortugueseMinimalStemFilter.java
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new KeywordTokenizer(reader);
      return new TokenStreamComponents(tokenizer, new PortugueseMinimalStemFilter(tokenizer));
    }
  };
  checkOneTermReuse(a, "", "");
}
Project: Pydev    File: IndexingTest.java
@Override
public void setUp() throws Exception {
    super.setUp();

    // Create it in-memory
    indexApi = new IndexApi(new RAMDirectory(), true);
    indexApi.registerTokenizer(IFields.PYTHON, CodeAnalyzer.createPythonStreamComponents());
    // The same TokenStreamComponents instance is registered for both the string and comment fields.
    TokenStreamComponents stringOrComment = CodeAnalyzer.createStringsOrCommentsStreamComponents();
    indexApi.registerTokenizer(IFields.STRING, stringOrComment);
    indexApi.registerTokenizer(IFields.COMMENT, stringOrComment);
}
Project: Maskana-Gestor-de-Conocimiento    File: TestReversePathHierarchyTokenizer.java
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new ReversePathHierarchyTokenizer(reader);
      return new TokenStreamComponents(tokenizer, tokenizer);
    }    
  };
  checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
}
Project: Maskana-Gestor-de-Conocimiento    File: TestReversePathHierarchyTokenizer.java
/** blast some random large strings through the analyzer */
public void testRandomHugeStrings() throws Exception {
  Random random = random();
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new ReversePathHierarchyTokenizer(reader);
      return new TokenStreamComponents(tokenizer, tokenizer);
    }    
  };
  checkRandomData(random, a, 100*RANDOM_MULTIPLIER, 1027);
}
Project: Maskana-Gestor-de-Conocimiento    File: TestSnowball.java
public void testEmptyTerm() throws IOException {
  for (final String lang : SNOWBALL_LANGS) {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new SnowballFilter(tokenizer, lang));
      }
    };
    checkOneTerm(a, "", "");
  }
}
Project: Maskana-Gestor-de-Conocimiento    File: TestSnowball.java
public void checkRandomStrings(final String snowballLanguage) throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer t = new MockTokenizer(reader);
      return new TokenStreamComponents(t, new SnowballFilter(t, snowballLanguage));
    }  
  };
  checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
}
Project: Maskana-Gestor-de-Conocimiento    File: TestPortugueseLightStemFilter.java
@Override
protected TokenStreamComponents createComponents(String fieldName,
    Reader reader) {
  Tokenizer source = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
  TokenStream result = new LowerCaseFilter(TEST_VERSION_CURRENT, source);
  return new TokenStreamComponents(source, new PortugueseLightStemFilter(result));
}
Project: Maskana-Gestor-de-Conocimiento    File: TestPortugueseLightStemFilter.java
public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("quilométricas"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new PortugueseLightStemFilter(sink));
    }
  };
  checkOneTerm(a, "quilométricas", "quilométricas");
}