Usage examples of the Java class org.apache.lucene.analysis.util.StringMockResourceLoader (snippets collected from several projects)

项目:NYBC    文件:TestSynonymFilterFactory.java   
/** test multiword offsets with the old impl
 * @deprecated Remove this test in Lucene 5.0 */
@Deprecated
public void testMultiwordOffsetsOld() throws Exception {
  Map<String,String> params = new HashMap<String,String>();
  params.put("synonyms", "synonyms.txt");
  SynonymFilterFactory factory = new SynonymFilterFactory();
  factory.setLuceneMatchVersion(Version.LUCENE_33);
  factory.init(params);
  factory.inform(new StringMockResourceLoader("national hockey league, nhl"));
  TokenStream stream = factory.create(
      new MockTokenizer(new StringReader("national hockey league"), MockTokenizer.WHITESPACE, false));
  // NOTE: the pre-3.4 impl reports the same start/end offsets for every token of a multiword synonym.
  assertTokenStreamContents(stream,
      new String[] { "national", "nhl", "hockey", "league" },
      new int[] { 0, 0, 0, 0 },
      new int[] { 22, 22, 22, 22 },
      new int[] { 1, 0, 1, 1 });
}
项目:NYBC    文件:TestDelimitedPayloadTokenFilterFactory.java   
/**
 * Verifies that the "float" encoder alias decodes each token's payload back to 0.1f.
 * Fix: the original body asserted {@code payData != null} twice on consecutive,
 * byte-identical lines — an accidental copy/paste duplicate (compare the sibling
 * testDelim, which has the assertion once); the duplicate is removed.
 */
public void testEncoder() throws Exception {
  Map<String,String> args = new HashMap<String, String>();
  args.put(DelimitedPayloadTokenFilterFactory.ENCODER_ATTR, "float");
  DelimitedPayloadTokenFilterFactory factory = new DelimitedPayloadTokenFilterFactory();
  factory.init(args);
  ResourceLoader loader = new StringMockResourceLoader("solr/collection1");
  factory.inform(loader);

  TokenStream input = new MockTokenizer(new StringReader("the|0.1 quick|0.1 red|0.1"), MockTokenizer.WHITESPACE, false);
  DelimitedPayloadTokenFilter tf = factory.create(input);
  tf.reset();
  while (tf.incrementToken()){
    PayloadAttribute payAttr = tf.getAttribute(PayloadAttribute.class);
    assertTrue("payAttr is null and it shouldn't be", payAttr != null);
    byte[] payData = payAttr.getPayload().bytes;
    assertTrue("payData is null and it shouldn't be", payData != null);
    float payFloat = PayloadHelper.decodeFloat(payData);
    assertTrue(payFloat + " does not equal: " + 0.1f, payFloat == 0.1f);
  }
}
项目:NYBC    文件:TestDelimitedPayloadTokenFilterFactory.java   
/** A custom delimiter ('*') with an explicit FloatEncoder class name must decode payloads of 0.1f. */
public void testDelim() throws Exception {
  Map<String,String> params = new HashMap<String, String>();
  params.put(DelimitedPayloadTokenFilterFactory.ENCODER_ATTR, FloatEncoder.class.getName());
  params.put(DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, "*");
  DelimitedPayloadTokenFilterFactory factory = new DelimitedPayloadTokenFilterFactory();
  factory.init(params);
  factory.inform(new StringMockResourceLoader("solr/collection1"));

  TokenStream source = new MockTokenizer(new StringReader("the*0.1 quick*0.1 red*0.1"), MockTokenizer.WHITESPACE, false);
  DelimitedPayloadTokenFilter filter = factory.create(source);
  filter.reset();
  while (filter.incrementToken()){
    PayloadAttribute payloadAtt = filter.getAttribute(PayloadAttribute.class);
    assertTrue("payAttr is null and it shouldn't be", payloadAtt != null);
    byte[] raw = payloadAtt.getPayload().bytes;
    assertTrue("payData is null and it shouldn't be", raw != null);
    float decoded = PayloadHelper.decodeFloat(raw);
    assertTrue(decoded + " does not equal: " + 0.1f, decoded == 0.1f);
  }
}
项目:NYBC    文件:TestCollationKeyFilterFactory.java   
/** Canonical decomposition: composed vs. decomposed Turkish text must collate identically. */
public void testNormalization() throws IOException {
  String composed = "I W\u0049\u0307LL USE TURKİSH CASING";
  String lowercased = "ı will use turkish casıng";
  Map<String,String> params = new HashMap<String,String>();
  params.put("language", "tr");
  params.put("strength", "primary");
  params.put("decomposition", "canonical");
  CollationKeyFilterFactory factory = new CollationKeyFilterFactory();
  factory.init(params);
  factory.inform(new StringMockResourceLoader(""));
  TokenStream first = factory.create(
      new MockTokenizer(new StringReader(composed), MockTokenizer.KEYWORD, false));
  TokenStream second = factory.create(
      new MockTokenizer(new StringReader(lowercased), MockTokenizer.KEYWORD, false));
  assertCollatesToSame(first, second);
}
项目:NYBC    文件:TestCollationKeyFilterFactory.java   
/** Full decomposition under an "identical"-strength zh collator.
 * NOTE(review): both inputs read "Testing" here — upstream this test compares full-width
 * vs. half-width forms; the full-width characters were likely lost in transcription. TODO confirm. */
public void testFullDecomposition() throws IOException {
  String fullWidth = "Testing";
  String halfWidth = "Testing";
  Map<String,String> params = new HashMap<String,String>();
  params.put("language", "zh");
  params.put("strength", "identical");
  params.put("decomposition", "full");
  CollationKeyFilterFactory factory = new CollationKeyFilterFactory();
  factory.init(params);
  factory.inform(new StringMockResourceLoader(""));
  TokenStream fullStream = factory.create(
      new MockTokenizer(new StringReader(fullWidth), MockTokenizer.KEYWORD, false));
  TokenStream halfStream = factory.create(
      new MockTokenizer(new StringReader(halfWidth), MockTokenizer.KEYWORD, false));
  assertCollatesToSame(fullStream, halfStream);
}
项目:NYBC    文件:TestCollationKeyFilterFactory.java   
/** Secondary strength ignores case, so upper- and lower-case English collate the same. */
public void testSecondaryStrength() throws IOException {
  Map<String,String> params = new HashMap<String,String>();
  params.put("language", "en");
  params.put("strength", "secondary");
  params.put("decomposition", "no");
  CollationKeyFilterFactory factory = new CollationKeyFilterFactory();
  factory.init(params);
  factory.inform(new StringMockResourceLoader(""));
  TokenStream upperStream = factory.create(
      new MockTokenizer(new StringReader("TESTING"), MockTokenizer.KEYWORD, false));
  TokenStream lowerStream = factory.create(
      new MockTokenizer(new StringReader("testing"), MockTokenizer.KEYWORD, false));
  assertCollatesToSame(upperStream, lowerStream);
}
项目:search    文件:TestMultiWordSynonyms.java   
/** Partial multiword match ("a e" against synonym "a b c") must not corrupt the stream. */
public void testMultiWordSynonyms() throws Exception {
  TokenStream ts = new MockTokenizer(new StringReader("a e"), MockTokenizer.WHITESPACE, false);
  ts = tokenFilterFactory("Synonym", TEST_VERSION_CURRENT,
      new StringMockResourceLoader("a b c,d"),
      "synonyms", "synonyms.txt").create(ts);
  // Regression guard: a buggy implementation emitted ["e","e"] here.
  assertTokenStreamContents(ts, new String[] { "a", "e" });
}
项目:search    文件:TestSynonymFilterFactory.java   
/** test multiword offsets with the old impl
 * @deprecated Remove this test in Lucene 5.0 */
@Deprecated
public void testMultiwordOffsetsOld() throws Exception {
  TokenStream ts = new MockTokenizer(new StringReader("national hockey league"), MockTokenizer.WHITESPACE, false);
  ts = tokenFilterFactory("Synonym", Version.LUCENE_3_3, new StringMockResourceLoader("national hockey league, nhl"),
      "synonyms", "synonyms.txt").create(ts);
  // NOTE: the pre-3.4 impl reports identical offsets for every token of a multiword synonym.
  assertTokenStreamContents(ts,
      new String[] { "national", "nhl", "hockey", "league" },
      new int[] { 0, 0, 0, 0 },
      new int[] { 22, 22, 22, 22 },
      new int[] { 1, 0, 1, 1 });
}
项目:search    文件:TestSynonymFilterFactory.java   
/** if the synonyms are completely empty, test that we still analyze correctly */
public void testEmptySynonyms() throws Exception {
  TokenStream ts = new MockTokenizer(new StringReader("GB"), MockTokenizer.WHITESPACE, false);
  ts = tokenFilterFactory("Synonym", TEST_VERSION_CURRENT,
      new StringMockResourceLoader(""), // zero-length synonyms file
      "synonyms", "synonyms.txt").create(ts);
  assertTokenStreamContents(ts, new String[] { "GB" });
}
项目:search    文件:TestStemmerOverrideFilterFactory.java   
/** StemmerOverride dictionary entries win over the Porter stemmer. */
public void testKeywords() throws Exception {
  // the stemdict maps "dogs" straight to "cat"
  TokenStream ts = new MockTokenizer(new StringReader("testing dogs"), MockTokenizer.WHITESPACE, false);
  ts = tokenFilterFactory("StemmerOverride", TEST_VERSION_CURRENT,
      new StringMockResourceLoader("dogs\tcat"),
      "dictionary", "stemdict.txt").create(ts);
  ts = tokenFilterFactory("PorterStem").create(ts);
  assertTokenStreamContents(ts, new String[] { "test", "cat" });
}
项目:search    文件:TestStemmerOverrideFilterFactory.java   
/** With ignoreCase=true the override dictionary matches "DoGs" as well. */
public void testKeywordsCaseInsensitive() throws Exception {
  TokenStream ts = new MockTokenizer(new StringReader("testing DoGs"), MockTokenizer.WHITESPACE, false);
  ts = tokenFilterFactory("StemmerOverride", TEST_VERSION_CURRENT,
      new StringMockResourceLoader("dogs\tcat"),
      "dictionary", "stemdict.txt",
      "ignoreCase", "true").create(ts);
  ts = tokenFilterFactory("PorterStem").create(ts);
  assertTokenStreamContents(ts, new String[] { "test", "cat" });
}
项目:search    文件:TestKeywordMarkerFilterFactory.java   
/** Tokens listed in the protected-words file must not be stemmed. */
public void testKeywords() throws Exception {
  TokenStream ts = new MockTokenizer(new StringReader("dogs cats"), MockTokenizer.WHITESPACE, false);
  ts = tokenFilterFactory("KeywordMarker", TEST_VERSION_CURRENT,
      new StringMockResourceLoader("cats"),
      "protected", "protwords.txt").create(ts);
  ts = tokenFilterFactory("PorterStem").create(ts);
  assertTokenStreamContents(ts, new String[] { "dog", "cats" });
}
项目:search    文件:TestKeywordMarkerFilterFactory.java   
/** Protected-words file and regex pattern may be combined; either match protects a token. */
public void testKeywordsMixed() throws Exception {
  TokenStream ts = new MockTokenizer(new StringReader("dogs cats birds"), MockTokenizer.WHITESPACE, false);
  ts = tokenFilterFactory("KeywordMarker", TEST_VERSION_CURRENT,
      new StringMockResourceLoader("cats"),
      "protected", "protwords.txt",
      "pattern", "birds|Dogs").create(ts);
  ts = tokenFilterFactory("PorterStem").create(ts);
  assertTokenStreamContents(ts, new String[] { "dog", "cats", "birds" });
}
项目:search    文件:TestKeywordMarkerFilterFactory.java   
/** ignoreCase=true protects both "cats" and "Cats" from stemming. */
public void testKeywordsCaseInsensitive() throws Exception {
  TokenStream ts = new MockTokenizer(new StringReader("dogs cats Cats"), MockTokenizer.WHITESPACE, false);
  ts = tokenFilterFactory("KeywordMarker", TEST_VERSION_CURRENT,
      new StringMockResourceLoader("cats"),
      "protected", "protwords.txt",
      "ignoreCase", "true").create(ts);
  ts = tokenFilterFactory("PorterStem").create(ts);
  assertTokenStreamContents(ts, new String[] { "dog", "cats", "Cats" });
}
项目:search    文件:TestKeywordMarkerFilterFactory.java   
/** Case-insensitive matching applies to the word list and the pattern alike. */
public void testKeywordsCaseInsensitiveMixed() throws Exception {
  TokenStream ts = new MockTokenizer(new StringReader("dogs cats Cats Birds birds"), MockTokenizer.WHITESPACE, false);
  ts = tokenFilterFactory("KeywordMarker", TEST_VERSION_CURRENT,
      new StringMockResourceLoader("cats"),
      "protected", "protwords.txt",
      "pattern", "birds",
      "ignoreCase", "true").create(ts);
  ts = tokenFilterFactory("PorterStem").create(ts);
  assertTokenStreamContents(ts, new String[] { "dog", "cats", "Cats", "Birds", "birds" });
}
项目:search    文件:TestSnowballPorterFilterFactory.java   
/**
 * Test the protected words mechanism of SnowballPorterFilterFactory
 */
public void testProtected() throws Exception {
  TokenStream ts = new MockTokenizer(new StringReader("ridding of some stemming"), MockTokenizer.WHITESPACE, false);
  ts = tokenFilterFactory("SnowballPorter", TEST_VERSION_CURRENT,
      new StringMockResourceLoader("ridding"),
      "protected", "protwords.txt",
      "language", "English").create(ts);
  // "ridding" is protected and survives; "stemming" is reduced to "stem"
  assertTokenStreamContents(ts, new String[] { "ridding", "of", "some", "stem" });
}
项目:search    文件:TestCollationKeyFilterFactory.java   
/** Custom tailored collation rules (DIN 5007-2): umlaut and digraph spellings collate the same. */
public void testCustomRules() throws Exception {
  RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new Locale("de", "DE"));

  // DIN 5007-2 sorts a-umlaut as "ae", o-umlaut as "oe", u-umlaut as "ue".
  // NOTE(review): the UE rule tailors to lowercase u\u0308 — matches the other copies in this
  // file, but looks asymmetric with the AE/OE rules; confirm against upstream before changing.
  String DIN5007_2_tailorings =
    "& ae , a\u0308 & AE , A\u0308"+
    "& oe , o\u0308 & OE , O\u0308"+
    "& ue , u\u0308 & UE , u\u0308";

  String tailoredRules =
      new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings).getRules();
  // In real usage these tailored rules would be saved to the file named by "custom".
  Map<String,String> args = new HashMap<>();
  args.put("custom", "rules.txt");
  args.put("strength", "primary");
  CollationKeyFilterFactory factory = new CollationKeyFilterFactory(args);
  factory.inform(new StringMockResourceLoader(tailoredRules));
  TokenStream umlautStream = factory.create(
      new MockTokenizer(new StringReader("Töne"), MockTokenizer.KEYWORD, false));
  TokenStream digraphStream = factory.create(
      new MockTokenizer(new StringReader("Toene"), MockTokenizer.KEYWORD, false));

  assertCollatesToSame(umlautStream, digraphStream);
}
项目:NYBC    文件:TestMultiWordSynonyms.java   
/** Partial multiword match ("a e" against synonym "a b c") must not corrupt the stream. */
public void testMultiWordSynonyms() throws IOException {
  Map<String,String> params = new HashMap<String,String>();
  params.put("synonyms", "synonyms.txt");
  SynonymFilterFactory factory = new SynonymFilterFactory();
  factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  factory.init(params);
  factory.inform(new StringMockResourceLoader("a b c,d"));
  TokenStream stream = factory.create(
      new MockTokenizer(new StringReader("a e"), MockTokenizer.WHITESPACE, false));
  // Regression guard: a buggy implementation emitted ["e","e"] here.
  assertTokenStreamContents(stream, new String[] { "a", "e" });
}
项目:NYBC    文件:TestSynonymFilterFactory.java   
/** if the synonyms are completely empty, test that we still analyze correctly */
public void testEmptySynonyms() throws Exception {
  Map<String,String> params = new HashMap<String,String>();
  params.put("synonyms", "synonyms.txt");
  SynonymFilterFactory factory = new SynonymFilterFactory();
  factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  factory.init(params);
  factory.inform(new StringMockResourceLoader("")); // zero-length synonyms file
  TokenStream stream = factory.create(
      new MockTokenizer(new StringReader("GB"), MockTokenizer.WHITESPACE, false));
  assertTokenStreamContents(stream, new String[] { "GB" });
}
项目:NYBC    文件:TestStemmerOverrideFilterFactory.java   
/** StemmerOverride dictionary entries win over the Porter stemmer. */
public void testKeywords() throws IOException {
  // the stemdict maps "dogs" straight to "cat"
  Tokenizer tokenizer = new MockTokenizer(
      new StringReader("testing dogs"), MockTokenizer.WHITESPACE, false);
  Map<String,String> params = new HashMap<String,String>();
  params.put("dictionary", "stemdict.txt");
  StemmerOverrideFilterFactory factory = new StemmerOverrideFilterFactory();
  factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  factory.init(params);
  factory.inform(new StringMockResourceLoader("dogs\tcat"));

  TokenStream stream = new PorterStemFilter(factory.create(tokenizer));
  assertTokenStreamContents(stream, new String[] { "test", "cat" });
}
项目:NYBC    文件:TestStemmerOverrideFilterFactory.java   
/** With ignoreCase=true the override dictionary matches "DoGs" as well. */
public void testKeywordsCaseInsensitive() throws IOException {
  Tokenizer tokenizer = new MockTokenizer(
      new StringReader("testing DoGs"), MockTokenizer.WHITESPACE, false);
  Map<String,String> params = new HashMap<String,String>();
  params.put("dictionary", "stemdict.txt");
  params.put("ignoreCase", "true");
  StemmerOverrideFilterFactory factory = new StemmerOverrideFilterFactory();
  factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  factory.init(params);
  factory.inform(new StringMockResourceLoader("dogs\tcat"));

  TokenStream stream = new PorterStemFilter(factory.create(tokenizer));
  assertTokenStreamContents(stream, new String[] { "test", "cat" });
}
项目:NYBC    文件:TestKeywordMarkerFilterFactory.java   
/** Tokens listed in the protected-words file must not be stemmed. */
public void testKeywords() throws IOException {
  Tokenizer tokenizer = new MockTokenizer(
      new StringReader("dogs cats"), MockTokenizer.WHITESPACE, false);
  Map<String,String> params = new HashMap<String,String>();
  params.put("protected", "protwords.txt");
  KeywordMarkerFilterFactory factory = new KeywordMarkerFilterFactory();
  factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  factory.init(params);
  factory.inform(new StringMockResourceLoader("cats"));

  TokenStream stream = new PorterStemFilter(factory.create(tokenizer));
  assertTokenStreamContents(stream, new String[] { "dog", "cats" });
}
项目:NYBC    文件:TestKeywordMarkerFilterFactory.java   
/** ignoreCase=true protects both "cats" and "Cats" from stemming. */
public void testKeywordsCaseInsensitive() throws IOException {
  Tokenizer tokenizer = new MockTokenizer(
      new StringReader("dogs cats Cats"), MockTokenizer.WHITESPACE, false);
  Map<String,String> params = new HashMap<String,String>();
  params.put("protected", "protwords.txt");
  params.put("ignoreCase", "true");
  KeywordMarkerFilterFactory factory = new KeywordMarkerFilterFactory();
  factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  factory.init(params);
  factory.inform(new StringMockResourceLoader("cats"));

  TokenStream stream = new PorterStemFilter(factory.create(tokenizer));
  assertTokenStreamContents(stream, new String[] { "dog", "cats", "Cats" });
}
项目:NYBC    文件:TestSnowballPorterFilterFactory.java   
/**
 * Test the protected words mechanism of SnowballPorterFilterFactory
 */
public void testProtected() throws Exception {
  Map<String,String> params = new HashMap<String,String>();
  params.put("protected", "protwords.txt");
  params.put("language", "English");
  SnowballPorterFilterFactory factory = new SnowballPorterFilterFactory();
  factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  factory.init(params);
  factory.inform(new StringMockResourceLoader("ridding"));
  Tokenizer tokenizer = new MockTokenizer(
      new StringReader("ridding of some stemming"), MockTokenizer.WHITESPACE, false);
  // "ridding" is protected and survives; "stemming" is reduced to "stem"
  assertTokenStreamContents(factory.create(tokenizer),
      new String[] { "ridding", "of", "some", "stem" });
}
项目:NYBC    文件:TestCollationKeyFilterFactory.java   
/** Primary-strength Turkish collation: upper- and lower-case forms collate identically. */
public void testBasicUsage() throws IOException {
  String upper = "I WİLL USE TURKİSH CASING";
  String lower = "ı will use turkish casıng";
  Map<String,String> params = new HashMap<String,String>();
  params.put("language", "tr");
  params.put("strength", "primary");
  CollationKeyFilterFactory factory = new CollationKeyFilterFactory();
  factory.init(params);
  factory.inform(new StringMockResourceLoader(""));
  TokenStream upperStream = factory.create(
      new MockTokenizer(new StringReader(upper), MockTokenizer.KEYWORD, false));
  TokenStream lowerStream = factory.create(
      new MockTokenizer(new StringReader(lower), MockTokenizer.KEYWORD, false));
  assertCollatesToSame(upperStream, lowerStream);
}
项目:NYBC    文件:TestCollationKeyFilterFactory.java   
/** Custom tailored collation rules (DIN 5007-2): umlaut and digraph spellings collate the same. */
public void testCustomRules() throws Exception {
  RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new Locale("de", "DE"));

  // DIN 5007-2 sorts a-umlaut as "ae", o-umlaut as "oe", u-umlaut as "ue".
  // NOTE(review): the UE rule tailors to lowercase u\u0308 — matches the other copies in this
  // file, but looks asymmetric with the AE/OE rules; confirm against upstream before changing.
  String DIN5007_2_tailorings =
    "& ae , a\u0308 & AE , A\u0308"+
    "& oe , o\u0308 & OE , O\u0308"+
    "& ue , u\u0308 & UE , u\u0308";

  String tailoredRules =
      new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings).getRules();
  // In real usage these tailored rules would be saved to the file named by "custom".
  Map<String,String> params = new HashMap<String,String>();
  params.put("custom", "rules.txt");
  params.put("strength", "primary");
  CollationKeyFilterFactory factory = new CollationKeyFilterFactory();
  factory.init(params);
  factory.inform(new StringMockResourceLoader(tailoredRules));
  TokenStream umlautStream = factory.create(
      new MockTokenizer(new StringReader("Töne"), MockTokenizer.KEYWORD, false));
  TokenStream digraphStream = factory.create(
      new MockTokenizer(new StringReader("Toene"), MockTokenizer.KEYWORD, false));

  assertCollatesToSame(umlautStream, digraphStream);
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestMultiWordSynonyms.java   
/** Partial multiword match ("a e" against synonym "a b c") must not corrupt the stream. */
public void testMultiWordSynonyms() throws Exception {
  TokenStream ts = new MockTokenizer(new StringReader("a e"), MockTokenizer.WHITESPACE, false);
  ts = tokenFilterFactory("Synonym", TEST_VERSION_CURRENT,
      new StringMockResourceLoader("a b c,d"),
      "synonyms", "synonyms.txt").create(ts);
  // Regression guard: a buggy implementation emitted ["e","e"] here.
  assertTokenStreamContents(ts, new String[] { "a", "e" });
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestSynonymFilterFactory.java   
/** test multiword offsets with the old impl
 * @deprecated Remove this test in Lucene 5.0 */
@Deprecated
public void testMultiwordOffsetsOld() throws Exception {
  TokenStream ts = new MockTokenizer(new StringReader("national hockey league"), MockTokenizer.WHITESPACE, false);
  ts = tokenFilterFactory("Synonym", Version.LUCENE_33, new StringMockResourceLoader("national hockey league, nhl"),
      "synonyms", "synonyms.txt").create(ts);
  // NOTE: the pre-3.4 impl reports identical offsets for every token of a multiword synonym.
  assertTokenStreamContents(ts,
      new String[] { "national", "nhl", "hockey", "league" },
      new int[] { 0, 0, 0, 0 },
      new int[] { 22, 22, 22, 22 },
      new int[] { 1, 0, 1, 1 });
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestSynonymFilterFactory.java   
/** if the synonyms are completely empty, test that we still analyze correctly */
public void testEmptySynonyms() throws Exception {
  TokenStream ts = new MockTokenizer(new StringReader("GB"), MockTokenizer.WHITESPACE, false);
  ts = tokenFilterFactory("Synonym", TEST_VERSION_CURRENT,
      new StringMockResourceLoader(""), // zero-length synonyms file
      "synonyms", "synonyms.txt").create(ts);
  assertTokenStreamContents(ts, new String[] { "GB" });
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestStemmerOverrideFilterFactory.java   
/** StemmerOverride dictionary entries win over the Porter stemmer. */
public void testKeywords() throws Exception {
  // the stemdict maps "dogs" straight to "cat"
  TokenStream ts = new MockTokenizer(new StringReader("testing dogs"), MockTokenizer.WHITESPACE, false);
  ts = tokenFilterFactory("StemmerOverride", TEST_VERSION_CURRENT,
      new StringMockResourceLoader("dogs\tcat"),
      "dictionary", "stemdict.txt").create(ts);
  ts = tokenFilterFactory("PorterStem").create(ts);
  assertTokenStreamContents(ts, new String[] { "test", "cat" });
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestStemmerOverrideFilterFactory.java   
/** With ignoreCase=true the override dictionary matches "DoGs" as well. */
public void testKeywordsCaseInsensitive() throws Exception {
  TokenStream ts = new MockTokenizer(new StringReader("testing DoGs"), MockTokenizer.WHITESPACE, false);
  ts = tokenFilterFactory("StemmerOverride", TEST_VERSION_CURRENT,
      new StringMockResourceLoader("dogs\tcat"),
      "dictionary", "stemdict.txt",
      "ignoreCase", "true").create(ts);
  ts = tokenFilterFactory("PorterStem").create(ts);
  assertTokenStreamContents(ts, new String[] { "test", "cat" });
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestKeywordMarkerFilterFactory.java   
/** Tokens listed in the protected-words file must not be stemmed. */
public void testKeywords() throws Exception {
  TokenStream ts = new MockTokenizer(new StringReader("dogs cats"), MockTokenizer.WHITESPACE, false);
  ts = tokenFilterFactory("KeywordMarker", TEST_VERSION_CURRENT,
      new StringMockResourceLoader("cats"),
      "protected", "protwords.txt").create(ts);
  ts = tokenFilterFactory("PorterStem").create(ts);
  assertTokenStreamContents(ts, new String[] { "dog", "cats" });
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestKeywordMarkerFilterFactory.java   
/** Protected-words file and regex pattern may be combined; either match protects a token. */
public void testKeywordsMixed() throws Exception {
  TokenStream ts = new MockTokenizer(new StringReader("dogs cats birds"), MockTokenizer.WHITESPACE, false);
  ts = tokenFilterFactory("KeywordMarker", TEST_VERSION_CURRENT,
      new StringMockResourceLoader("cats"),
      "protected", "protwords.txt",
      "pattern", "birds|Dogs").create(ts);
  ts = tokenFilterFactory("PorterStem").create(ts);
  assertTokenStreamContents(ts, new String[] { "dog", "cats", "birds" });
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestKeywordMarkerFilterFactory.java   
/** ignoreCase=true protects both "cats" and "Cats" from stemming. */
public void testKeywordsCaseInsensitive() throws Exception {
  TokenStream ts = new MockTokenizer(new StringReader("dogs cats Cats"), MockTokenizer.WHITESPACE, false);
  ts = tokenFilterFactory("KeywordMarker", TEST_VERSION_CURRENT,
      new StringMockResourceLoader("cats"),
      "protected", "protwords.txt",
      "ignoreCase", "true").create(ts);
  ts = tokenFilterFactory("PorterStem").create(ts);
  assertTokenStreamContents(ts, new String[] { "dog", "cats", "Cats" });
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestKeywordMarkerFilterFactory.java   
/** Case-insensitive matching applies to the word list and the pattern alike. */
public void testKeywordsCaseInsensitiveMixed() throws Exception {
  TokenStream ts = new MockTokenizer(new StringReader("dogs cats Cats Birds birds"), MockTokenizer.WHITESPACE, false);
  ts = tokenFilterFactory("KeywordMarker", TEST_VERSION_CURRENT,
      new StringMockResourceLoader("cats"),
      "protected", "protwords.txt",
      "pattern", "birds",
      "ignoreCase", "true").create(ts);
  ts = tokenFilterFactory("PorterStem").create(ts);
  assertTokenStreamContents(ts, new String[] { "dog", "cats", "Cats", "Birds", "birds" });
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestSnowballPorterFilterFactory.java   
/**
 * Test the protected words mechanism of SnowballPorterFilterFactory
 */
public void testProtected() throws Exception {
  TokenStream ts = new MockTokenizer(new StringReader("ridding of some stemming"), MockTokenizer.WHITESPACE, false);
  ts = tokenFilterFactory("SnowballPorter", TEST_VERSION_CURRENT,
      new StringMockResourceLoader("ridding"),
      "protected", "protwords.txt",
      "language", "English").create(ts);
  // "ridding" is protected and survives; "stemming" is reduced to "stem"
  assertTokenStreamContents(ts, new String[] { "ridding", "of", "some", "stem" });
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestCollationKeyFilterFactory.java   
/** Custom tailored collation rules (DIN 5007-2): umlaut and digraph spellings collate the same. */
public void testCustomRules() throws Exception {
  RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new Locale("de", "DE"));

  // DIN 5007-2 sorts a-umlaut as "ae", o-umlaut as "oe", u-umlaut as "ue".
  // NOTE(review): the UE rule tailors to lowercase u\u0308 — matches the other copies in this
  // file, but looks asymmetric with the AE/OE rules; confirm against upstream before changing.
  String DIN5007_2_tailorings =
    "& ae , a\u0308 & AE , A\u0308"+
    "& oe , o\u0308 & OE , O\u0308"+
    "& ue , u\u0308 & UE , u\u0308";

  String tailoredRules =
      new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings).getRules();
  // In real usage these tailored rules would be saved to the file named by "custom".
  Map<String,String> params = new HashMap<String,String>();
  params.put("custom", "rules.txt");
  params.put("strength", "primary");
  CollationKeyFilterFactory factory = new CollationKeyFilterFactory(params);
  factory.inform(new StringMockResourceLoader(tailoredRules));
  TokenStream umlautStream = factory.create(
      new MockTokenizer(new StringReader("Töne"), MockTokenizer.KEYWORD, false));
  TokenStream digraphStream = factory.create(
      new MockTokenizer(new StringReader("Toene"), MockTokenizer.KEYWORD, false));

  assertCollatesToSame(umlautStream, digraphStream);
}