Example source code for the Java class org.apache.lucene.analysis.CharFilter
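
All of the snippets collected below share one pattern: wrap a java.io.Reader in a concrete CharFilter subclass (MappingCharFilter, PatternReplaceCharFilter, ICUNormalizer2CharFilter, JapaneseIterationMarkCharFilter, ...), drain it with read(), and, where offsets matter, map positions in the filtered text back to the original input with correctOffset(). The following stand-alone sketch shows that pattern outside of any test harness; it assumes Lucene's analysis-common module on the classpath, and the class name and sample input are illustrative only, not taken from the projects below:

import java.io.IOException;
import java.io.StringReader;
import java.util.regex.Pattern;

import org.apache.lucene.analysis.CharFilter;
import org.apache.lucene.analysis.pattern.PatternReplaceCharFilter;

public class CharFilterDrainExample {
  public static void main(String[] args) throws IOException {
    // Collapse runs of whitespace to a single space before tokenization.
    CharFilter filter = new PatternReplaceCharFilter(
        Pattern.compile("\\s+"), " ", new StringReader("aa   bb\tcc"));

    // Drain the filter the same way the tests below do: read into a small
    // buffer until read() signals end-of-stream with -1.
    char[] buf = new char[10];
    StringBuilder out = new StringBuilder();
    int len;
    while ((len = filter.read(buf)) != -1) {
      out.append(buf, 0, len);
    }

    System.out.println(out); // "aa bb cc"
    // correctOffset() maps an offset in the filtered text back to the
    // corresponding offset in the original, unfiltered input.
    System.out.println(filter.correctOffset(3));
    filter.close();
  }
}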

Project: elasticsearch-analysis-openkoreantext    File: OpenKoreanTextNormalizerTest.java
@Test
public void testNormalizerCharFilter() throws Exception {
    String query = "한국어를 처리하는 예시입니닼ㅋ. 오픈코리안텍스틓ㅎㅎㅎㅎㅎㅎㅎ";
    String expected = "한국어를 처리하는 예시입니다ㅋ. 오픈코리안텍스트ㅎㅎㅎ";

    CharFilter inputReader = new OpenKoreanTextNormalizer(new StringReader(query));

    char[] tempBuff = new char[10];
    StringBuilder actual = new StringBuilder();

    while (true) {
        int length = inputReader.read(tempBuff);
        if (length == -1) break;
        actual.append(tempBuff, 0, length);
    }

    Assert.assertEquals(expected, actual.toString());
}
Project: elasticsearch_my    File: SimpleIcuNormalizerCharFilterTests.java
public void testDefaultSetting() throws Exception {
    Settings settings = Settings.builder()
        .put("index.analysis.char_filter.myNormalizerChar.type", "icu_normalizer")
        .build();
    TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisICUPlugin());
    CharFilterFactory charFilterFactory = analysis.charFilter.get("myNormalizerChar");

    String input = "ʰ㌰゙5℃№㈱㌘,バッファーの正規化のテスト.㋐㋑㋒㋓㋔カキクケコザジズゼゾg̈각/각நிเกषिchkʷक्षि";
    Normalizer2 normalizer = Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE);
    String expectedOutput = normalizer.normalize(input);
    CharFilter inputReader = (CharFilter) charFilterFactory.create(new StringReader(input));
    char[] tempBuff = new char[10];
    StringBuilder output = new StringBuilder();
    while (true) {
        int length = inputReader.read(tempBuff);
        if (length == -1) break;
        output.append(tempBuff, 0, length);
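        // After each partial read, the filtered output so far must equal the
        // normalization of the original input prefix it corresponds to
        // (mapped back via correctOffset).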
        assertEquals(output.toString(), normalizer.normalize(input.substring(0, inputReader.correctOffset(output.length()))));
    }
    assertEquals(expectedOutput, output.toString());
}
Project: elasticsearch_my    File: SimpleIcuNormalizerCharFilterTests.java
public void testNameAndModeSetting() throws Exception {
    Settings settings = Settings.builder()
        .put("index.analysis.char_filter.myNormalizerChar.type", "icu_normalizer")
        .put("index.analysis.char_filter.myNormalizerChar.name", "nfkc")
        .put("index.analysis.char_filter.myNormalizerChar.mode", "decompose")
        .build();
    TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisICUPlugin());
    CharFilterFactory charFilterFactory = analysis.charFilter.get("myNormalizerChar");

    String input = "ʰ㌰゙5℃№㈱㌘,バッファーの正規化のテスト.㋐㋑㋒㋓㋔カキクケコザジズゼゾg̈각/각நிเกषिchkʷक्षि";
    Normalizer2 normalizer = Normalizer2.getInstance(null, "nfkc", Normalizer2.Mode.DECOMPOSE);
    String expectedOutput = normalizer.normalize(input);
    CharFilter inputReader = (CharFilter) charFilterFactory.create(new StringReader(input));
    char[] tempBuff = new char[10];
    StringBuilder output = new StringBuilder();
    while (true) {
        int length = inputReader.read(tempBuff);
        if (length == -1) break;
        output.append(tempBuff, 0, length);
        assertEquals(output.toString(), normalizer.normalize(input.substring(0, inputReader.correctOffset(output.length()))));
    }
    assertEquals(expectedOutput, output.toString());
}
Project: search    File: TestICUNormalizer2CharFilter.java
public void testNormalization() throws IOException {
  String input = "ʰ㌰゙5℃№㈱㌘,バッファーの正規化のテスト.㋐㋑㋒㋓㋔カキクケコザジズゼゾg̈각/각நிเกषिchkʷक्षि";
  Normalizer2 normalizer = Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE);
  String expectedOutput = normalizer.normalize(input);

  CharFilter reader = new ICUNormalizer2CharFilter(new StringReader(input), normalizer);
  char[] tempBuff = new char[10];
  StringBuilder output = new StringBuilder();
  while (true) {
    int length = reader.read(tempBuff);
    if (length == -1) {
      break;
    }
    output.append(tempBuff, 0, length);
    assertEquals(output.toString(), normalizer.normalize(input.substring(0, reader.correctOffset(output.length()))));
  }

  assertEquals(expectedOutput, output.toString());
}
Project: search    File: TestRandomChains.java
private CharFilterSpec newCharFilterChain(Random random, Reader reader) {
  CharFilterSpec spec = new CharFilterSpec();
  spec.reader = reader;
  StringBuilder descr = new StringBuilder();
  int numFilters = random.nextInt(3);
  for (int i = 0; i < numFilters; i++) {
    while (true) {
      final Constructor<? extends CharFilter> ctor = charfilters.get(random.nextInt(charfilters.size()));
      final Object args[] = newCharFilterArgs(random, spec.reader, ctor.getParameterTypes());
      if (broken(ctor, args)) {
        continue;
      }
      reader = createComponent(ctor, args, descr);
      if (reader != null) {
        spec.reader = reader;
        break;
      }
    }
  }
  spec.toString = descr.toString();
  return spec;
}
Project: NYBC    File: TestJapaneseIterationMarkCharFilterFactory.java
public void testIterationMarksWithJapaneseTokenizer() throws IOException {
  JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory();
  Map<String, String> tokenizerArgs = Collections.emptyMap();
  tokenizerFactory.init(tokenizerArgs);
  tokenizerFactory.inform(new StringMockResourceLoader(""));

  JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory();
  Map<String, String> filterArgs = Collections.emptyMap();
  filterFactory.init(filterArgs);

  CharFilter filter = filterFactory.create(
      new StringReader("時々馬鹿々々しいところゞゝゝミスヾ")
  );
  TokenStream tokenStream = tokenizerFactory.create(filter);
  assertTokenStreamContents(tokenStream, new String[]{"時時", "馬鹿馬鹿しい", "ところどころ", "ミ", "スズ"});
}
Project: NYBC    File: TestJapaneseIterationMarkCharFilterFactory.java
public void testKanjiOnlyIterationMarksWithJapaneseTokenizer() throws IOException {
  JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory();
  Map<String, String> tokenizerArgs = Collections.emptyMap();
  tokenizerFactory.init(tokenizerArgs);
  tokenizerFactory.inform(new StringMockResourceLoader(""));

  JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory();
  Map<String, String> filterArgs = new HashMap<String, String>();
  filterArgs.put("normalizeKanji", "true");
  filterArgs.put("normalizeKana", "false");
  filterFactory.init(filterArgs);

  CharFilter filter = filterFactory.create(
      new StringReader("時々馬鹿々々しいところゞゝゝミスヾ")
  );
  TokenStream tokenStream = tokenizerFactory.create(filter);
  assertTokenStreamContents(tokenStream, new String[]{"時時", "馬鹿馬鹿しい", "ところ", "ゞ", "ゝ", "ゝ", "ミス", "ヾ"});
}
Project: NYBC    File: TestJapaneseIterationMarkCharFilterFactory.java
public void testKanaOnlyIterationMarksWithJapaneseTokenizer() throws IOException {
  JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory();
  Map<String, String> tokenizerArgs = Collections.emptyMap();
  tokenizerFactory.init(tokenizerArgs);
  tokenizerFactory.inform(new StringMockResourceLoader(""));

  JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory();
  Map<String, String> filterArgs = new HashMap<String, String>();
  filterArgs.put("normalizeKanji", "false");
  filterArgs.put("normalizeKana", "true");
  filterFactory.init(filterArgs);

  CharFilter filter = filterFactory.create(
      new StringReader("時々馬鹿々々しいところゞゝゝミスヾ")
  );
  TokenStream tokenStream = tokenizerFactory.create(filter);
  assertTokenStreamContents(tokenStream, new String[]{"時々", "馬鹿", "々", "々", "しい", "ところどころ", "ミ", "スズ"});
}
Project: NYBC    File: TestRandomChains.java
private CharFilterSpec newCharFilterChain(Random random, Reader reader) {
  CharFilterSpec spec = new CharFilterSpec();
  spec.reader = reader;
  StringBuilder descr = new StringBuilder();
  int numFilters = random.nextInt(3);
  for (int i = 0; i < numFilters; i++) {
    while (true) {
      final Constructor<? extends CharFilter> ctor = charfilters.get(random.nextInt(charfilters.size()));
      final Object args[] = newCharFilterArgs(random, spec.reader, ctor.getParameterTypes());
      if (broken(ctor, args)) {
        continue;
      }
      reader = createComponent(ctor, args, descr);
      if (reader != null) {
        spec.reader = reader;
        break;
      }
    }
  }
  spec.toString = descr.toString();
  return spec;
}
Project: Maskana-Gestor-de-Conocimiento    File: TestRandomChains.java
private CharFilterSpec newCharFilterChain(Random random, Reader reader) {
  CharFilterSpec spec = new CharFilterSpec();
  spec.reader = reader;
  StringBuilder descr = new StringBuilder();
  int numFilters = random.nextInt(3);
  for (int i = 0; i < numFilters; i++) {
    while (true) {
      final Constructor<? extends CharFilter> ctor = charfilters.get(random.nextInt(charfilters.size()));
      final Object args[] = newCharFilterArgs(random, spec.reader, ctor.getParameterTypes());
      if (broken(ctor, args)) {
        continue;
      }
      reader = createComponent(ctor, args, descr);
      if (reader != null) {
        spec.reader = reader;
        break;
      }
    }
  }
  spec.toString = descr.toString();
  return spec;
}
Project: search    File: TestJapaneseIterationMarkCharFilterFactory.java
public void testIterationMarksWithKeywordTokenizer() throws IOException {
  final String text = "時々馬鹿々々しいところゞゝゝミスヾ";
  JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory(new HashMap<String,String>());
  CharFilter filter = filterFactory.create(new StringReader(text));
  TokenStream tokenStream = new MockTokenizer(filter, MockTokenizer.KEYWORD, false);
  assertTokenStreamContents(tokenStream, new String[]{"時時馬鹿馬鹿しいところどころミスズ"});
}
Project: search    File: TestJapaneseIterationMarkCharFilterFactory.java
public void testIterationMarksWithJapaneseTokenizer() throws IOException {
  JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(new HashMap<String,String>());
  tokenizerFactory.inform(new StringMockResourceLoader(""));

  JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory(new HashMap<String,String>());
  CharFilter filter = filterFactory.create(
      new StringReader("時々馬鹿々々しいところゞゝゝミスヾ")
  );
  TokenStream tokenStream = tokenizerFactory.create(newAttributeFactory(), filter);
  assertTokenStreamContents(tokenStream, new String[]{"時時", "馬鹿馬鹿しい", "ところどころ", "ミ", "スズ"});
}
Project: search    File: TestJapaneseIterationMarkCharFilterFactory.java
public void testKanjiOnlyIterationMarksWithJapaneseTokenizer() throws IOException {
  JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(new HashMap<String,String>());
  tokenizerFactory.inform(new StringMockResourceLoader(""));

  Map<String, String> filterArgs = new HashMap<>();
  filterArgs.put("normalizeKanji", "true");
  filterArgs.put("normalizeKana", "false");
  JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory(filterArgs);

  CharFilter filter = filterFactory.create(
      new StringReader("時々馬鹿々々しいところゞゝゝミスヾ")
  );
  TokenStream tokenStream = tokenizerFactory.create(newAttributeFactory(), filter);
  assertTokenStreamContents(tokenStream, new String[]{"時時", "馬鹿馬鹿しい", "ところ", "ゞ", "ゝ", "ゝ", "ミス", "ヾ"});
}
Project: search    File: TestJapaneseIterationMarkCharFilterFactory.java
public void testKanaOnlyIterationMarksWithJapaneseTokenizer() throws IOException {
  JapaneseTokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(new HashMap<String,String>());
  tokenizerFactory.inform(new StringMockResourceLoader(""));

  Map<String, String> filterArgs = new HashMap<>();
  filterArgs.put("normalizeKanji", "false");
  filterArgs.put("normalizeKana", "true");
  JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory(filterArgs);

  CharFilter filter = filterFactory.create(
      new StringReader("時々馬鹿々々しいところゞゝゝミスヾ")
  );
  TokenStream tokenStream = tokenizerFactory.create(newAttributeFactory(), filter);
  assertTokenStreamContents(tokenStream, new String[]{"時々", "馬鹿", "々", "々", "しい", "ところどころ", "ミ", "スズ"});
}
Project: search    File: TestJapaneseIterationMarkCharFilter.java
public void testKanjiOnly() throws IOException {
  // Test kanji only repetition marks
  CharFilter filter = new JapaneseIterationMarkCharFilter(
      new StringReader("時々、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。"),
      true, // kanji
      false // no kana
  );
  assertCharFilterEquals(filter, "時時、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。");
}
Project: search    File: TestJapaneseIterationMarkCharFilter.java
public void testKanaOnly() throws IOException {
  // Test kana only repetition marks
  CharFilter filter = new JapaneseIterationMarkCharFilter(
      new StringReader("時々、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。"),
      false, // no kanji
      true   // kana
  );
  assertCharFilterEquals(filter, "時々、おおのさんと一緒にお寿司が食べたいです。abcところどころ。");
}
Project: search    File: TestJapaneseIterationMarkCharFilter.java
public void testNone() throws IOException {
  // Test no repetition marks
  CharFilter filter = new JapaneseIterationMarkCharFilter(
      new StringReader("時々、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。"),
      false, // no kanji
      false  // no kana
  );
  assertCharFilterEquals(filter, "時々、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。");
}
Project: search    File: TestICUNormalizer2CharFilter.java
public void testTokenStream() throws IOException {
  // '℃', '№', '㈱', '㌘', 'サ' + combining dakuten, 'ソ' + combining dakuten, '㌰' + combining dakuten
  String input = "℃ № ㈱ ㌘ ザ ゾ ㌰゙";

  CharFilter reader = new ICUNormalizer2CharFilter(new StringReader(input),
    Normalizer2.getInstance(null, "nfkc", Normalizer2.Mode.COMPOSE));

  Tokenizer tokenStream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);

  assertTokenStreamContents(tokenStream,
    new String[] {"°C", "No", "(株)", "グラム", "ザ", "ゾ", "ピゴ"},
    new int[] {0, 2, 4, 6, 8, 11, 14},
    new int[] {1, 3, 5, 7, 10, 13, 16},
    input.length());
}
Project: search    File: TestICUNormalizer2CharFilter.java
public void testTokenStream2() throws IOException {
  // '㌰' + combining dakuten, '5', '℃', '№', '㈱', '㌘', 'サ' + combining dakuten, 'ソ' + combining dakuten
  String input = "㌰゙5℃№㈱㌘ザゾ";

  CharFilter reader = new ICUNormalizer2CharFilter(new StringReader(input),
    Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE));

  Tokenizer tokenStream = new NGramTokenizer(newAttributeFactory(), reader, 1, 1);

  assertTokenStreamContents(tokenStream,
    new String[] {"ピ", "ゴ", "5", "°", "c", "n", "o", "(", "株", ")", "グ", "ラ", "ム", "ザ", "ゾ"},
    new int[]{0, 1, 2, 3, 3, 4, 4, 5, 5, 5, 6, 6, 6, 7, 9},
    new int[]{1, 2, 3, 3, 4, 4, 5, 5, 5, 6, 6, 6, 7, 9, 11},
    input.length()
  );
}
Project: search    File: TestICUNormalizer2CharFilter.java
public void testMassiveLigature() throws IOException {
  String input = "\uFDFA";

  CharFilter reader = new ICUNormalizer2CharFilter(new StringReader(input),
    Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE));

  Tokenizer tokenStream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);

  assertTokenStreamContents(tokenStream,
    new String[] {"صلى", "الله", "عليه", "وسلم"},
    new int[]{0, 0, 0, 0},
    new int[]{0, 0, 0, 1},
    input.length()
  );
}
Project: search    File: TestPatternReplaceCharFilter.java
private void checkOutput(String input, String pattern, String replacement,
    String expectedOutput, String expectedIndexMatchedOutput) throws IOException {
  CharFilter cs = new PatternReplaceCharFilter(pattern(pattern), replacement,
      new StringReader(input));

  StringBuilder output = new StringBuilder();
  for (int chr = cs.read(); chr > 0; chr = cs.read()) {
    output.append((char) chr);
  }

  StringBuilder indexMatched = new StringBuilder();
  for (int i = 0; i < output.length(); i++) {
    indexMatched.append((cs.correctOffset(i) < 0 ? "-" : input.charAt(cs.correctOffset(i))));
  }

  boolean outputGood = expectedOutput.equals(output.toString());
  boolean indexMatchedGood = expectedIndexMatchedOutput.equals(indexMatched.toString());

  if (!outputGood || !indexMatchedGood || false) {
    System.out.println("Pattern : " + pattern);
    System.out.println("Replac. : " + replacement);
    System.out.println("Input   : " + input);
    System.out.println("Output  : " + output);
    System.out.println("Expected: " + expectedOutput);
    System.out.println("Output/i: " + indexMatched);
    System.out.println("Expected: " + expectedIndexMatchedOutput);
    System.out.println();
  }

  assertTrue("Output doesn't match.", outputGood);
  assertTrue("Index-matched output doesn't match.", indexMatchedGood);
}
Project: search    File: TestPatternReplaceCharFilter.java
public void testNothingChange() throws IOException {
  final String BLOCK = "this is test.";
  CharFilter cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1$2$3",
        new StringReader( BLOCK ) );
  TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
  assertTokenStreamContents(ts,
      new String[] { "this", "is", "test." },
      new int[] { 0, 5, 8 },
      new int[] { 4, 7, 13 }, 
      BLOCK.length());
}
Project: search    File: TestPatternReplaceCharFilter.java
public void testReplaceByEmpty() throws IOException {
  final String BLOCK = "aa bb cc";
  CharFilter cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "",
        new StringReader( BLOCK ) );
  TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
  assertTokenStreamContents(ts, new String[] {});
}
Project: search    File: TestPatternReplaceCharFilter.java
public void test1block1matchSameLength() throws IOException {
  final String BLOCK = "aa bb cc";
  CharFilter cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1#$2#$3",
        new StringReader( BLOCK ) );
  TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
  assertTokenStreamContents(ts,
      new String[] { "aa#bb#cc" },
      new int[] { 0 },
      new int[] { 8 }, 
      BLOCK.length());
}
Project: search    File: TestPatternReplaceCharFilter.java
public void test1block1matchLonger() throws IOException {
  final String BLOCK = "aa bb cc dd";
  CharFilter cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1##$2###$3",
        new StringReader( BLOCK ) );
  TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
  assertTokenStreamContents(ts,
      new String[] { "aa##bb###cc", "dd" },
      new int[] { 0, 9 },
      new int[] { 8, 11 },
      BLOCK.length());
}
Project: search    File: TestPatternReplaceCharFilter.java
public void test1block2matchLonger() throws IOException {
  final String BLOCK = " a  a";
  CharFilter cs = new PatternReplaceCharFilter( pattern("a"), "aa",
        new StringReader( BLOCK ) );
  TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
  assertTokenStreamContents(ts,
      new String[] { "aa", "aa" },
      new int[] { 1, 4 },
      new int[] { 2, 5 },
      BLOCK.length());
}
Project: search    File: TestPatternReplaceCharFilter.java
public void test1block1matchShorter() throws IOException {
  final String BLOCK = "aa  bb   cc dd";
  CharFilter cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1#$2",
        new StringReader( BLOCK ) );
  TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
  assertTokenStreamContents(ts,
      new String[] { "aa#bb", "dd" },
      new int[] { 0, 12 },
      new int[] { 11, 14 },
      BLOCK.length());
}
Project: search    File: TestPatternReplaceCharFilter.java
public void test1blockMultiMatches() throws IOException {
  final String BLOCK = "  aa bb cc --- aa bb aa   bb   cc";
  CharFilter cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1  $2  $3",
        new StringReader( BLOCK ) );
  TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
  assertTokenStreamContents(ts,
      new String[] { "aa", "bb", "cc", "---", "aa", "bb", "aa", "bb", "cc" },
      new int[] { 2, 6, 9, 11, 15, 18, 21, 25, 29 },
      new int[] { 4, 8, 10, 14, 17, 20, 23, 27, 33 },
      BLOCK.length());
}
Project: search    File: TestPatternReplaceCharFilter.java
public void test2blocksMultiMatches() throws IOException {
  final String BLOCK = "  aa bb cc --- aa bb aa. bb aa   bb cc";

  CharFilter cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)"), "$1##$2",
        new StringReader( BLOCK ) );
  TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
  assertTokenStreamContents(ts,
      new String[] { "aa##bb", "cc", "---", "aa##bb", "aa.", "bb", "aa##bb", "cc" },
      new int[] { 2, 8, 11, 15, 21, 25, 28, 36 },
      new int[] { 7, 10, 14, 20, 24, 27, 35, 38 },
      BLOCK.length());
}
Project: search    File: TestPatternReplaceCharFilter.java
public void testChain() throws IOException {
  final String BLOCK = " a bb - ccc . --- bb a . ccc ccc bb";
  CharFilter cs = new PatternReplaceCharFilter( pattern("a"), "aa",
      new StringReader( BLOCK ) );
  cs = new PatternReplaceCharFilter( pattern("bb"), "b", cs );
  cs = new PatternReplaceCharFilter( pattern("ccc"), "c", cs );
  TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
  assertTokenStreamContents(ts,
      new String[] { "aa", "b", "-", "c", ".", "---", "b", "aa", ".", "c", "c", "b" },
      new int[] { 1, 3, 6, 8, 12, 14, 18, 21, 23, 25, 29, 33 },
      new int[] { 2, 5, 7, 11, 13, 17, 20, 22, 24, 28, 32, 35 },
      BLOCK.length());
}
Project: search    File: TestMappingCharFilter.java
public void testReaderReset() throws Exception {
  CharFilter cs = new MappingCharFilter( normMap, new StringReader( "x" ) );
  char[] buf = new char[10];
  int len = cs.read(buf, 0, 10);
  assertEquals( 1, len );
  assertEquals( 'x', buf[0] );
  len = cs.read(buf, 0, 10);
  assertEquals( -1, len );

  // rewind
  cs.reset();
  len = cs.read(buf, 0, 10);
  assertEquals( 1, len );
  assertEquals( 'x', buf[0] );
}
Project: search    File: TestMappingCharFilter.java
public void testTokenStream() throws Exception {
  String testString = "h i j k ll cccc bbb aa";
  CharFilter cs = new MappingCharFilter( normMap, new StringReader( testString ) );
  TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
  assertTokenStreamContents(ts,
    new String[]{"i","i","jj","kkk","llll","cc","b","a"},
    new int[]{0,2,4,6,8,11,16,20},
    new int[]{1,3,5,7,10,15,19,22},
    testString.length()
  );
}
Project: search    File: TestMappingCharFilter.java
public void testChained() throws Exception {
  String testString = "aaaa ll h";
  CharFilter cs = new MappingCharFilter( normMap,
      new MappingCharFilter( normMap, new StringReader( testString ) ) );
  TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
  assertTokenStreamContents(ts,
    new String[]{"a","llllllll","i"},
    new int[]{0,5,8},
    new int[]{4,7,9},
    testString.length()
  );
}
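
The normMap used by the three TestMappingCharFilter snippets above is built elsewhere in that test class. Judging from the expected tokens (for example "h" becoming "i" and "ll" becoming "llll"), it is a NormalizeCharMap; the following is a minimal sketch of constructing and draining such a map, using only those two inferred mappings:

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.CharFilter;
import org.apache.lucene.analysis.charfilter.MappingCharFilter;
import org.apache.lucene.analysis.charfilter.NormalizeCharMap;

public class MappingCharFilterSketch {
  public static void main(String[] args) throws IOException {
    // Two mappings inferred from the expected output above; the real normMap has more.
    NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
    builder.add("h", "i");     // single-character rewrite
    builder.add("ll", "llll"); // replacements may change length; offsets are corrected
    NormalizeCharMap normMap = builder.build();

    CharFilter cs = new MappingCharFilter(normMap, new StringReader("h ll"));
    StringBuilder out = new StringBuilder();
    int chr;
    while ((chr = cs.read()) != -1) {
      out.append((char) chr);
    }
    System.out.println(out); // "i llll"
    cs.close();
  }
}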
Project: NYBC    File: TestJapaneseIterationMarkCharFilterFactory.java
public void testIterationMarksWithKeywordTokenizer() throws IOException {
  final String text = "時々馬鹿々々しいところゞゝゝミスヾ";
  JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory();
  CharFilter filter = filterFactory.create(new StringReader(text));
  TokenStream tokenStream = new MockTokenizer(filter, MockTokenizer.KEYWORD, false);
  assertTokenStreamContents(tokenStream, new String[]{"時時馬鹿馬鹿しいところどころミスズ"});
}
Project: NYBC    File: TestJapaneseIterationMarkCharFilter.java
public void testKanjiOnly() throws IOException {
  // Test kanji only repetition marks
  CharFilter filter = new JapaneseIterationMarkCharFilter(
      new StringReader("時々、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。"),
      true, // kanji
      false // no kana
  );
  assertCharFilterEquals(filter, "時時、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。");
}
Project: NYBC    File: TestJapaneseIterationMarkCharFilter.java
public void testKanaOnly() throws IOException {
  // Test kana only repetition marks
  CharFilter filter = new JapaneseIterationMarkCharFilter(
      new StringReader("時々、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。"),
      false, // no kanji
      true   // kana
  );
  assertCharFilterEquals(filter, "時々、おおのさんと一緒にお寿司が食べたいです。abcところどころ。");
}
Project: NYBC    File: TestJapaneseIterationMarkCharFilter.java
public void testNone() throws IOException {
  // Test no repetition marks
  CharFilter filter = new JapaneseIterationMarkCharFilter(
      new StringReader("時々、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。"),
      false, // no kanji
      false  // no kana
  );
  assertCharFilterEquals(filter, "時々、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。");
}
Project: NYBC    File: TestPatternReplaceCharFilter.java
private void checkOutput(String input, String pattern, String replacement,
    String expectedOutput, String expectedIndexMatchedOutput) throws IOException {
  CharFilter cs = new PatternReplaceCharFilter(pattern(pattern), replacement,
      new StringReader(input));

  StringBuilder output = new StringBuilder();
  for (int chr = cs.read(); chr > 0; chr = cs.read()) {
    output.append((char) chr);
  }

  StringBuilder indexMatched = new StringBuilder();
  for (int i = 0; i < output.length(); i++) {
    indexMatched.append((cs.correctOffset(i) < 0 ? "-" : input.charAt(cs.correctOffset(i))));
  }

  boolean outputGood = expectedOutput.equals(output.toString());
  boolean indexMatchedGood = expectedIndexMatchedOutput.equals(indexMatched.toString());

  if (!outputGood || !indexMatchedGood || false) {
    System.out.println("Pattern : " + pattern);
    System.out.println("Replac. : " + replacement);
    System.out.println("Input   : " + input);
    System.out.println("Output  : " + output);
    System.out.println("Expected: " + expectedOutput);
    System.out.println("Output/i: " + indexMatched);
    System.out.println("Expected: " + expectedIndexMatchedOutput);
    System.out.println();
  }

  assertTrue("Output doesn't match.", outputGood);
  assertTrue("Index-matched output doesn't match.", indexMatchedGood);
}
Project: NYBC    File: TestPatternReplaceCharFilter.java
public void testNothingChange() throws IOException {
  final String BLOCK = "this is test.";
  CharFilter cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "$1$2$3",
        new StringReader( BLOCK ) );
  TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
  assertTokenStreamContents(ts,
      new String[] { "this", "is", "test." },
      new int[] { 0, 5, 8 },
      new int[] { 4, 7, 13 }, 
      BLOCK.length());
}
Project: NYBC    File: TestPatternReplaceCharFilter.java
public void testReplaceByEmpty() throws IOException {
  final String BLOCK = "aa bb cc";
  CharFilter cs = new PatternReplaceCharFilter( pattern("(aa)\\s+(bb)\\s+(cc)"), "",
        new StringReader( BLOCK ) );
  TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
  assertTokenStreamContents(ts, new String[] {});
}