Java class org.apache.lucene.analysis.MockTokenFilter: example source code
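
MockTokenFilter (from Lucene's lucene-test-framework) is a TokenFilter that drops every token matching a CharacterRunAutomaton. Two ready-made automata cover the usual cases: MockTokenFilter.ENGLISH_STOPSET removes a small fixed set of common English stopwords, while MockTokenFilter.EMPTY_STOPSET matches nothing and passes every token through. The snippets below either hand one of these sets to a MockAnalyzer or chain a MockTokenFilter directly behind a MockTokenizer. As a minimal, self-contained sketch of the direct chaining pattern (assuming the 4.x-era Reader-based API that most of the snippets below use):

Tokenizer tokenizer = new MockTokenizer(new StringReader("the quick brown fox"));
TokenStream stream = new MockTokenFilter(tokenizer, MockTokenFilter.ENGLISH_STOPSET);
CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
stream.reset();
while (stream.incrementToken()) {
  System.out.println(term.toString()); // prints quick, brown, fox; "the" is dropped
}
stream.end();
stream.close();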

Project: search    File: TestSpansAdvanced2.java
/**
 * Initializes the tests by adding documents to the index.
 */
@Override
public void setUp() throws Exception {
  super.setUp();

  // create test index
  final RandomIndexWriter writer = new RandomIndexWriter(random(), mDirectory,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET))
          .setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy())
          .setSimilarity(new DefaultSimilarity()));
  addDocument(writer, "A", "Should we, could we, would we?");
  addDocument(writer, "B", "It should.  Should it?");
  addDocument(writer, "C", "It shouldn't.");
  addDocument(writer, "D", "Should we, should we, should we.");
  reader2 = writer.getReader();
  writer.close();

  // re-open the searcher since we added more docs
  searcher2 = newSearcher(reader2);
  searcher2.setSimilarity(new DefaultSimilarity());
}
Project: search    File: TestSpansAdvanced.java
/**
 * Initializes the tests by adding 4 identical documents to the index.
 */
@Override
public void setUp() throws Exception {
  super.setUp();
  // create test index
  mDirectory = newDirectory();
  final RandomIndexWriter writer = new RandomIndexWriter(random(), mDirectory, 
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET))
          .setMergePolicy(newLogMergePolicy()).setSimilarity(new DefaultSimilarity()));
  addDocument(writer, "1", "I think it should work.");
  addDocument(writer, "2", "I think it should work.");
  addDocument(writer, "3", "I think it should work.");
  addDocument(writer, "4", "I think it should work.");
  reader = writer.getReader();
  writer.close();
  searcher = newSearcher(reader);
  searcher.setSimilarity(new DefaultSimilarity());
}
Project: search    File: TestIndexWriter.java
public void testStopwordsPosIncHole() throws Exception {
  Directory dir = newDirectory();
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader);
      TokenStream stream = new MockTokenFilter(tokenizer, MockTokenFilter.ENGLISH_STOPSET);
      return new TokenStreamComponents(tokenizer, stream);
    }
  };
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, a);
  Document doc = new Document();
  doc.add(new TextField("body", "just a", Field.Store.NO));
  doc.add(new TextField("body", "test of gaps", Field.Store.NO));
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher is = newSearcher(ir);
  PhraseQuery pq = new PhraseQuery();
  pq.add(new Term("body", "just"), 0);
  pq.add(new Term("body", "test"), 2);
  // body:"just ? test"
  assertEquals(1, is.search(pq, 5).totalHits);
  ir.close();
  dir.close();
}
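
The phrase match above (body:"just ? test") works because MockTokenFilter leaves a position hole wherever it removes a stopword instead of closing the gap. A minimal sketch of that behavior (assuming the static assertAnalyzesTo helper from BaseTokenStreamTestCase in the same test framework, applied to the analyzer a defined above):

// "a" and "of" are in ENGLISH_STOPSET; each removed token bumps the next
// surviving token's position increment, producing the hole the PhraseQuery matches across
assertAnalyzesTo(a, "just a test of gaps",
    new String[] {"just", "test", "gaps"},
    new int[] {1, 2, 2}); // position increments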
Project: NYBC    File: HighlightCustomQueryTest.java
/**
 * This method is intended for use with
 * <tt>testHighlightingWithDefaultField()</tt>
 */
private String highlightField(Query query, String fieldName,
    String text) throws IOException, InvalidTokenOffsetsException {
  TokenStream tokenStream = new MockAnalyzer(random(), MockTokenizer.SIMPLE,
      true, MockTokenFilter.ENGLISH_STOPSET, true).tokenStream(fieldName,
      new StringReader(text));
  // Assuming "<B>", "</B>" used to highlight
  SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
  MyQueryScorer scorer = new MyQueryScorer(query, fieldName, FIELD_NAME);
  Highlighter highlighter = new Highlighter(formatter, scorer);
  highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));

  String rv = highlighter.getBestFragments(tokenStream, text, 1,
      "(FIELD TEXT TRUNCATED)");
  return rv.length() == 0 ? text : rv;
}
Project: NYBC    File: TestSpansAdvanced2.java
/**
 * Initializes the tests by adding documents to the index.
 */
@Override
public void setUp() throws Exception {
  super.setUp();

  // create test index
  final RandomIndexWriter writer = new RandomIndexWriter(random(), mDirectory,
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(),
          MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true))
          .setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy())
          .setSimilarity(new DefaultSimilarity()));
  addDocument(writer, "A", "Should we, could we, would we?");
  addDocument(writer, "B", "It should.  Should it?");
  addDocument(writer, "C", "It shouldn't.");
  addDocument(writer, "D", "Should we, should we, should we.");
  reader2 = writer.getReader();
  writer.close();

  // re-open the searcher since we added more docs
  searcher2 = newSearcher(reader2);
  searcher2.setSimilarity(new DefaultSimilarity());
}
Project: NYBC    File: TestSpansAdvanced.java
/**
 * Initializes the tests by adding 4 identical documents to the index.
 */
@Override
public void setUp() throws Exception {
  super.setUp();
  // create test index
  mDirectory = newDirectory();
  final RandomIndexWriter writer = new RandomIndexWriter(random(), mDirectory, 
      newIndexWriterConfig(TEST_VERSION_CURRENT, 
          new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true))
          .setMergePolicy(newLogMergePolicy()).setSimilarity(new DefaultSimilarity()));
  addDocument(writer, "1", "I think it should work.");
  addDocument(writer, "2", "I think it should work.");
  addDocument(writer, "3", "I think it should work.");
  addDocument(writer, "4", "I think it should work.");
  reader = writer.getReader();
  writer.close();
  searcher = newSearcher(reader);
  searcher.setSimilarity(new DefaultSimilarity());
}
Project: lucene-addons    File: TestQPTestBaseSpanQuery.java
@Override
public void testPositionIncrement() throws Exception {
  //For SQP, this only tests whether stop words have been dropped.
  //PositionIncrements are not available in SpanQueries yet.
  CommonQueryParserConfiguration qp = getParserConfig( new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET));
  //qp.setEnablePositionIncrements(true);
  String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\"";
  //               0         2                      5           7  8
  SpanNearQuery pq = (SpanNearQuery) getQuery(qtxt,qp);
  SpanQuery[] clauses = pq.getClauses();
  assertEquals(clauses.length, 5);
  Set<Term> expected = new HashSet<Term>();
  expected.add(new Term("field", "words"));
  expected.add(new Term("field", "poisitions"));
  expected.add(new Term("field", "pos"));
  expected.add(new Term("field", "stopped"));
  expected.add(new Term("field", "phrasequery"));
}
Project: lucene-addons    File: QueryParserTestBase.java
public void testBoost()
    throws Exception {
  CharacterRunAutomaton stopWords = new CharacterRunAutomaton(Automata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords);
  CommonQueryParserConfiguration qp = getParserConfig(oneStopAnalyzer);
  Query q = getQuery("on^1.0",qp);
  assertNotNull(q);
  q = getQuery("\"hello\"^2.0",qp);
  assertNotNull(q);

  assertEquals(getBoost(q), (float) 2.0, (float) 0.5);
  q = getQuery("hello^2.0",qp);
  assertNotNull(q);
  assertEquals(((BoostQuery)q).getBoost(), (float) 2.0, (float) 0.5);
  q = getQuery("\"on\"^1.0",qp);
  assertNotNull(q);

  Analyzer a2 = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
  CommonQueryParserConfiguration qp2 = getParserConfig(a2);
  q = getQuery("the^3", qp2);
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertEmpty(q);
  assertEquals(1.0f, getBoost(q), 0.01f);
}
Project: lucene-addons    File: QueryParserTestBase.java
public void testPositionIncrement() throws Exception {
  CommonQueryParserConfiguration qp = getParserConfig( new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET));
  qp.setEnablePositionIncrements(true);
  String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\"";
  //               0         2                      5           7  8
  int expectedPositions[] = {1,3,4,6,9};
  PhraseQuery pq = (PhraseQuery) getQuery(qtxt,qp);
  //System.out.println("Query text: "+qtxt);
  //System.out.println("Result: "+pq);
  Term t[] = pq.getTerms();
  int pos[] = pq.getPositions();
  for (int i = 0; i < t.length; i++) {
    //System.out.println(i+". "+t[i]+"  pos: "+pos[i]);
    assertEquals("term "+i+" = "+t[i]+" has wrong term-position!",expectedPositions[i],pos[i]);
  }
}
Project: lucene-addons    File: QueryParserTestBase.java
public void testPositionIncrements() throws Exception {
  Directory dir = newDirectory();
  Analyzer a = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(a));
  Document doc = new Document();
  doc.add(newTextField("field", "the wizard of ozzy", Field.Store.NO));
  w.addDocument(doc);
  IndexReader r = DirectoryReader.open(w);
  w.close();
  IndexSearcher s = newSearcher(r);

  Query q = getQuery("\"wizard of ozzy\"",a);
  assertEquals(1, s.search(q, 1).totalHits);
  r.close();
  dir.close();
}
Project: lucene-addons    File: TestConcordanceSearcher.java
@Test
public void testMismatchingFieldsInStandardQueryConversion() throws Exception {
  // tests what happens if a Query doesn't contain a term in the "span" field
  // in the searcher...should be no exception and zero documents returned.

  String[] docs = new String[]{"a b c a b c",};
  Analyzer analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET);
  Directory directory = getDirectory(analyzer, docs);
  IndexReader reader = DirectoryReader.open(directory);
  IndexSearcher indexSearcher = new IndexSearcher(reader);

  ConcordanceSearcher searcher = new ConcordanceSearcher(
      new WindowBuilder(10, 10, analyzer.getOffsetGap(FIELD)));

  Query q = new TermQuery(new Term("_" + FIELD, "a"));

  int windowCount = -1;
  ConcordanceWindowCollector collector = new ConcordanceWindowCollector(10);

  searcher.search(indexSearcher, FIELD,
      q, null, analyzer, collector);
  windowCount = collector.size();
  assertEquals(0, windowCount);
  reader.close();
  directory.close();
}
Project: Maskana-Gestor-de-Conocimiento    File: TestSpansAdvanced2.java
/**
 * Initializes the tests by adding documents to the index.
 */
@Override
public void setUp() throws Exception {
  super.setUp();

  // create test index
  final RandomIndexWriter writer = new RandomIndexWriter(random(), mDirectory,
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(),
          MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET))
          .setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy())
          .setSimilarity(new DefaultSimilarity()));
  addDocument(writer, "A", "Should we, could we, would we?");
  addDocument(writer, "B", "It should.  Should it?");
  addDocument(writer, "C", "It shouldn't.");
  addDocument(writer, "D", "Should we, should we, should we.");
  reader2 = writer.getReader();
  writer.close();

  // re-open the searcher since we added more docs
  searcher2 = newSearcher(reader2);
  searcher2.setSimilarity(new DefaultSimilarity());
}
Project: Maskana-Gestor-de-Conocimiento    File: TestSpansAdvanced.java
/**
 * Initializes the tests by adding 4 identical documents to the index.
 */
@Override
public void setUp() throws Exception {
  super.setUp();
  // create test index
  mDirectory = newDirectory();
  final RandomIndexWriter writer = new RandomIndexWriter(random(), mDirectory, 
      newIndexWriterConfig(TEST_VERSION_CURRENT, 
          new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET))
          .setMergePolicy(newLogMergePolicy()).setSimilarity(new DefaultSimilarity()));
  addDocument(writer, "1", "I think it should work.");
  addDocument(writer, "2", "I think it should work.");
  addDocument(writer, "3", "I think it should work.");
  addDocument(writer, "4", "I think it should work.");
  reader = writer.getReader();
  writer.close();
  searcher = newSearcher(reader);
  searcher.setSimilarity(new DefaultSimilarity());
}
Project: search    File: HighlightCustomQueryTest.java
/**
 * This method is intended for use with
 * <tt>testHighlightingWithDefaultField()</tt>
 */
private String highlightField(Query query, String fieldName,
    String text) throws IOException, InvalidTokenOffsetsException {
  TokenStream tokenStream = new MockAnalyzer(random(), MockTokenizer.SIMPLE,
      true, MockTokenFilter.ENGLISH_STOPSET).tokenStream(fieldName, text);
  // Assuming "<B>", "</B>" used to highlight
  SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
  MyQueryScorer scorer = new MyQueryScorer(query, fieldName, FIELD_NAME);
  Highlighter highlighter = new Highlighter(formatter, scorer);
  highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));

  String rv = highlighter.getBestFragments(tokenStream, text, 1,
      "(FIELD TEXT TRUNCATED)");
  return rv.length() == 0 ? text : rv;
}
Project: search    File: MemoryIndexTest.java
/**
 * Return a random analyzer (Simple, Stop, Crazy, or Whitespace) to analyze the terms.
 */
private Analyzer randomAnalyzer() {
  switch(random().nextInt(4)) {
    case 0: return new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
    case 1: return new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
    case 2: return new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new CrazyTokenFilter(tokenizer));
      }
    };
    default: return new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  }
}
Project: search    File: FuzzySuggesterTest.java
/**
 * basic "standardanalyzer" test with stopword removal
 */
public void testStandard() throws Exception {
  Input keys[] = new Input[] {
      new Input("the ghost of christmas past", 50),
  };

  Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
  FuzzySuggester suggester = new FuzzySuggester(standard, standard, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, false, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
      FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, FuzzySuggester.DEFAULT_UNICODE_AWARE);
  suggester.build(new InputArrayIterator(keys));

  List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("the ghost of chris", random()), false, 1);
  assertEquals(1, results.size());
  assertEquals("the ghost of christmas past", results.get(0).key.toString());
  assertEquals(50, results.get(0).value, 0.01F);

  // omit the 'the' since it's a stopword; it's suggested anyway
  results = suggester.lookup(TestUtil.stringToCharSequence("ghost of chris", random()), false, 1);
  assertEquals(1, results.size());
  assertEquals("the ghost of christmas past", results.get(0).key.toString());
  assertEquals(50, results.get(0).value, 0.01F);

  // omit the 'the' and 'of' since they are stopwords; it's suggested anyway
  results = suggester.lookup(TestUtil.stringToCharSequence("ghost chris", random()), false, 1);
  assertEquals(1, results.size());
  assertEquals("the ghost of christmas past", results.get(0).key.toString());
  assertEquals(50, results.get(0).value, 0.01F);
}
Project: search    File: AnalyzingSuggesterTest.java
/**
 * basic "standardanalyzer" test with stopword removal
 */
public void testStandard() throws Exception {
  Input keys[] = new Input[] {
      new Input("the ghost of christmas past", 50),
  };

  Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
  AnalyzingSuggester suggester = new AnalyzingSuggester(standard, standard, 
      AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, false);

  suggester.build(new InputArrayIterator(keys));

  List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("the ghost of chris", random()), false, 1);
  assertEquals(1, results.size());
  assertEquals("the ghost of christmas past", results.get(0).key.toString());
  assertEquals(50, results.get(0).value, 0.01F);

  // omit the 'the' since it's a stopword; it's suggested anyway
  results = suggester.lookup(TestUtil.stringToCharSequence("ghost of chris", random()), false, 1);
  assertEquals(1, results.size());
  assertEquals("the ghost of christmas past", results.get(0).key.toString());
  assertEquals(50, results.get(0).value, 0.01F);

  // omit the 'the' and 'of' since they are stopwords; it's suggested anyway
  results = suggester.lookup(TestUtil.stringToCharSequence("ghost chris", random()), false, 1);
  assertEquals(1, results.size());
  assertEquals("the ghost of christmas past", results.get(0).key.toString());
  assertEquals(50, results.get(0).value, 0.01F);
}
Project: search    File: AnalyzingSuggesterTest.java
public void testEmpty() throws Exception {
  Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
  AnalyzingSuggester suggester = new AnalyzingSuggester(standard);
  suggester.build(new InputArrayIterator(new Input[0]));

  List<LookupResult> result = suggester.lookup("a", false, 20);
  assertTrue(result.isEmpty());
}
Project: search    File: TestIDVersionPostingsFormat.java
public void testMissingPayload() throws Exception {
  Directory dir = newDirectory();

  // MockAnalyzer minus maybePayload else it sometimes stuffs in an 8-byte payload!
  Analyzer a = new Analyzer() {
      @Override
      public TokenStreamComponents createComponents(String fieldName, Reader reader) {
        MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true, 100);
        tokenizer.setEnableChecks(true);
        MockTokenFilter filt = new MockTokenFilter(tokenizer, MockTokenFilter.EMPTY_STOPSET);
        return new TokenStreamComponents(tokenizer, filt);
      }
    };
  IndexWriterConfig iwc = newIndexWriterConfig(a);
  iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
  Document doc = new Document();
  doc.add(newTextField("id", "id", Field.Store.NO));
  try {
    w.addDocument(doc);
    w.commit();
    fail("didn't hit expected exception");
  } catch (IllegalArgumentException iae) {
    // expected
  }

  w.close();
  dir.close();
}
Project: search    File: TestRandomChains.java
@Override public Object create(Random random) {
  // TODO: could probably use a purely random automaton
  switch(random.nextInt(5)) {
    case 0: return MockTokenizer.KEYWORD;
    case 1: return MockTokenizer.SIMPLE;
    case 2: return MockTokenizer.WHITESPACE;
    case 3: return MockTokenFilter.EMPTY_STOPSET;
    default: return MockTokenFilter.ENGLISH_STOPSET;
  }
}
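
Both the tokenizer modes and the stop sets returned above are CharacterRunAutomaton constants, which is why one factory can hand back either kind; what differs is where the automaton ends up. A hedged sketch of the two destinations (the input string is an arbitrary placeholder):

Reader reader = new StringReader("some test input");
// Tokenizer modes describe what counts as a token...
Tokenizer tok = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
// ...while stop sets describe which finished tokens to discard.
TokenStream ts = new MockTokenFilter(tok, MockTokenFilter.ENGLISH_STOPSET);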
Project: search    File: TestParser.java
@BeforeClass
public static void beforeClass() throws Exception {
  // TODO: rewrite test (this needs to set QueryParser.enablePositionIncrements, too, for work with CURRENT):
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
  //initialize the parser
  builder = new CorePlusExtensionsParser("contents", analyzer);

  BufferedReader d = new BufferedReader(new InputStreamReader(
      TestParser.class.getResourceAsStream("reuters21578.txt"), StandardCharsets.US_ASCII));
  dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(analyzer));
  String line = d.readLine();
  while (line != null) {
    int endOfDate = line.indexOf('\t');
    String date = line.substring(0, endOfDate).trim();
    String content = line.substring(endOfDate).trim();
    Document doc = new Document();
    doc.add(newTextField("date", date, Field.Store.YES));
    doc.add(newTextField("contents", content, Field.Store.YES));
    doc.add(new IntField("date2", Integer.valueOf(date), Field.Store.NO));
    writer.addDocument(doc);
    line = d.readLine();
  }
  d.close();
  writer.close();
  reader = DirectoryReader.open(dir);
  searcher = newSearcher(reader);

}
Project: search    File: TestTermVectorsWriter.java
public void testEndOffsetPositionStopFilter() throws Exception {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)));
  Document doc = new Document();
  FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorPositions(true);
  customType.setStoreTermVectorOffsets(true);
  Field f = newField("field", "abcd the", customType);
  doc.add(f);
  doc.add(f);
  w.addDocument(doc);
  w.close();

  IndexReader r = DirectoryReader.open(dir);
  TermsEnum termsEnum = r.getTermVectors(0).terms("field").iterator(null);
  assertNotNull(termsEnum.next());
  DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null);
  assertEquals(2, termsEnum.totalTermFreq());

  assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  dpEnum.nextPosition();
  assertEquals(0, dpEnum.startOffset());
  assertEquals(4, dpEnum.endOffset());

  dpEnum.nextPosition();
  assertEquals(9, dpEnum.startOffset());
  assertEquals(13, dpEnum.endOffset());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, dpEnum.nextDoc());

  r.close();
  dir.close();
}
Project: NYBC    File: MemoryIndexTest.java
/**
 * Return a random analyzer (Simple, Stop, or Whitespace) to analyze the terms.
 */
private Analyzer randomAnalyzer() {
  switch(random().nextInt(3)) {
    case 0: return new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
    case 1: return new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true);
    default: return new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  }
}
Project: NYBC    File: FuzzySuggesterTest.java
/**
 * basic "standardanalyzer" test with stopword removal
 */
public void testStandard() throws Exception {
  TermFreq keys[] = new TermFreq[] {
      new TermFreq("the ghost of christmas past", 50),
  };

  Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET, false);
  FuzzySuggester suggester = new FuzzySuggester(standard);
  suggester.build(new TermFreqArrayIterator(keys));

  List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("the ghost of chris", random()), false, 1);
  assertEquals(1, results.size());
  assertEquals("the ghost of christmas past", results.get(0).key.toString());
  assertEquals(50, results.get(0).value, 0.01F);

  // omit the 'the' since it's a stopword; it's suggested anyway
  results = suggester.lookup(_TestUtil.stringToCharSequence("ghost of chris", random()), false, 1);
  assertEquals(1, results.size());
  assertEquals("the ghost of christmas past", results.get(0).key.toString());
  assertEquals(50, results.get(0).value, 0.01F);

  // omit the 'the' and 'of' since they are stopwords; it's suggested anyway
  results = suggester.lookup(_TestUtil.stringToCharSequence("ghost chris", random()), false, 1);
  assertEquals(1, results.size());
  assertEquals("the ghost of christmas past", results.get(0).key.toString());
  assertEquals(50, results.get(0).value, 0.01F);
}
Project: NYBC    File: AnalyzingSuggesterTest.java
/**
 * basic "standardanalyzer" test with stopword removal
 */
public void testStandard() throws Exception {
  TermFreq keys[] = new TermFreq[] {
      new TermFreq("the ghost of christmas past", 50),
  };

  Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET, false);
  AnalyzingSuggester suggester = new AnalyzingSuggester(standard);
  suggester.build(new TermFreqArrayIterator(keys));

  List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("the ghost of chris", random()), false, 1);
  assertEquals(1, results.size());
  assertEquals("the ghost of christmas past", results.get(0).key.toString());
  assertEquals(50, results.get(0).value, 0.01F);

  // omit the 'the' since it's a stopword; it's suggested anyway
  results = suggester.lookup(_TestUtil.stringToCharSequence("ghost of chris", random()), false, 1);
  assertEquals(1, results.size());
  assertEquals("the ghost of christmas past", results.get(0).key.toString());
  assertEquals(50, results.get(0).value, 0.01F);

  // omit the 'the' and 'of' since they are stopwords; it's suggested anyway
  results = suggester.lookup(_TestUtil.stringToCharSequence("ghost chris", random()), false, 1);
  assertEquals(1, results.size());
  assertEquals("the ghost of christmas past", results.get(0).key.toString());
  assertEquals(50, results.get(0).value, 0.01F);
}
Project: NYBC    File: AnalyzingSuggesterTest.java
public void testEmpty() throws Exception {
  Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET, false);
  AnalyzingSuggester suggester = new AnalyzingSuggester(standard);
  suggester.build(new TermFreqArrayIterator(new TermFreq[0]));

  List<LookupResult> result = suggester.lookup("a", false, 20);
  assertTrue(result.isEmpty());
}
Project: NYBC    File: TestRandomChains.java
@Override public Object create(Random random) {
  // TODO: could probably use a purely random automaton
  switch(random.nextInt(5)) {
    case 0: return MockTokenizer.KEYWORD;
    case 1: return MockTokenizer.SIMPLE;
    case 2: return MockTokenizer.WHITESPACE;
    case 3: return MockTokenFilter.EMPTY_STOPSET;
    default: return MockTokenFilter.ENGLISH_STOPSET;
  }
}
Project: NYBC    File: TestParser.java
@BeforeClass
public static void beforeClass() throws Exception {
  // TODO: rewrite test (this needs to set QueryParser.enablePositionIncrements, too, for work with CURRENT):
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET, false);
  //initialize the parser
  builder = new CorePlusExtensionsParser("contents", analyzer);

  BufferedReader d = new BufferedReader(new InputStreamReader(
      TestParser.class.getResourceAsStream("reuters21578.txt"), "US-ASCII"));
  dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(Version.LUCENE_40, analyzer));
  String line = d.readLine();
  while (line != null) {
    int endOfDate = line.indexOf('\t');
    String date = line.substring(0, endOfDate).trim();
    String content = line.substring(endOfDate).trim();
    Document doc = new Document();
    doc.add(newTextField("date", date, Field.Store.YES));
    doc.add(newTextField("contents", content, Field.Store.YES));
    doc.add(new IntField("date2", Integer.valueOf(date), Field.Store.NO));
    writer.addDocument(doc);
    line = d.readLine();
  }
  d.close();
  writer.close();
  reader = DirectoryReader.open(dir);
  searcher = newSearcher(reader);

}
Project: NYBC    File: TestTermVectorsWriter.java
public void testEndOffsetPositionStopFilter() throws Exception {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( 
      TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)));
  Document doc = new Document();
  FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorPositions(true);
  customType.setStoreTermVectorOffsets(true);
  Field f = newField("field", "abcd the", customType);
  doc.add(f);
  doc.add(f);
  w.addDocument(doc);
  w.close();

  IndexReader r = DirectoryReader.open(dir);
  TermsEnum termsEnum = r.getTermVectors(0).terms("field").iterator(null);
  assertNotNull(termsEnum.next());
  DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null);
  assertEquals(2, termsEnum.totalTermFreq());

  assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  dpEnum.nextPosition();
  assertEquals(0, dpEnum.startOffset());
  assertEquals(4, dpEnum.endOffset());

  dpEnum.nextPosition();
  assertEquals(9, dpEnum.startOffset());
  assertEquals(13, dpEnum.endOffset());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, dpEnum.nextDoc());

  r.close();
  dir.close();
}
Project: lucene-addons    File: QueryParserTestBase.java
public void testPhraseQueryToString() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
  CommonQueryParserConfiguration qp = getParserConfig(analyzer);
  qp.setEnablePositionIncrements(true);
  PhraseQuery q = (PhraseQuery)getQuery("\"this hi this is a test is\"", qp);
  assertEquals("field:\"? hi ? ? ? test\"", q.toString());
}
Project: lucene-addons    File: TestConcordanceArrayWindowSearcher.java
@Test
public void testWithStops() throws Exception {
  String[] docs = new String[]{"a b the d the f", "b c the d the e"};
  Analyzer analyzer = getAnalyzer(
      MockTokenFilter.ENGLISH_STOPSET, 50, 100);
  Directory directory = getDirectory(analyzer, docs);
  IndexReader reader = DirectoryReader.open(directory);
  IndexSearcher indexSearcher = new IndexSearcher(reader);

  IDFIndexCalc idfer = new IDFIndexCalc(reader);
  CooccurVisitor visitor = new CooccurVisitor(
      FIELD, 10, 10, new WGrammer(1, 1, false), idfer, 100, true);

  ((CooccurVisitor) visitor).setMinTermFreq(0);
  ConcordanceArrayWindowSearcher searcher = new ConcordanceArrayWindowSearcher();
  SpanQuery q = new SpanTermQuery(new Term(FIELD, "d"));

  searcher.search(indexSearcher, FIELD, q, null, analyzer, visitor,
      new IndexIdDocIdBuilder());

  List<TermIDF> results = ((CooccurVisitor) visitor).getResults();
  Map<String, Integer> truth = new HashMap<String, Integer>();

  truth.put("b", 2);
  truth.put("c", 1);
  truth.put("e", 1);
  truth.put("f", 1);
  assertEquals(truth.size(), results.size());

  for (TermIDF r : results) {

    assertEquals(r.getTerm(), truth.get(r.getTerm()).intValue(), r.getTermFreq());
  }
  reader.close();
  directory.close();
}
Project: lucene-addons    File: TestConcordanceSearcher.java
@Test
public void testWindowLengths() throws Exception {
  String[] doc = new String[]{"a b c d e f g"};
  List<String[]> docs = new ArrayList<>();
  docs.add(doc);
  Analyzer analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET);
  Directory directory = getDirectory(analyzer, docs);
  IndexReader reader = DirectoryReader.open(directory);
  IndexSearcher indexSearcher = new IndexSearcher(reader);

  SpanQuery q = new SpanTermQuery(new Term(FIELD, "d"));

  String[] pres = {"", "c", "b c", "a b c", "a b c", "a b c"};
  String[] posts = {"", " e", " e f", " e f g", " e f g", " e f g"};

  for (int tokensBefore = 0; tokensBefore < pres.length; tokensBefore++) {
    for (int tokensAfter = 0; tokensAfter < posts.length; tokensAfter++) {
      WindowBuilder wb = new WindowBuilder(tokensBefore, tokensAfter,
          analyzer.getOffsetGap(FIELD));
      ConcordanceSearcher searcher = new ConcordanceSearcher(wb);
      ConcordanceWindowCollector collector = new ConcordanceWindowCollector(100);
      searcher.search(indexSearcher, FIELD, q, null, analyzer, collector);
      ConcordanceWindow w = collector.getSortedWindows().get(0);
      assertEquals(tokensBefore + " : " + tokensAfter, pres[tokensBefore], w.getPre());
      assertEquals(tokensBefore + " : " + tokensAfter, posts[tokensAfter], w.getPost());
    }
  }

  reader.close();
  directory.close();

}
Project: lucene-addons    File: TestConcordanceSearcher.java
@Test
public void testWithStops() throws Exception {
  String[] docs = new String[]{"a b the d e the f", "g h the d the j"};
  Analyzer analyzer = getAnalyzer(MockTokenFilter.ENGLISH_STOPSET);
  Directory directory = getDirectory(analyzer, docs);
  IndexReader reader = DirectoryReader.open(directory);
  IndexSearcher indexSearcher = new IndexSearcher(reader);
  WindowBuilder wb = new WindowBuilder(2, 2, analyzer.getOffsetGap(FIELD));

  ConcordanceSearcher searcher = new ConcordanceSearcher(wb);
  SpanQuery q = new SpanTermQuery(new Term(FIELD, "d"));
  ConcordanceWindowCollector collector = new ConcordanceWindowCollector(3);

  searcher.search(indexSearcher, FIELD,
      q, null, analyzer, collector);
  List<ConcordanceWindow> windows = collector.getSortedWindows();
  assertEquals(2, windows.size());

  // the second word after the target is a stop word, so the post-component
  // of this window should only extend to the first word after the target
  assertEquals("b the", windows.get(0).getPre());
  assertEquals("d", windows.get(0).getTarget());
  assertEquals(" e", windows.get(0).getPost());

  assertEquals("h the", windows.get(1).getPre());
  assertEquals("d", windows.get(1).getTarget());
  assertEquals(" the j", windows.get(1).getPost());


  reader.close();
  directory.close();
}
Project: lucene-addons    File: TestConcordanceSearcher.java
@Test
public void testBasicStandardQueryConversion() throws Exception {
  String[] docs = new String[]{"a b c a b c", "c b a c b a d e a",
      "c b a c b a e a b c a"};
  Analyzer analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET);
  Directory directory = getDirectory(analyzer, docs);
  IndexReader reader = DirectoryReader.open(directory);
  IndexSearcher indexSearcher = new IndexSearcher(reader);
  ConcordanceSearcher searcher = new ConcordanceSearcher(
      new WindowBuilder(10, 10, analyzer.getOffsetGap(FIELD)));
  BooleanQuery q = new BooleanQuery.Builder()
    .add(new TermQuery(new Term(FIELD, "a")), Occur.MUST)
    .add(new TermQuery(new Term(FIELD, "d")),
      Occur.MUST_NOT).build();

  ConcordanceWindowCollector collector = new ConcordanceWindowCollector(10);
  searcher.search(indexSearcher,
      FIELD, q, null,
      analyzer, collector);
  // shouldn't include document with "d"
  assertEquals(6, collector.size());

  // should only include document with "e" and not "d"
  Query filter = new TermQuery(new Term(
      FIELD, "e"));
  collector = new ConcordanceWindowCollector(10);

  searcher.search(indexSearcher, FIELD, (Query) q, filter, analyzer, collector);
  assertEquals(4, collector.size());

  reader.close();
  directory.close();
}
Project: lucene-addons    File: TestConcordanceSearcher.java
@Test
public void testUniqueCollector() throws Exception {
  String[] docs = new String[]{"a b c d c b a",
      "a B C d c b a",
      "a b C d C B a",
      "a b c d C B A",
      "e f g d g f e",
      "h i j d j i h"
  };

  Analyzer analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET);
  Directory directory = getDirectory(analyzer, docs);
  IndexReader reader = DirectoryReader.open(directory);
  IndexSearcher indexSearcher = new IndexSearcher(reader);
  ConcordanceSearcher searcher = new ConcordanceSearcher(
      new WindowBuilder(10, 10, analyzer.getOffsetGap(FIELD)));
  SpanQuery q = new SpanTermQuery(new Term(FIELD, "d"));

  DedupingConcordanceWindowCollector collector = new DedupingConcordanceWindowCollector(2);
  searcher.search(indexSearcher,
      FIELD, (Query) q, null,
      analyzer, collector);
  assertEquals(2, collector.size());


  collector =
      new DedupingConcordanceWindowCollector(AbstractConcordanceWindowCollector.COLLECT_ALL);
  searcher.search(indexSearcher,
      FIELD, (Query) q, null,
      analyzer, collector);
  assertEquals(3, collector.size());


  reader.close();
  directory.close();

}
Project: lucene-addons    File: TestConcordanceSearcher.java
@Test
public void testUniqueCollectorWithSameWindowOverflow() throws Exception {
  String[] docs = new String[]{"a b c d c b a",
      "a b c d c b a",
      "a b c d c b a",
      "a b c d c b a",
      "e f g d g f e",
      "h i j d j i h"
  };

  Analyzer analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET);
  Directory directory = getDirectory(analyzer, docs);
  IndexReader reader = DirectoryReader.open(directory);
  IndexSearcher indexSearcher = new IndexSearcher(reader);
  ConcordanceSearcher searcher = new ConcordanceSearcher(
      new WindowBuilder(10, 10, analyzer.getOffsetGap(FIELD)));

  SpanQuery q = new SpanTermQuery(new Term(FIELD, "d"));

  DedupingConcordanceWindowCollector collector = new DedupingConcordanceWindowCollector(3);
  searcher.search(indexSearcher,
      FIELD, (Query) q, null,
      analyzer, collector);
  assertEquals(3, collector.size());
  assertEquals(4, collector.getSortedWindows().get(0).getCount());
  reader.close();
  directory.close();
}
Project: lucene-addons    File: TestConcordanceSearcher.java
@Test
public void testRewrites() throws Exception {
  //test to make sure that queries are rewritten
  //first test straight prefix queries
  String[] docs = new String[]{"aa ba ca aa ba ca", "ca ba aa ca ba aa da ea za",
      "ca ba aa ca ba aa ea aa ba ca za"};
  Analyzer analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET);
  Directory directory = getDirectory(analyzer, docs);
  IndexReader reader = DirectoryReader.open(directory);
  IndexSearcher indexSearcher = new IndexSearcher(reader);
  ConcordanceSearcher searcher = new ConcordanceSearcher(
      new WindowBuilder(10, 10, analyzer.getOffsetGap(FIELD)));
  BooleanQuery q = new BooleanQuery.Builder()
      .add(new PrefixQuery(new Term(FIELD, "a")), Occur.MUST)
      .add(new PrefixQuery(new Term(FIELD, "d")),
          Occur.MUST_NOT).build();

  //now test straight and span wrapper
  ConcordanceWindowCollector collector = new ConcordanceWindowCollector(10);
  searcher.search(indexSearcher,
      FIELD, q, new PrefixQuery(new Term(FIELD, "z")),
      analyzer, collector);
  // shouldn't include document with "da", but must include one with za
  assertEquals(3, collector.size());

  collector = new ConcordanceWindowCollector(10);
  searcher.search(indexSearcher,
      FIELD, q, new SpanMultiTermQueryWrapper<>(new PrefixQuery(new Term(FIELD, "z"))),
      analyzer, collector);
  // shouldn't include document with "da", but must include one with za
  assertEquals(3, collector.size());

  reader.close();
  directory.close();
}
Project: Maskana-Gestor-de-Conocimiento    File: HighlightCustomQueryTest.java
/**
 * This method is intended for use with
 * <tt>testHighlightingWithDefaultField()</tt>
 */
private String highlightField(Query query, String fieldName,
    String text) throws IOException, InvalidTokenOffsetsException {
  TokenStream tokenStream = new MockAnalyzer(random(), MockTokenizer.SIMPLE,
      true, MockTokenFilter.ENGLISH_STOPSET).tokenStream(fieldName, text);
  // Assuming "<B>", "</B>" used to highlight
  SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
  MyQueryScorer scorer = new MyQueryScorer(query, fieldName, FIELD_NAME);
  Highlighter highlighter = new Highlighter(formatter, scorer);
  highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));

  String rv = highlighter.getBestFragments(tokenStream, text, 1,
      "(FIELD TEXT TRUNCATED)");
  return rv.length() == 0 ? text : rv;
}
Project: Maskana-Gestor-de-Conocimiento    File: MemoryIndexTest.java
/**
 * Return a random analyzer (Simple, Stop, Crazy, or Whitespace) to analyze the terms.
 */
private Analyzer randomAnalyzer() {
  switch(random().nextInt(4)) {
    case 0: return new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
    case 1: return new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
    case 2: return new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new CrazyTokenFilter(tokenizer));
      }
    };
    default: return new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  }
}
Project: Maskana-Gestor-de-Conocimiento    File: FuzzySuggesterTest.java
/**
 * basic "standardanalyzer" test with stopword removal
 */
public void testStandard() throws Exception {
  Input keys[] = new Input[] {
      new Input("the ghost of christmas past", 50),
  };

  Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
  FuzzySuggester suggester = new FuzzySuggester(standard, standard, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, false, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
      FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, FuzzySuggester.DEFAULT_UNICODE_AWARE);
  suggester.build(new InputArrayIterator(keys));

  List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("the ghost of chris", random()), false, 1);
  assertEquals(1, results.size());
  assertEquals("the ghost of christmas past", results.get(0).key.toString());
  assertEquals(50, results.get(0).value, 0.01F);

  // omit the 'the' since it's a stopword; it's suggested anyway
  results = suggester.lookup(_TestUtil.stringToCharSequence("ghost of chris", random()), false, 1);
  assertEquals(1, results.size());
  assertEquals("the ghost of christmas past", results.get(0).key.toString());
  assertEquals(50, results.get(0).value, 0.01F);

  // omit the 'the' and 'of' since they are stopwords; it's suggested anyway
  results = suggester.lookup(_TestUtil.stringToCharSequence("ghost chris", random()), false, 1);
  assertEquals(1, results.size());
  assertEquals("the ghost of christmas past", results.get(0).key.toString());
  assertEquals(50, results.get(0).value, 0.01F);
}