Java 类org.apache.lucene.analysis.SimpleAnalyzer 实例源码

项目:jchampollion    文件:Corpus.java   
/**
 * Builds a Lucene index for a corpus from the documents under the given path.
 *
 * @param source   Name of the corpus ({source, target}); the index is written
 *                 to the directory named "&lt;source&gt;Index".
 * @param filePath Path of the file or directory containing the corpus
 *                 documents to index.
 */
public void buildIndex(String source, String filePath) {
    Date start = new Date();
    try {
        // Third argument 'true' creates a fresh index, replacing any existing one.
        IndexWriter writer = new IndexWriter(source + "Index", new SimpleAnalyzer(), true);
        try {
            indexDocs(writer, new File(filePath));
            writer.optimize();
        } finally {
            // Always release the writer (and its index lock), even if indexing fails.
            writer.close();
        }

        Date end = new Date();

        System.out.print(end.getTime() - start.getTime());
        System.out.println(" total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() +
                "\n with message: " + e.getMessage());
    }
}
项目:jchampollion    文件:Corpus.java   
/**
 * getSentencesContaining returns a Vector of Strings containing the
 * numbers of the sentences that contain the given words in the source
 * language corpus.
 *
 * <p>The query is first rewritten by {@code requireAll} so that every word
 * must be present (AND semantics), then run against {@code sourceSearcher};
 * the "snum" field of each hit is collected.
 *
 * @param    words_    The words to be found, separated by spaces
 * @return A Vector of the sentence numbers (the "snum" field values)
 */
public Vector<String> getSentencesContaining(String words_) {
    Vector<String> sentenceNums = new Vector<>();

    words_ = requireAll(words_);

    try {
        Analyzer analyzer = new SimpleAnalyzer();

        Query query = QueryParser.parse(words_, "contents", analyzer);
        Hits hits = sourceSearcher.search(query);

        // Add the numbers of all the hits to the Vector
        for (int i = 0; i < hits.length(); i++) {
            Document sentence = hits.doc(i);
            sentenceNums.add(sentence.get("snum"));
            //DEBUG System.out.println(sentence.get("snum") + ": " + sentence.get("contents"));
        }
    } catch (Exception e) {
        System.out.println(" caught a " + e.getClass()
                + "\n with message: " + e.getMessage());
    }

    return sentenceNums;
}
项目:jchampollion    文件:Corpus.java   
/**
 * Counts how many sentences in the given searcher contain all of the
 * supplied words.
 *
 * @param words_    The words to be found, separated by spaces
 * @param searcher  The searcher to be searched.
 * @return The number of sentences containing all of the words; 0 if the
 *         search fails
 */
public int numSentencesContaining(String words_, Searcher searcher) {
    words_ = requireAll(words_);
    //DEBUG System.out.println("Finding hits for " + words_);

    int matchCount = 0;
    try {
        // Parse the AND-combined terms against the "contents" field.
        Analyzer queryAnalyzer = new SimpleAnalyzer();
        Query parsed = QueryParser.parse(words_, "contents", queryAnalyzer);

        matchCount = searcher.search(parsed).length();
    } catch (Exception e) {
        System.out.println(" caught a " + e.getClass()
                + "\n with message: " + e.getMessage());
    }
    return matchCount;
}
项目:t4f-data    文件:MultiFieldQueryParserTest.java   
/**
 * Exercises MultiFieldQueryParser's default operator: a single-term query
 * ("development") over both "title" and "subject" should match books where
 * either field contains the term, so two different titles are expected in
 * the hits.
 */
public void testDefaultOperator() throws Exception {
  Query query = new MultiFieldQueryParser(Version.LUCENE_41,
                                          new String[]{"title", "subject"},
      new SimpleAnalyzer()).parse("development");

  Directory dir = TestUtil.getBookIndexDirectory();
  IndexSearcher searcher = new IndexSearcher(
                             dir,
                             true);
  TopDocs hits = searcher.search(query, 10);

  assertTrue(TestUtil.hitsIncludeTitle(
         searcher,
         hits,
         "Ant in Action"));

  assertTrue(TestUtil.hitsIncludeTitle(     //A
         searcher,                          //A
         hits,                              //A
         "Extreme Programming Explained")); //A
  searcher.close();
  dir.close();
}
项目:t4f-data    文件:MultiFieldQueryParserTest.java   
/**
 * Exercises MultiFieldQueryParser with explicitly specified per-field
 * operators: "lucene" is required (Occur.MUST) in BOTH "title" and
 * "subject", which is expected to narrow the result to exactly one book.
 */
public void testSpecifiedOperator() throws Exception {
  Query query = MultiFieldQueryParser.parse(Version.LUCENE_41,
      "lucene",
      new String[]{"title", "subject"},
      new BooleanClause.Occur[]{BooleanClause.Occur.MUST,
                BooleanClause.Occur.MUST},
      new SimpleAnalyzer());

  Directory dir = TestUtil.getBookIndexDirectory();
  IndexSearcher searcher = new IndexSearcher(
                             dir,
                             true);
  TopDocs hits = searcher.search(query, 10);

  assertTrue(TestUtil.hitsIncludeTitle(
          searcher,
          hits,
          "Lucene in Action, Second Edition"));
  assertEquals("one and only one", 1, hits.scoreDocs.length);
  searcher.close();
  dir.close();
}
项目:t4f-data    文件:KeywordAnalyzerTest.java   
/**
 * Builds an in-memory (RAMDirectory) index containing one document:
 * "partnum" is indexed without analysis (kept verbatim as "Q36"), while
 * "description" is analyzed by SimpleAnalyzer. Finally opens the searcher
 * used by the test methods.
 * NOTE(review): assigns the class-level {@code searcher} field declared
 * outside this snippet.
 */
public void setUp() throws Exception {
  Directory directory = new RAMDirectory();

  IndexWriter writer = new IndexWriter(directory,
                                       new SimpleAnalyzer(), 
                                       IndexWriter.MaxFieldLength.UNLIMITED);

  Document doc = new Document();
  doc.add(new Field("partnum",
                    "Q36",
                    Field.Store.NO,
                    Field.Index.NOT_ANALYZED_NO_NORMS));   //A
  doc.add(new Field("description",
                    "Illidium Space Modulator",
                    Field.Store.YES,
                    Field.Index.ANALYZED));
  writer.addDocument(doc);

  writer.close();

  searcher = new IndexSearcher(directory);
}
项目:windup-rulesets    文件:HsearchUtil.java   
/**
 * Builds a QueryParser over the "title" field using a SimpleAnalyzer and
 * validates it by parsing a sample wildcard query.
 *
 * @return the configured QueryParser
 * @throws ParseException if the sample query cannot be parsed
 */
public QueryParser getQuery() throws  ParseException {

    Analyzer analyzer = new SimpleAnalyzer();
    QueryParser parser = new QueryParser(org.apache.lucene.util.Version.LUCENE_4_0, "title", analyzer);

    // Parse a sample wildcard query; this exercises the parser configuration
    // (the resulting Query itself is not used by callers of this method).
    String querystr = "test*";
    Query query = parser.parse(querystr);

    // BUG FIX: the original method declared a QueryParser return type but
    // contained no return statement, so it did not compile.
    return parser;
}
项目:t4f-data    文件:KeywordAnalyzerTest.java   
/**
 * Demonstrates why a plain SimpleAnalyzer is wrong for keyword fields:
 * the parser analyzes "Q36" down to "q" (see the first assert), so it can
 * never match the un-analyzed "partnum" term and the document is not found.
 */
public void testBasicQueryParser() throws Exception {
  Query query = new QueryParser(Version.LUCENE_41,                //1
                                "description",                //1
                                new SimpleAnalyzer())            //1
                    .parse("partnum:Q36 AND SPACE");                //1
  assertEquals("note Q36 -> q",
               "+partnum:q +space", query.toString("description"));    //2
  assertEquals("doc not found :(", 0, TestUtil.hitCount(searcher, query));
}
项目:t4f-data    文件:KeywordAnalyzerTest.java   
/**
 * Fixes the keyword-field problem from testBasicQueryParser: wrapping the
 * default SimpleAnalyzer in a PerFieldAnalyzerWrapper and mapping "partnum"
 * to KeywordAnalyzer keeps "Q36" intact, so the document is found.
 */
public void testPerFieldAnalyzer() throws Exception {
  PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(
                                            new SimpleAnalyzer());
  analyzer.addAnalyzer("partnum", new KeywordAnalyzer());

  Query query = new QueryParser(Version.LUCENE_41,
                                "description", analyzer).parse(
              "partnum:Q36 AND SPACE");

  assertEquals("Q36 kept as-is",
            "+partnum:Q36 +space", query.toString("description"));  
  assertEquals("doc found!", 1, TestUtil.hitCount(searcher, query));
}
项目:accumulo-wikisearch    文件:WikipediaConfiguration.java   
/**
 * Resolves the analyzer class named by the ANALYZER configuration key
 * (defaulting to SimpleAnalyzer) and instantiates it reflectively.
 *
 * @param conf configuration supplying the analyzer class
 * @return a new instance of the configured Analyzer
 * @throws IOException declared for API compatibility
 */
public static Analyzer getAnalyzer(Configuration conf) throws IOException {
  Class<? extends Analyzer> analyzerType =
      conf.getClass(ANALYZER, SimpleAnalyzer.class, Analyzer.class);
  Analyzer analyzer = ReflectionUtils.newInstance(analyzerType, conf);
  return analyzer;
}
项目:jchampollion    文件:Corpus.java   
/**
 * Counts the intersection between the sentences containing S in the source
 * corpus and the sentences containing T in the target corpus.
 *
 * <p>Both hit lists are retrieved sorted by sentence number ("snum"), so
 * the intersection is computed with a single merge-style pass over the two
 * sorted lists.
 *
 * @param S
 *            The words in the source corpus, separated by spaces.
 * @param T
 *            The words in the target corpus, separated by spaces.
 * @return The number of sentences containing both all of the words in S and
 *         all of the words in T.
 */
public int countIntersections(String S, String T) {
    int retNum = 0;

    // Require all terms (AND semantics)
    S = requireAll(S);
    T = requireAll(T);

    try {
        // Get all sentences for the source terms, sorted by sentence number
        Analyzer sanalyzer = new SimpleAnalyzer();

        Query squery = QueryParser.parse(S, "contents", sanalyzer);
        Hits sHits = sourceSearcher.search(squery, new Sort("snum"));

        // Get all sentences for the target terms, sorted by sentence number
        Analyzer tanalyzer = new SimpleAnalyzer();

        Query tquery = QueryParser.parse(T, "contents", tanalyzer);
        Hits tHits = targetSearcher.search(tquery, new Sort("snum"));

        int sCount = 0;
        int tCount = 0;
        // Merge pass: advance the list with the smaller sentence number,
        // counting each number that appears in both lists.
        while (sCount < sHits.length() && tCount < tHits.length()) {
            Document sSentence = sHits.doc(sCount);
            // parseInt avoids the needless Integer boxing of valueOf
            int sSentNum = Integer.parseInt(sSentence.get("snum"));

            Document tSentence = tHits.doc(tCount);
            int tSentNum = Integer.parseInt(tSentence.get("snum"));

            //DEBUG System.out.println("s " + sSentNum + "\tt " + tSentNum);
            if (sSentNum == tSentNum) {
                retNum++;
                sCount++;
                tCount++;
            } else if (sSentNum > tSentNum) {
                tCount++;
            } else { // sSentNum < tSentNum is the only remaining case
                sCount++;
            }
        }

    } catch (Exception e) {
        System.out.println(" caught a " + e.getClass()
                + "\n with message: " + e.getMessage());
    }

    return retNum;
}