Java class org.apache.lucene.analysis.WhitespaceAnalyzer example source code
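The examples below come from open-source projects that use org.apache.lucene.analysis.WhitespaceAnalyzer, which tokenizes input purely on whitespace: no lowercasing, no stop-word removal, no stemming. That makes it a common choice for identifier-like or pre-tokenized fields. As a minimal orientation sketch (not taken from any of the projects below; it assumes Lucene 3.6 and uses a made-up class and field name):

import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

public class WhitespaceAnalyzerDemo {
  public static void main(String[] args) throws Exception {
    // Splits on whitespace only; case and punctuation are preserved.
    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_36);
    TokenStream ts = analyzer.tokenStream("field", new StringReader("The Quick Brown-Fox jumped"));
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      System.out.println(term.toString());  // The / Quick / Brown-Fox / jumped
    }
    ts.end();
    ts.close();
  }
}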

Project: bisis-v4    File: BulkIndexer.java
@Override
protected IndexWriter getIndexWriter() {
  if (++useCount % 1000 == 0) {
    close();
    writer = null;
  }
  if (writer == null) {
    try {
      boolean createIndex = true;
      File testIndexPath = new File(indexPath);
      if (!testIndexPath.exists())
        testIndexPath.mkdirs();
      if (testIndexPath.isDirectory()) {
        if (testIndexPath.list().length > 0)
          createIndex = false;
        writer = new IndexWriter(indexPath, new WhitespaceAnalyzer(), createIndex);
      }
    } catch (Exception ex) {
      log.fatal(ex);
    }
  }
  return writer;
}
Project: bisis-v4    File: Retriever.java
/**
 * Executes a select query.
 * @param query the Lucene query
 * @param sortPrefix the field prefix used to sort the results
 * @return an array of record IDs; an empty array if an error occurred
 * @throws ParseException if the query cannot be parsed
 */
public int[] select(String query, String sortPrefix) throws ParseException {
  try {
    WhitespaceAnalyzer sa = new WhitespaceAnalyzer();
    BooleanQuery.setMaxClauseCount(20000); // because of heap size
    QueryParser p = new QueryParser("KW", sa);
    p.setDefaultOperator(QueryParser.Operator.AND); // default operator is AND, not the initial default OR
    Query q = p.parse(query);
    return select(q, sortPrefix);
  } catch (Exception ex) {
    if (ex instanceof ParseException)
      throw (ParseException) ex;
    log.warn(ex);
    return new int[0];
  }
}
Project: bisis-v4    File: Indexer.java
/**
 * Returns a new Lucene index writer. Creates the index if necessary.  
 * @return the index writer, or <code>null</code> if it could not be created
 */
protected IndexWriter getIndexWriter() {
  try {
    boolean createIndex = true;
    File testIndexPath = new File(indexPath);
    if (!testIndexPath.exists())
      testIndexPath.mkdirs();
    if (testIndexPath.isDirectory()) {
      if (testIndexPath.list().length > 0)
        createIndex = false;
      return new IndexWriter(indexPath, new WhitespaceAnalyzer(), createIndex);
    }
  } catch (Exception ex) {
    log.fatal(ex);
  }
  return null;
}
Project: knowledgestore    File: LuceneDataStore.java
@Override
    public void init() throws IOException, IllegalStateException {
        Files.createDirectories(Paths.get(mentionsFolder));
        Files.createDirectories(Paths.get(resourcesFolder));

        writingOperations.put(KS.RESOURCE, new AtomicInteger(0));
        writingOperations.put(KS.MENTION, new AtomicInteger(0));

        try {
            writers.put(KS.RESOURCE, new IndexWriter(FSDirectory.open(new File(resourcesFolder)), new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED));
            writers.put(KS.MENTION, new IndexWriter(FSDirectory.open(new File(mentionsFolder)), new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED));

//          writers.get(KS.RESOURCE).setUseCompoundFile(true);
//          writers.get(KS.MENTION).setUseCompoundFile(true);

            writers.get(KS.RESOURCE).optimize(MAX_LUCENE_SEGMENTS);
            writers.get(KS.MENTION).optimize(MAX_LUCENE_SEGMENTS);

            readers.put(KS.RESOURCE, writers.get(KS.RESOURCE).getReader());
            readers.put(KS.MENTION, writers.get(KS.MENTION).getReader());
        } catch (Exception e) {
            logger.error(e.getMessage());
        }
    }
Project: webdsl    File: AbstractIndexManager.java
protected static boolean clearIndex(File path) {
    try {
        if (path == null || !path.exists())
            return true; // if the path doesn't exist, there is nothing to clear

        FSDirectory indexDir = new FSDirectoryProvider().getDirectory();
        IndexWriter writer = new IndexWriter(indexDir.open(path),
                new IndexWriterConfig(Version.LUCENE_CURRENT,
                        new WhitespaceAnalyzer(Version.LUCENE_CURRENT)));
        writer.deleteAll();
        writer.close();
        return true;
    } catch (Exception ex) {
        org.webdsl.logging.Logger.error(
                "Error while clearing index on location: " + path, ex);
        return false;
    }

}
Project: webdsl    File: AutoCompleter.java
/**
 * Use a different index as the auto completer index or re-open
 * the existing index if <code>autocompleteIndex</code> is the same value
 * as given in the constructor.
 * @param autocompleteIndexDir the autocomplete directory to use
 * @throws AlreadyClosedException if the Autocompleter is already closed
 * @throws IOException if the autocompleter cannot open the directory
 */
// TODO: we should make this final as it is called in the constructor
public void setAutoCompleteIndex(Directory autocompleteIndexDir) throws IOException {
  // this could be the same directory as the current autocompleteIndex
  // modifications to the directory should be synchronized
  synchronized (modifyCurrentIndexLock) {
    ensureOpen();
    if (!IndexReader.indexExists(autocompleteIndexDir)) {
        IndexWriter writer = new IndexWriter(autocompleteIndexDir,
          new IndexWriterConfig(Version.LUCENE_CURRENT,
              new WhitespaceAnalyzer(Version.LUCENE_CURRENT)));
        writer.close();
    }
    swapSearcher(autocompleteIndexDir);
  }
}
Project: t4f-data    File: MultiPhraseQueryTest.java
protected void setUp() throws Exception {
  Directory directory = new RAMDirectory();
  IndexWriter writer = new IndexWriter(directory,
                                       new WhitespaceAnalyzer(Version.LUCENE_41),
                                       IndexWriter.MaxFieldLength.UNLIMITED);
  Document doc1 = new Document();
  doc1.add(new Field("field",
            "the quick brown fox jumped over the lazy dog",
            Field.Store.YES, Field.Index.ANALYZED));
  writer.addDocument(doc1);
  Document doc2 = new Document();
  doc2.add(new Field("field",
            "the fast fox hopped over the hound",
            Field.Store.YES, Field.Index.ANALYZED));
  writer.addDocument(doc2);
  writer.close();

  searcher = new IndexSearcher(directory);
}
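A hedged sketch of the kind of test this fixture supports (the method name and assertion are assumptions; the two documents come from the setUp above): MultiPhraseQuery lets several terms share one phrase position, so either "quick" or "fast" may precede "fox".

public void testQuickOrFastFox() throws Exception {
  // either "quick" or "fast" may occupy the first phrase position
  MultiPhraseQuery query = new MultiPhraseQuery();
  query.add(new Term[] { new Term("field", "quick"), new Term("field", "fast") });
  query.add(new Term("field", "fox"));
  query.setSlop(1);                                  // lets "brown" sit between "quick" and "fox"
  TopDocs matches = searcher.search(query, 10);
  assertEquals(2, matches.totalHits);                // both documents from setUp() match
}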
Project: t4f-data    File: DistanceSortingTest.java
protected void setUp() throws Exception {
  directory = new RAMDirectory();
  IndexWriter writer =
      new IndexWriter(directory, new WhitespaceAnalyzer(Version.LUCENE_41),
                      IndexWriter.MaxFieldLength.UNLIMITED);
  addPoint(writer, "El Charro", "restaurant", 1, 2);
  addPoint(writer, "Cafe Poca Cosa", "restaurant", 5, 9);
  addPoint(writer, "Los Betos", "restaurant", 9, 6);
  addPoint(writer, "Nico's Taco Shop", "restaurant", 3, 8);

  writer.close();

  searcher = new IndexSearcher(directory);

  query = new TermQuery(new Term("type", "restaurant"));
}
Project: t4f-data    File: AnalysisParalysisTest.java
public void testAnalyzer() throws Exception {
  Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_41);
  String queryString = "category:/philosophy/eastern";

  Query query = new QueryParser(Version.LUCENE_41,
                                "contents",
                                analyzer).parse(queryString);
  assertEquals("path got split, yikes!",
               "category:\"philosophy eastern\"",
               query.toString("contents"));

  PerFieldAnalyzerWrapper perFieldAnalyzer =
                          new PerFieldAnalyzerWrapper(analyzer);
  perFieldAnalyzer.addAnalyzer("category",
                                     new WhitespaceAnalyzer(Version.LUCENE_41));
  query = new QueryParser(Version.LUCENE_41,
                          "contents",
                          perFieldAnalyzer).parse(queryString);
  assertEquals("leave category field alone",
               "category:/philosophy/eastern",
               query.toString("contents"));
}
Project: incubator-netbeans    File: DocumentUtil.java
public static Analyzer createAnalyzer() {
    final PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new KeywordAnalyzer());
    analyzer.addAnalyzer(DocumentUtil.FIELD_IDENTS, new WhitespaceAnalyzer());
    analyzer.addAnalyzer(DocumentUtil.FIELD_FEATURE_IDENTS, new WhitespaceAnalyzer());
    analyzer.addAnalyzer(DocumentUtil.FIELD_CASE_INSENSITIVE_FEATURE_IDENTS, new DocumentUtil.LCWhitespaceAnalyzer());
    return analyzer;
}
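For orientation, a hedged sketch (not part of the NetBeans sources; it assumes a Lucene 3.x release where the deprecated no-argument constructors used above still exist, and the class and field names are made up) of what the per-field dispatch built by createAnalyzer() does: identifier fields are split on whitespace, all remaining fields are kept as a single keyword token.

import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class PerFieldDispatchDemo {
  public static void main(String[] args) throws Exception {
    // Identifier fields get whitespace tokenization, everything else stays a single keyword token.
    PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new KeywordAnalyzer());
    analyzer.addAnalyzer("idents", new WhitespaceAnalyzer());

    print(analyzer, "idents", "foo bar");   // two tokens: "foo", "bar"
    print(analyzer, "name",   "foo bar");   // one token:  "foo bar"
  }

  private static void print(Analyzer analyzer, String field, String text) throws Exception {
    TokenStream ts = analyzer.tokenStream(field, new StringReader(text));
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      System.out.println(field + ": " + term.toString());
    }
    ts.end();
    ts.close();
  }
}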
Project: bisis-v4    File: QueryUtils.java
public static Filter getQueryFilter(String query) {
    try {
        WhitespaceAnalyzer sa = new WhitespaceAnalyzer();
        QueryParser p = new QueryParser("contents", sa);
        Query q = p.parse(query);
        Filter filter = new QueryWrapperFilter(q);
        return filter;
    } catch (Exception e) {
        return null;
    }
}
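A hedged usage sketch, not bisis-v4 code: the Filter returned by getQueryFilter can be passed to IndexSearcher.search to restrict another query to the documents matched by the wrapped query. The searcher variable and both query strings are assumptions.

// Hypothetical usage; `searcher` is an open IndexSearcher on the same index and the
// query strings are illustrative.
Filter filter = QueryUtils.getQueryFilter("KW:lucene");
if (filter != null) {
    Query q = new TermQuery(new Term("contents", "analyzer"));
    TopDocs hits = searcher.search(q, filter, 10);  // only documents that also match the filter
    System.out.println("filtered matches: " + hits.totalHits);
}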
Project: bisis-v4    File: Retriever.java
public List<String> selectExpand(String query, String prefix, String text) {
  try {
    WhitespaceAnalyzer sa = new WhitespaceAnalyzer();
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
    QueryParser p = new QueryParser("contents", sa);
    Query q = p.parse(query);
    Searcher searcher = new IndexSearcher(indexPath);
    StopWatch clock = new StopWatch();
    clock.start();
    Hits hits = searcher.search(q);
    int n = hits.length();
    List<String> expandList = new ArrayList<String>();
    Field[] tmp = null;
    String pom = "";
    for (int i = 0; i < n; i++) {
      tmp = hits.doc(i).getFields(prefix);
      if (tmp != null) {
        for (int j = 0; j < tmp.length; j++) {
          pom = tmp[j].stringValue().replace("0start0 ", "");
          pom = pom.replace(" 0end0", "");
          if (pom.startsWith(text) && !expandList.contains(pom)) {
            expandList.add(pom);
          }
        }
      }
    }
    clock.stop();
    searcher.close();
    return expandList;
  } catch (Exception ex) {
    log.fatal(ex);
    return null;
  }
}
Project: THUTag    File: ExpandRankKE.java
@Override
public void loadModel(String modelPath) throws IOException {
    docsSearcher = new IndexSearcher((new File(modelPath, "docs"))
            .getAbsolutePath());
    String[] fields = { "doc_id", "content", "user_id", "tag" };
    queryParser = new MultiFieldQueryParser(fields,
            new WhitespaceAnalyzer());
}
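A hedged sketch of what the parser configured above produces (the query string, hit count, and loop are illustrative, not THUTag code): MultiFieldQueryParser expands every clause across the listed fields.

// Hypothetical usage; "beijing travel" is expanded roughly to
//   (doc_id:beijing content:beijing user_id:beijing tag:beijing)
//   (doc_id:travel  content:travel  user_id:travel  tag:travel)
Query q = queryParser.parse("beijing travel");
TopDocs top = docsSearcher.search(q, null, 20);
for (ScoreDoc sd : top.scoreDocs) {
    Document d = docsSearcher.doc(sd.doc);
    System.out.println(d.get("tag"));
}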
Project: webdsl    File: AutoCompleter.java
/**
 * Removes all terms from the auto complete index.
 * @throws IOException
 * @throws AlreadyClosedException if the Autocompleter is already closed
 */
public void clearIndex() throws IOException {
  synchronized (modifyCurrentIndexLock) {
    ensureOpen();
    final Directory dir = this.autoCompleteIndex;
    final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
        Version.LUCENE_CURRENT,
        new WhitespaceAnalyzer(Version.LUCENE_CURRENT))
        .setOpenMode(OpenMode.CREATE));
    writer.close();
    swapSearcher(dir);
  }
}
Project: webdsl    File: AutoCompleter.java
/**
 * Indexes the data from the given reader.
 * @param reader the source index reader, from which autocomplete words are obtained for the given field
 * @param field the field of the source index reader to index for autocompletion
 * @param mergeFactor the mergeFactor to use when indexing
 * @param ramMB the max amount of memory in MB to use
 * @param optimize whether or not the autocomplete index should be optimized
 * @throws AlreadyClosedException if the Autocompleter is already closed
 * @throws IOException
 */
 public final void indexDictionary(IndexReader reader, String field, int mergeFactor, int ramMB, boolean optimize) throws IOException {
   synchronized (modifyCurrentIndexLock) {
     ensureOpen();
     final Directory dir = this.autoCompleteIndex;
     final Dictionary dict = new LuceneDictionary(reader, field);
     final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, new WhitespaceAnalyzer(Version.LUCENE_CURRENT)).setRAMBufferSizeMB(ramMB));
     IndexSearcher indexSearcher = obtainSearcher();
     final List<IndexReader> readers = new ArrayList<IndexReader>();

     if (searcher.maxDoc() > 0) {
       ReaderUtil.gatherSubReaders(readers, searcher.getIndexReader());
     }

     //clear the index
     writer.deleteAll();

     try {
       Iterator<String> iter = dict.getWordsIterator();

       while (iter.hasNext()) {
         String word = iter.next();

         // ok index the word
         Document doc = createDocument(word, reader.docFreq(new Term(field, word)));
         writer.addDocument(doc);
       }
     } finally {
       releaseSearcher(indexSearcher);
     }
     // close writer
     if (optimize)
       writer.optimize();
     writer.close();
     // also re-open the autocomplete index to see our own changes when the next suggestion
     // is fetched:
     swapSearcher(dir);
   }
 }
Project: gumtree-spoon-ast-diff    File: left_TestSpans_1.3.java
public void setUp() throws Exception {
  RAMDirectory directory = new RAMDirectory();
  IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
  StringBuffer buffer = new StringBuffer();
  for (int i = 0; i < docFields.length; i++) {
    Document doc = new Document();
    doc.add(Field.Text(field, docFields[i]));
    writer.addDocument(doc);
  }
  writer.close();
  searcher = new IndexSearcher(directory);
}
Project: gumtree-spoon-ast-diff    File: right_TestSpans_1.4.java
public void setUp() throws Exception {
  RAMDirectory directory = new RAMDirectory();
  IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
  for (int i = 0; i < docFields.length; i++) {
    Document doc = new Document();
    doc.add(Field.Text(field, docFields[i]));
    writer.addDocument(doc);
  }
  writer.close();
  searcher = new IndexSearcher(directory);
}
Project: t4f-data    File: PhraseQueryTest.java
protected void setUp() throws IOException {
  dir = new RAMDirectory();
  IndexWriter writer = new IndexWriter(dir,
                                       new WhitespaceAnalyzer(Version.LUCENE_41),
                                       IndexWriter.MaxFieldLength.UNLIMITED);
  Document doc = new Document();
  doc.add(new Field("field",                                    
            "the quick brown fox jumped over the lazy dog",     
            Field.Store.YES,                                    
            Field.Index.ANALYZED));                             
  writer.addDocument(doc);
  writer.close();

  searcher = new IndexSearcher(dir);
}
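A hedged sketch of an assertion this fixture supports (the method name is made up, but the behaviour follows from the single indexed document): an exact PhraseQuery for "quick fox" misses because "brown" sits between the terms, while a slop of 1 lets it match.

public void testSlopSketch() throws Exception {
  PhraseQuery query = new PhraseQuery();
  query.add(new Term("field", "quick"));
  query.add(new Term("field", "fox"));

  assertEquals(0, searcher.search(query, 10).totalHits);  // "quick fox" is not an exact phrase here

  query.setSlop(1);                                        // allow one position of movement ("brown")
  assertEquals(1, searcher.search(query, 10).totalHits);
}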
Project: t4f-data    File: ScoreTest.java
private void indexSingleFieldDocs(Field[] fields) throws Exception {
  IndexWriter writer = new IndexWriter(directory,
      new WhitespaceAnalyzer(Version.LUCENE_41), IndexWriter.MaxFieldLength.UNLIMITED);
  for (Field f : fields) {
    Document doc = new Document();
    doc.add(f);
    writer.addDocument(doc);
  }
  writer.merge(writer.getNextMerge());
  writer.close();
}
Project: t4f-data    File: SpanQueryTest.java
protected void setUp() throws Exception {
  directory = new RAMDirectory();

  analyzer = new WhitespaceAnalyzer(Version.LUCENE_41);
  IndexWriter writer = new IndexWriter(directory,
                                       analyzer,
                                       IndexWriter.MaxFieldLength.UNLIMITED);

  Document doc = new Document();
  doc.add(new Field("f",
      "the quick brown fox jumps over the lazy dog",
      Field.Store.YES, Field.Index.ANALYZED));
  writer.addDocument(doc);

  doc = new Document();
  doc.add(new Field("f",
      "the quick red fox jumps over the sleepy cat",
      Field.Store.YES, Field.Index.ANALYZED));
  writer.addDocument(doc);

  writer.close();

  searcher = new IndexSearcher(directory);
  reader = searcher.getIndexReader();

  quick = new SpanTermQuery(new Term("f", "quick"));
  brown = new SpanTermQuery(new Term("f", "brown"));
  red = new SpanTermQuery(new Term("f", "red"));
  fox = new SpanTermQuery(new Term("f", "fox"));
  lazy = new SpanTermQuery(new Term("f", "lazy"));
  sleepy = new SpanTermQuery(new Term("f", "sleepy"));
  dog = new SpanTermQuery(new Term("f", "dog"));
  cat = new SpanTermQuery(new Term("f", "cat"));
}
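A hedged sketch of how the span queries prepared above are typically combined (the method name and assertions are assumptions): SpanNearQuery matches when its clauses occur within a position window, here in order.

public void testSpanNearSketch() throws Exception {
  // quick, brown, fox within two positions of each other, in order: only the first document
  SpanNearQuery quickBrownFox =
      new SpanNearQuery(new SpanQuery[] { quick, brown, fox }, 2, true);
  assertEquals(1, searcher.search(quickBrownFox, 10).totalHits);

  // swapping brown for red matches only the second document
  SpanNearQuery quickRedFox =
      new SpanNearQuery(new SpanQuery[] { quick, red, fox }, 2, true);
  assertEquals(1, searcher.search(quickRedFox, 10).totalHits);
}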
Project: t4f-data    File: SecurityFilterTest.java
protected void setUp() throws Exception {
  Directory directory = new RAMDirectory();
  IndexWriter writer = new IndexWriter(directory,
                                       new WhitespaceAnalyzer(Version.LUCENE_41),
                                       IndexWriter.MaxFieldLength.UNLIMITED);

  Document document = new Document();                  
  document.add(new Field("owner",                      
                         "elwood",                     
                         Field.Store.YES,              
                         Field.Index.NOT_ANALYZED));   
  document.add(new Field("keywords",                   
                         "elwood's sensitive info",    
                         Field.Store.YES,              
                         Field.Index.ANALYZED));       
  writer.addDocument(document);

  document = new Document();                           
  document.add(new Field("owner",                      
                         "jake",                       
                         Field.Store.YES,              
                         Field.Index.NOT_ANALYZED));   
  document.add(new Field("keywords",                   
                         "jake's sensitive info",      
                         Field.Store.YES,              
                         Field.Index.ANALYZED));       
  writer.addDocument(document);

  writer.close();
  searcher = new IndexSearcher(directory);
}
Project: t4f-data    File: MultiSearcherTest.java
public void setUp() throws Exception {
  String[] animals = { "aardvark", "beaver", "coati",
                     "dog", "elephant", "frog", "gila monster",
                     "horse", "iguana", "javelina", "kangaroo",
                     "lemur", "moose", "nematode", "orca",
                     "python", "quokka", "rat", "scorpion",
                     "tarantula", "uromastyx", "vicuna",
                     "walrus", "xiphias", "yak", "zebra"};

  Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_41);

  Directory aTOmDirectory = new RAMDirectory();     //
  Directory nTOzDirectory = new RAMDirectory();     //

  IndexWriter aTOmWriter = new IndexWriter(aTOmDirectory,
                                           analyzer,
                                           IndexWriter.MaxFieldLength.UNLIMITED);
  IndexWriter nTOzWriter = new IndexWriter(nTOzDirectory,
                                           analyzer,
                                           IndexWriter.MaxFieldLength.UNLIMITED);


  for (int i=animals.length - 1; i >= 0; i--) {
    Document doc = new Document();
    String animal = animals[i];
    doc.add(new Field("animal", animal, Field.Store.YES, Field.Index.NOT_ANALYZED));
    if (animal.charAt(0) < 'n') {
      aTOmWriter.addDocument(doc);                 //
    } else {                                       
      nTOzWriter.addDocument(doc);                 //
    }
  }

  aTOmWriter.close();
  nTOzWriter.close();

  searchers = new IndexSearcher[2];
  searchers[0] = new IndexSearcher(aTOmDirectory);
  searchers[1] = new IndexSearcher(nTOzDirectory);
}
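A hedged sketch of the test this fixture sets up for (the method name and assertions are assumptions): a MultiSearcher spans both sub-indexes, so a term stored in either index is found through the single combined searcher.

public void testSearchBothIndexes() throws Exception {
  // one MultiSearcher spans the a-m and n-z indexes built in setUp()
  MultiSearcher multiSearcher = new MultiSearcher(searchers);
  assertEquals(1, multiSearcher.search(new TermQuery(new Term("animal", "aardvark")), 10).totalHits);
  assertEquals(1, multiSearcher.search(new TermQuery(new Term("animal", "zebra")), 10).totalHits);
  multiSearcher.close();
}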
Project: nakala    File: ExcerptIndexerTest.java
@Test
public void testIndex() throws Exception {
    String SB = PrePostProcessor.SENTENCE_BOUNDARY;
    String sb = SB.toLowerCase();
    String PUNCT = ExcerptIndexer.PUNCT;
    ExcerptIndexer indexer = new ExcerptIndexer(new WhitespaceAnalyzer(Version.LUCENE_36), PrePostProcessor.newInstance());
    TitledContentArray ses = new TitledContentArray();
    ses.add(new Review(new Id(1), "First Title", "First contents are here. This is the second sentence in first content.", 1.0));
    ses.add(new Review(new Id(3), "Second Title", "Second contents are here. This is the second sentence in second content.", 0.75));
    indexer.index(ses);
    IndexSearcher is = indexer.getIndexSearcher();
    assertEquals(2, is.maxDoc());
    Document doc = is.doc(0);
    assertEquals("1", doc.get(ExcerptIndexer.DESC_ID));
    System.out.println("Contents: " + doc.get(ExcerptIndexer.CONTENTS));
    System.out.println("Original: " + doc.get(ExcerptIndexer.ORIGINAL));
    assertEquals("first contents are here " + PUNCT + ' ' + sb + " this is the second sentence in first content " + PUNCT + ' ' + sb, doc.get(ExcerptIndexer.CONTENTS));
    assertEquals("First contents are here . " + SB + " This is the second sentence in first content . " + SB, doc.get(ExcerptIndexer.ORIGINAL));
    assertEquals("first title", doc.get(ExcerptIndexer.TITLE));
    assertEquals("First Title", doc.get(ExcerptIndexer.TITLE_ORIGINAL));

    doc = is.doc(1);
    assertEquals("3", doc.get(ExcerptIndexer.DESC_ID));
    System.out.println("Contents: " + doc.get(ExcerptIndexer.CONTENTS));
    System.out.println("Original: " + doc.get(ExcerptIndexer.ORIGINAL));
    assertEquals("second contents are here " + PUNCT + ' ' + sb + " this is the second sentence in second content " + PUNCT + ' ' + sb, doc.get(ExcerptIndexer.CONTENTS));
    assertEquals("Second contents are here . " + SB + " This is the second sentence in second content . " + SB, doc.get(ExcerptIndexer.ORIGINAL));
    assertEquals("second title", doc.get(ExcerptIndexer.TITLE));
    assertEquals("Second Title", doc.get(ExcerptIndexer.TITLE_ORIGINAL));
}
Project: bisis-v4    File: Retriever.java
public Result selectAll1(String query, String sortPrefix) throws ParseException {
  try {
    WhitespaceAnalyzer sa = new WhitespaceAnalyzer();
    Searcher searcher = new IndexSearcher(indexPath);
    BooleanQuery.setMaxClauseCount(20000); // because of heap size
    QueryParser p = new QueryParser("KW", sa);
    p.setDefaultOperator(QueryParser.Operator.AND); // default operator is AND, not the initial default OR
    Query q = p.parse(query);
    Hits hits;
    if (sortPrefix == null || "".equals(sortPrefix))
      hits = searcher.search(q);
    else {
      int sortType = SortField.STRING;
      if ("RN_sort".equals(sortPrefix))
        sortType = SortField.INT;
      hits = searcher.search(q, new Sort(
          new SortField(sortPrefix, sortType)));
    }

    int n = hits.length();
    int[] retVal = new int[n];
    List<String> invs = new ArrayList<String>();
    Field[] tmp = null;

    for (int i = 0; i < n; i++) {
      String recordID = hits.doc(i).get("ID");
      retVal[i] = Integer.parseInt(recordID);
      tmp = hits.doc(i).getFields("IN");
      if (tmp != null){
        for (int j = 0; j<tmp.length; j++){
          invs.add(tmp[j].stringValue());
        } 
      }
    }
    searcher.close();
    Result result = new Result();
    result.setRecords(retVal);
    result.setInvs(invs);
    return result;
  } catch (Exception ex) {
    if (ex instanceof ParseException)
      throw (ParseException) ex;
    log.fatal(ex);
    return null;
  }
}
Project: THUTag    File: TrainKnn.java
public void buildIndexes(String input, File modelDir)
throws IOException {
  if (!modelDir.exists()) {
    modelDir.mkdir();
  }

  Set<String> whitelist = new HashSet<String>();
  Set<String> blacklist = new HashSet<String>();

  if (config.getProperty("whitelist", "").length() > 0) {
    whitelist.addAll(
        Arrays.asList(config.getProperty("whitelist", "").split(",")));
  }
  if (config.getProperty("blacklist", "").length() > 0) {
    blacklist.addAll(
        Arrays.asList(config.getProperty("blacklist", "").split(",")));
  }

  WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
  JsonUtil J = new JsonUtil();    
  IndexWriter docsIndex =
    new IndexWriter(new File(modelDir, "docs"), analyzer);

  RecordReader reader = new RecordReader(input);
  while (reader.next()) {
    Post p = J.fromJson(reader.value(), Post.class);
    if (blacklist.contains(p.getUserId())) {
      continue;
    }
    if (whitelist.size() > 0 && !whitelist.contains(p.getUserId())) {
      continue;
    }
    if (fold.length() > 0 && p.getExtras().equals(fold)) {
      continue;
    }
    Document contentDoc = makeContentDoc(p);
    docsIndex.addDocument(contentDoc);
    if (reader.numRead() % 5000 == 0) {
      LOG.info("Added " + reader.numRead() + " documents.");
    }
  }
  reader.close();

  LOG.info("Optimizing docs index...");
  docsIndex.optimize();
  docsIndex.close();
}
Project: THUTag    File: TrainExpandRank.java
public void buildIndexes(String input, File modelDir)
throws IOException {
  if (!modelDir.exists()) {
    modelDir.mkdir();
  }

  Set<String> whitelist = new HashSet<String>();
  Set<String> blacklist = new HashSet<String>();

  if (config.getProperty("whitelist", "").length() > 0) {
    whitelist.addAll(
        Arrays.asList(config.getProperty("whitelist", "").split(",")));
  }
  if (config.getProperty("blacklist", "").length() > 0) {
    blacklist.addAll(
        Arrays.asList(config.getProperty("blacklist", "").split(",")));
  }

  WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
  JsonUtil J = new JsonUtil();    
  IndexWriter docsIndex =
    new IndexWriter(new File(modelDir, "docs"), analyzer);

  RecordReader reader = new RecordReader(input);
  while (reader.next()) {
    //Post p = J.fromJson(reader.value(), Post.class);
    KeywordPost p = J.fromJson(reader.value(), KeywordPost.class);
    if (blacklist.contains(p.getUserId())) {
      continue;
    }
    if (whitelist.size() > 0 && !whitelist.contains(p.getUserId())) {
      continue;
    }
    if (fold.length() > 0 && p.getExtras().equals(fold)) {
      continue;
    }
    Document contentDoc = makeContentDoc(p);
    docsIndex.addDocument(contentDoc);
    if (reader.numRead() % 5000 == 0) {
      LOG.info("Added " + reader.numRead() + " documents.");
    }
  }
  reader.close();

  LOG.info("Optimizing docs index...");
  docsIndex.optimize();
  docsIndex.close();
}
Project: THUTag    File: KnnTagSuggest.java
@Override
public void loadModel(String modelPath) throws IOException {
  docsSearcher =  new IndexSearcher((new File(modelPath, "docs")).getAbsolutePath());
  String [] fields = {"doc_id", "content", "user_id", "tag"};
  queryParser = new MultiFieldQueryParser(fields, new WhitespaceAnalyzer());
}
Project: knowledgestore    File: HadoopMultiFileStore.java
@Override
public void init() throws IOException {

    // Create root folder if missing
    if (!this.fileSystem.exists(this.rootPath)) {
        LOGGER.debug("Creating root folder {}", this.rootPath);
        if (!this.fileSystem.mkdirs(this.rootPath)) {
            throw new IOException("Cannot create root folder " + this.luceneFolder);
        }
    }

    // Create sub-folder for small files, if missing
    if (!this.fileSystem.exists(this.smallFilesPath)) {
        LOGGER.debug("Creating small files folder {}", this.smallFilesPath);
        if (!this.fileSystem.mkdirs(this.smallFilesPath)) {
            throw new IOException("Cannot create small files folder " + this.smallFilesPath);
        }
    }

    // Create folder for lucene index, if missing
    if (!this.luceneFolder.exists()) {
        LOGGER.debug("Created lucene folder {}", this.luceneFolder);
        if (!this.luceneFolder.mkdirs()) {
            throw new IOException("Cannot create lucene folder " + this.luceneFolder);
        }
    }

    // Initialize Lucene writer and reader
    this.luceneWriter = new IndexWriter(FSDirectory.open(this.luceneFolder),
            new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    this.luceneReader = this.luceneWriter.getReader();

    // Mark the component as active
    this.active.set(true);

    // Schedule periodic cleanup
    this.cleanupFuture = Data.getExecutor().scheduleWithFixedDelay(new Runnable() {

        @Override
        public void run() {
            try {
                merge();
                purge();
                indexOptimize();
            } catch (final Throwable ex) {
                LOGGER.warn("Periodic cleanup failed", ex);
            }
        }

    }, this.cleanupPeriod, this.cleanupPeriod, TimeUnit.MILLISECONDS);
}
Project: imhotep    File: TestCloseSessionDuringFTGS.java
@Test
    public void testCloseSessionDuringFTGS() throws ImhotepOutOfMemoryException, IOException, InterruptedException {
        String tempDir = Files.getTempDirectory("asdf", "");
        try {
            IndexWriter w = new IndexWriter(tempDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            Random rand = new Random();
            for (int i = 0; i < 1000000; ++i) {
                int numTerms = rand.nextInt(5) + 5;
                Document doc = new Document();
                for (int t = 0; t < numTerms; ++t) {
                    doc.add(new Field("sf1", Integer.toString(rand.nextInt(10000)), Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
                }
                w.addDocument(doc);
            }

            w.close();

            final AtomicBoolean closed = new AtomicBoolean(false);
            FlamdexReader r = new LuceneFlamdexReader(IndexReader.open(tempDir)) {
                @Override
                public void close() throws IOException {
                    super.close();
                    closed.set(true);
                }
            };
            final ExecutorService executor = Executors.newCachedThreadPool();
            try {
                ImhotepSession session =
                        new MTImhotepMultiSession(new ImhotepLocalSession[] { new ImhotepLocalSession(r) },
                                                  new MemoryReservationContext(
                                                                               new ImhotepMemoryPool(
                                                                                                     Long.MAX_VALUE)),
                                                  executor, null);
//                FTGSIterator iter = session.getFTGSIterator(new String[]{}, new String[]{"sf1"}); //TODO fix this
                session.close();
                assertTrue(closed.get());
            } finally {
                executor.shutdown();
            }
        } finally {
            Files.delete(tempDir);
        }
    }
Project: CophiAlignment    File: LuceneIndexMaker.java
public void init() throws Exception {
    NIOFSDirectory spellIndexDirectory = new NIOFSDirectory(dirDict, NoLockFactory.getNoLockFactory());
    spellchecker = new SpellChecker(spellIndexDirectory);
    iwc = new IndexWriterConfig(Version.LUCENE_36, new WhitespaceAnalyzer(Version.LUCENE_36));

}
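A hedged follow-up sketch, not CophiAlignment code: once init() has created the SpellChecker and the IndexWriterConfig, a dictionary can be indexed and queried for suggestions (Lucene 3.6 spellchecker API). The word-list file and the misspelled input are illustrative.

// Hypothetical follow-up to init(); the word-list file and the misspelled input are illustrative.
public void buildAndSuggest(File wordListFile) throws Exception {
    spellchecker.indexDictionary(new PlainTextDictionary(wordListFile), iwc, false);
    String[] suggestions = spellchecker.suggestSimilar("alignmnet", 5);  // e.g. -> "alignment"
    for (String suggestion : suggestions) {
        System.out.println(suggestion);
    }
    spellchecker.close();
}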
Project: t4f-data    File: QueryParserTest.java
protected void setUp() throws Exception {
  analyzer = new WhitespaceAnalyzer(Version.LUCENE_41);
  dir = TestUtil.getBookIndexDirectory();
  searcher = new IndexSearcher(dir);
}
Project: t4f-data    File: NumericQueryParserTest.java
protected void setUp() throws Exception {
  analyzer = new WhitespaceAnalyzer(Version.LUCENE_41);
  dir = TestUtil.getBookIndexDirectory();
  searcher = new IndexSearcher(dir, true);
}
Project: t4f-data    File: SpatialLuceneExample.java
SpatialLuceneExample() throws IOException {
  directory = new RAMDirectory();
  writer = new IndexWriter(directory, new WhitespaceAnalyzer(Version.LUCENE_41),
                           MaxFieldLength.UNLIMITED);
}