/** * Force text extraction of every document in the repository */ public void rebuildWorker(MassIndexerProgressMonitor monitor) throws PathNotFoundException, DatabaseException, InterruptedException { if (running) { log.warn("*** Text extraction already running ***"); } else { running = true; log.debug("*** Begin massive text extraction ***"); try { // Clear pending extraction queue NodeDocumentDAO.getInstance().resetAllPendingExtractionFlags(); // Process queue while (NodeDocumentDAO.getInstance().hasPendingExtractions()) { processQueue(monitor, Config.MANAGED_TEXT_EXTRACTION_BATCH); Thread.sleep(750); System.gc(); } } finally { running = false; } log.debug("*** End massive text extraction ***"); } }
/**
 * Process queue concurrently.
 *
 * Creates a {@link ThreadPoolManager} sized by
 * {@code Config.MANAGED_TEXT_EXTRACTION_POOL_THREADS}, submits one
 * {@link TextExtractorThread} per pending extraction (at most {@code maxResults} are
 * requested from the DAO) and records each submitted work item in {@code inProgress}.
 * The pool is always shut down, even when fetching or submitting work fails, and
 * {@code inProgress} is always cleared before the method returns.
 *
 * A {@link DatabaseException} is logged as a warning and not propagated.
 *
 * @param monitor progress monitor; only used for logging in this method.
 * @param maxResults maximum number of pending extractions requested from the DAO.
 */
private void processConcurrent(MassIndexerProgressMonitor monitor, int maxResults) {
    log.info("processConcurrent({}, {})", monitor, maxResults);
    long begin = System.currentTimeMillis();

    try {
        ThreadPoolManager threadPool = new ThreadPoolManager(Config.MANAGED_TEXT_EXTRACTION_POOL_THREADS,
                "TextExtractorWorker");

        try {
            for (TextExtractorWork pendExts : NodeDocumentDAO.getInstance().getPendingExtractions(maxResults)) {
                threadPool.add(new TextExtractorThread(pendExts));
                inProgress.add(pendExts);
            }
        } finally {
            // Shut the pool down on every path: previously a failure while fetching or
            // submitting work skipped this call and left the freshly created pool behind.
            threadPool.shutdown();
        }
    } catch (DatabaseException e) {
        log.warn(e.getMessage(), e);
    } finally {
        inProgress.clear();
    }

    SystemProfiling.log(String.valueOf(maxResults), System.currentTimeMillis() - begin);
    log.trace("processConcurrent.Time: {}", System.currentTimeMillis() - begin);
}
/**
 * Process text extraction pending queue.
 *
 * Dispatches to {@code processConcurrent} when
 * {@code Config.MANAGED_TEXT_EXTRACTION_CONCURRENT} is enabled and to
 * {@code processSerial} otherwise.
 *
 * @param monitor progress monitor forwarded to the selected strategy.
 * @param maxResults batch size forwarded to the selected strategy.
 */
private void processQueue(MassIndexerProgressMonitor monitor, int maxResults) {
    // Serial mode: hand over and finish
    if (!Config.MANAGED_TEXT_EXTRACTION_CONCURRENT) {
        processSerial(monitor, maxResults);
        return;
    }

    // Concurrent mode
    log.debug("Processing queue concurrently with {} processors", Config.AVAILABLE_PROCESSORS);
    processConcurrent(monitor, maxResults);
}
/**
 * Rebuild the full text search index from the current database content.
 *
 * Obtains a {@link FullTextEntityManager} for {@code entityManager}, creates a
 * {@link MassIndexer} configured with a single object-loading thread and a
 * {@link SimpleIndexingProgressMonitor}, and blocks until indexing has finished.
 *
 * If the calling thread is interrupted while waiting, the error is logged, the thread's
 * interrupt status is restored so callers can observe it, and no success message is written.
 */
public void indexDatabaseContent() {
    final FullTextEntityManager fullTextEntityManager = Search.getFullTextEntityManager(entityManager);
    // NOTE(review): 10 is presumably the monitor's logging period - confirm against its constructor
    final MassIndexerProgressMonitor indexMonitor = new SimpleIndexingProgressMonitor(10);

    try {
        final MassIndexer indexer = fullTextEntityManager.createIndexer();
        indexer.threadsToLoadObjects(1);
        indexer.progressMonitor(indexMonitor);
        indexer.startAndWait();

        // Report success only when startAndWait() returned normally
        logger.info("Succesfully indexed database content for full text search");
    } catch (InterruptedException e) {
        // Restore the interrupt status instead of swallowing it
        Thread.currentThread().interrupt();
        logger.error("Error indexing current database content", e);
    }
}