protected long syncSlots(FSDataOutputStream stream, ByteSlot[] slots, int offset, int count) throws IOException { long totalSynced = 0; for (int i = 0; i < count; ++i) { ByteSlot data = slots[offset + i]; data.writeTo(stream); totalSynced += data.size(); } if (useHsync) { stream.hsync(); } else { stream.hflush(); } sendPostSyncSignal(); if (LOG.isTraceEnabled()) { LOG.trace("Sync slots=" + count + '/' + slots.length + ", flushed=" + StringUtils.humanSize(totalSynced)); } return totalSynced; }
public void visitMeta(final RegionStateVisitor visitor) throws IOException { MetaTableAccessor.fullScanRegions(master.getConnection(), new MetaTableAccessor.Visitor() { final boolean isDebugEnabled = LOG.isDebugEnabled(); @Override public boolean visit(final Result r) throws IOException { if (r != null && !r.isEmpty()) { long st = 0; if (LOG.isTraceEnabled()) { st = System.currentTimeMillis(); } visitMetaEntry(visitor, r); if (LOG.isTraceEnabled()) { long et = System.currentTimeMillis(); LOG.trace("[T] LOAD META PERF " + StringUtils.humanTimeDiff(et - st)); } } else if (isDebugEnabled) { LOG.debug("NULL result from meta - ignoring but this is strange."); } return true; } }); }
public void joinCluster() throws IOException { final long startTime = System.currentTimeMillis(); LOG.debug("Joining cluster..."); // Scan hbase:meta to build list of existing regions, servers, and assignment loadMeta(); for (int i = 0; master.getServerManager().countOfRegionServers() < 1; ++i) { LOG.info("Waiting for RegionServers to join; current count=" + master.getServerManager().countOfRegionServers()); Threads.sleep(250); } LOG.info("Number of RegionServers=" + master.getServerManager().countOfRegionServers()); boolean failover = processofflineServersWithOnlineRegions(); // Start the RIT chore master.getMasterProcedureExecutor().addChore(this.ritChore); LOG.info(String.format("Joined the cluster in %s, failover=%s", StringUtils.humanTimeDiff(System.currentTimeMillis() - startTime), failover)); }
private void testAssign(final MockRSExecutor executor, final int nregions) throws Exception { rsDispatcher.setMockRsExecutor(executor); AssignProcedure[] assignments = new AssignProcedure[nregions]; long st = System.currentTimeMillis(); bulkSubmit(assignments); for (int i = 0; i < assignments.length; ++i) { ProcedureTestingUtility.waitProcedure( master.getMasterProcedureExecutor(), assignments[i]); assertTrue(assignments[i].toString(), assignments[i].isSuccess()); } long et = System.currentTimeMillis(); float sec = ((et - st) / 1000.0f); LOG.info(String.format("[T] Assigning %dprocs in %s (%.2fproc/sec)", assignments.length, StringUtils.humanTimeDiff(et - st), assignments.length / sec)); }
public void awaitTermination() { try { final long startTime = EnvironmentEdgeManager.currentTime(); for (int i = 0; isAlive(); ++i) { sendStopSignal(); join(250); // Log every two seconds; send interrupt too. if (i > 0 && (i % 8) == 0) { LOG.warn("Waiting termination of thread " + getName() + ", " + StringUtils.humanTimeDiff(EnvironmentEdgeManager.currentTime() - startTime) + "; sending interrupt"); interrupt(); } } } catch (InterruptedException e) { LOG.warn(getName() + " join wait got interrupted", e); } }
protected long syncSlots(final FSDataOutputStream stream, final ByteSlot[] slots, final int offset, final int count) throws IOException { long totalSynced = 0; for (int i = 0; i < count; ++i) { final ByteSlot data = slots[offset + i]; data.writeTo(stream); totalSynced += data.size(); } syncStream(stream); sendPostSyncSignal(); if (LOG.isTraceEnabled()) { LOG.trace("Sync slots=" + count + '/' + syncMaxSlot + ", flushed=" + StringUtils.humanSize(totalSynced)); } return totalSynced; }
@InterfaceAudience.Private protected synchronized boolean setTimeoutFailure() { if (state == ProcedureState.WAITING_TIMEOUT) { long timeDiff = EnvironmentEdgeManager.currentTime() - lastUpdate; setFailure("ProcedureExecutor", new TimeoutIOException( "Operation timed out after " + StringUtils.humanTimeDiff(timeDiff))); return true; } return false; }
public String dumpReplicationSummary() { StringBuilder sb = new StringBuilder(); if (!deletedQueues.isEmpty()) { sb.append("Found " + deletedQueues.size() + " deleted queues" + ", run hbck -fixReplication in order to remove the deleted replication queues\n"); for (String deletedQueue : deletedQueues) { sb.append(" " + deletedQueue + "\n"); } } if (!deadRegionServers.isEmpty()) { sb.append("Found " + deadRegionServers.size() + " dead regionservers" + ", restart one regionserver to transfer the queues of dead regionservers\n"); for (String deadRs : deadRegionServers) { sb.append(" " + deadRs + "\n"); } } if (!peersQueueSize.isEmpty()) { sb.append("Dumping all peers's number of WALs in replication queue\n"); for (Map.Entry<String, Long> entry : peersQueueSize.asMap().entrySet()) { sb.append(" PeerId: " + entry.getKey() + " , sizeOfLogQueue: " + entry.getValue() + "\n"); } } sb.append(" Total size of WALs on HDFS: " + StringUtils.humanSize(totalSizeOfWALs) + "\n"); if (numWalsNotFound > 0) { sb.append(" ERROR: There are " + numWalsNotFound + " WALs not found!!!\n"); } return sb.toString(); }
private String formatQueue(ServerName regionserver, ReplicationQueueStorage queueStorage, ReplicationQueueInfo queueInfo, String queueId, List<String> wals, boolean isDeleted, boolean hdfs) throws Exception { StringBuilder sb = new StringBuilder(); List<ServerName> deadServers; sb.append("Dumping replication queue info for RegionServer: [" + regionserver + "]" + "\n"); sb.append(" Queue znode: " + queueId + "\n"); sb.append(" PeerID: " + queueInfo.getPeerId() + "\n"); sb.append(" Recovered: " + queueInfo.isQueueRecovered() + "\n"); deadServers = queueInfo.getDeadRegionServers(); if (deadServers.isEmpty()) { sb.append(" No dead RegionServers found in this queue." + "\n"); } else { sb.append(" Dead RegionServers: " + deadServers + "\n"); } sb.append(" Was deleted: " + isDeleted + "\n"); sb.append(" Number of WALs in replication queue: " + wals.size() + "\n"); peersQueueSize.addAndGet(queueInfo.getPeerId(), wals.size()); for (String wal : wals) { long position = queueStorage.getWALPosition(regionserver, queueInfo.getPeerId(), wal); sb.append(" Replication position for " + wal + ": " + (position > 0 ? position : "0" + " (not started or nothing to replicate)") + "\n"); } if (hdfs) { FileSystem fs = FileSystem.get(getConf()); sb.append(" Total size of WALs on HDFS for this queue: " + StringUtils.humanSize(getTotalWALSize(fs, wals, regionserver)) + "\n"); } return sb.toString(); }
private void acceptPlan(final HashMap<RegionInfo, RegionStateNode> regions, final Map<ServerName, List<RegionInfo>> plan) throws HBaseIOException { final ProcedureEvent[] events = new ProcedureEvent[regions.size()]; final long st = System.currentTimeMillis(); if (plan == null) { throw new HBaseIOException("unable to compute plans for regions=" + regions.size()); } if (plan.isEmpty()) return; int evcount = 0; for (Map.Entry<ServerName, List<RegionInfo>> entry: plan.entrySet()) { final ServerName server = entry.getKey(); for (RegionInfo hri: entry.getValue()) { final RegionStateNode regionNode = regions.get(hri); regionNode.setRegionLocation(server); events[evcount++] = regionNode.getProcedureEvent(); } } ProcedureEvent.wakeEvents(getProcedureScheduler(), events); final long et = System.currentTimeMillis(); if (LOG.isTraceEnabled()) { LOG.trace("ASSIGN ACCEPT " + events.length + " -> " + StringUtils.humanTimeDiff(et - st)); } }
@Override protected boolean shouldSplit() { boolean force = region.shouldForceSplit(); boolean foundABigStore = false; // Get count of regions that have the same common table as this.region int tableRegionsCount = getCountOfCommonTableRegions(); // Get size to check long sizeToCheck = getSizeToCheck(tableRegionsCount); for (HStore store : region.getStores()) { // If any of the stores is unable to split (eg they contain reference files) // then don't split if (!store.canSplit()) { return false; } // Mark if any store is big enough long size = store.getSize(); if (size > sizeToCheck) { LOG.debug("ShouldSplit because " + store.getColumnFamilyName() + " size=" + StringUtils.humanSize(size) + ", sizeToCheck=" + StringUtils.humanSize(sizeToCheck) + ", regionsWithCommonTable=" + tableRegionsCount); foundABigStore = true; } } return foundABigStore || force; }
protected long getFlushSizeLowerBound(HRegion region) { int familyNumber = region.getTableDescriptor().getColumnFamilyCount(); // For multiple families, lower bound is the "average flush size" by default // unless setting in configuration is larger. long flushSizeLowerBound = region.getMemStoreFlushSize() / familyNumber; long minimumLowerBound = getConf().getLong(HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND_MIN, DEFAULT_HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND_MIN); if (minimumLowerBound > flushSizeLowerBound) { flushSizeLowerBound = minimumLowerBound; } // use the setting in table description if any String flushedSizeLowerBoundString = region.getTableDescriptor().getValue(HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND); if (flushedSizeLowerBoundString == null) { LOG.debug("No {} set in table {} descriptor;" + "using region.getMemStoreFlushSize/# of families ({}) instead.", HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND, region.getTableDescriptor().getTableName(), StringUtils.humanSize(flushSizeLowerBound) + ")"); } else { try { flushSizeLowerBound = Long.parseLong(flushedSizeLowerBoundString); } catch (NumberFormatException nfe) { // fall back for fault setting LOG.warn("Number format exception parsing {} for table {}: {}, {}; " + "using region.getMemStoreFlushSize/# of families ({}) instead.", HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND, region.getTableDescriptor().getTableName(), flushedSizeLowerBoundString, nfe, flushSizeLowerBound); } } return flushSizeLowerBound; }
@Test public void testPerfSingleThread() { final TableName TABLE_NAME = TableName.valueOf("testPerf"); final int NRUNS = 1 * 1000000; // 1M final RegionStates stateMap = new RegionStates(); long st = System.currentTimeMillis(); for (int i = 0; i < NRUNS; ++i) { stateMap.createRegionStateNode(createRegionInfo(TABLE_NAME, i)); } long et = System.currentTimeMillis(); LOG.info(String.format("PERF SingleThread: %s %s/sec", StringUtils.humanTimeDiff(et - st), StringUtils.humanSize(NRUNS / ((et - st) / 1000.0f)))); }
private int checkForStuckWorkers() { // check if any of the worker is stuck int stuckCount = 0; for (WorkerThread worker: workerThreads) { if (worker.getCurrentRunTime() < stuckThreshold) { continue; } // WARN the worker is stuck stuckCount++; LOG.warn("Worker stuck " + worker + " run time " + StringUtils.humanTimeDiff(worker.getCurrentRunTime())); } return stuckCount; }
public void awaitTermination() { try { final long startTime = EnvironmentEdgeManager.currentTime(); for (int i = 0; isAlive(); ++i) { sendStopSignal(); join(250); if (i > 0 && (i % 8) == 0) { LOG.warn("Waiting termination of thread " + getName() + ", " + StringUtils.humanTimeDiff(EnvironmentEdgeManager.currentTime() - startTime)); } } } catch (InterruptedException e) { LOG.warn(getName() + " join wait got interrupted", e); } }
/** * Called by the ProcedureExecutor when the timeout set by setTimeout() is expired. * @return true to let the framework handle the timeout as abort, * false in case the procedure handled the timeout itself. */ protected synchronized boolean setTimeoutFailure(final TEnvironment env) { if (state == ProcedureState.WAITING_TIMEOUT) { long timeDiff = EnvironmentEdgeManager.currentTime() - lastUpdate; setFailure("ProcedureExecutor", new TimeoutIOException( "Operation timed out after " + StringUtils.humanTimeDiff(timeDiff))); return true; } return false; }
@Ignore @Test // REENABLE after merge of // https://github.com/google/protobuf/issues/2228#issuecomment-252058282 public void testEntrySizeLimit() throws Exception { final int NITEMS = 20; for (int i = 1; i <= NITEMS; ++i) { final byte[] data = new byte[256 << i]; LOG.info(String.format("Writing %s", StringUtils.humanSize(data.length))); TestProcedure proc = new TestProcedure(i, 0, data); procStore.insert(proc, null); } // check that we are able to read the big proc-blobs ProcedureTestingUtility.storeRestartAndAssert(procStore, NITEMS, NITEMS, 0, 0); }
private void writeWals() throws IOException { List<Integer> procStates = shuffleProcWriteSequence(); TestProcedure[] procs = new TestProcedure[numProcs + 1]; // 0 is not used. int numProcsPerWal = numWals > 0 ? procStates.size() / numWals : Integer.MAX_VALUE; long startTime = currentTimeMillis(); long lastTime = startTime; for (int i = 0; i < procStates.size(); ++i) { int procId = procStates.get(i); if (procId < 0) { store.delete(procs[-procId].getProcId()); procs[-procId] = null; } else if (procs[procId] == null) { procs[procId] = new TestProcedure(procId, 0); procs[procId].setData(serializedState); store.insert(procs[procId], null); } else { store.update(procs[procId]); } if (i > 0 && i % numProcsPerWal == 0) { long currentTime = currentTimeMillis(); System.out.println("Forcing wall roll. Time taken on last WAL: " + (currentTime - lastTime) / 1000.0f + " sec"); store.rollWriterForTesting(); lastTime = currentTime; } } long timeTaken = currentTimeMillis() - startTime; System.out.println("\n\nDone writing WALs.\nNum procs : " + numProcs + "\nTotal time taken : " + StringUtils.humanTimeDiff(timeTaken) + "\n\n"); }
@VisibleForTesting @InterfaceAudience.Private public void setOwner(final String owner) { this.owner = StringUtils.isEmpty(owner) ? null : owner; }
private void syncLoop() throws Throwable { inSync.set(false); lock.lock(); try { while (isRunning()) { try { // Wait until new data is available if (slotIndex == 0) { if (!loading.get()) { periodicRoll(); } if (LOG.isTraceEnabled()) { float rollTsSec = getMillisFromLastRoll() / 1000.0f; LOG.trace(String.format("Waiting for data. flushed=%s (%s/sec)", StringUtils.humanSize(totalSynced.get()), StringUtils.humanSize(totalSynced.get() / rollTsSec))); } waitCond.await(getMillisToNextPeriodicRoll(), TimeUnit.MILLISECONDS); if (slotIndex == 0) { // no data.. probably a stop() or a periodic roll continue; } } // Wait SYNC_WAIT_MSEC or the signal of "slots full" before flushing long syncWaitSt = System.currentTimeMillis(); if (slotIndex != slots.length) { slotCond.await(syncWaitMsec, TimeUnit.MILLISECONDS); } long syncWaitMs = System.currentTimeMillis() - syncWaitSt; if (LOG.isTraceEnabled() && (syncWaitMs > 10 || slotIndex < slots.length)) { float rollSec = getMillisFromLastRoll() / 1000.0f; LOG.trace(String.format("Sync wait %s, slotIndex=%s , totalSynced=%s (%s/sec)", StringUtils.humanTimeDiff(syncWaitMs), slotIndex, StringUtils.humanSize(totalSynced.get()), StringUtils.humanSize(totalSynced.get() / rollSec))); } inSync.set(true); totalSynced.addAndGet(syncSlots()); slotIndex = 0; inSync.set(false); } catch (InterruptedException e) { Thread.currentThread().interrupt(); sendAbortProcessSignal(); syncException.compareAndSet(null, e); throw e; } catch (Throwable t) { syncException.compareAndSet(null, t); throw t; } finally { syncCond.signalAll(); } } } finally { lock.unlock(); } }
@Override protected int doWork() throws Exception { procedureScheduler = new MasterProcedureScheduler(UTIL.getConfiguration()); procedureScheduler.start(); setupOperations(); final Thread[] threads = new Thread[numThreads]; for (int i = 0; i < numThreads; ++i) { threads[i] = new AddProcsWorker(); } final long addBackTime = runThreads(threads); System.out.println("Added " + numOps + " procedures to scheduler."); for (int i = 0; i < numThreads; ++i) { threads[i] = new PollAndLockWorker(); } final long pollTime = runThreads(threads); procedureScheduler.stop(); final float pollTimeSec = pollTime / 1000.0f; final float addBackTimeSec = addBackTime / 1000.0f; System.out.println("******************************************"); System.out.println("Time - addBack : " + StringUtils.humanTimeDiff(addBackTime)); System.out.println("Ops/sec - addBack : " + StringUtils.humanSize(numOps / addBackTimeSec)); System.out.println("Time - poll : " + StringUtils.humanTimeDiff(pollTime)); System.out.println("Ops/sec - poll : " + StringUtils.humanSize(numOps / pollTimeSec)); System.out.println("Num Operations : " + numOps); System.out.println(); System.out.println("Completed : " + completed.get()); System.out.println("Yield : " + yield.get()); System.out.println(); System.out.println("Num Tables : " + numTables); System.out.println("Regions per table : " + regionsPerTable); System.out.println("Operations type : " + opsType); System.out.println("Threads : " + numThreads); System.out.println("******************************************"); System.out.println("Raw format for scripts"); System.out.println(String.format("RESULT [%s=%s, %s=%s, %s=%s, %s=%s, %s=%s, " + "num_yield=%s, time_addback_ms=%s, time_poll_ms=%s]", NUM_OPERATIONS_OPTION.getOpt(), numOps, OPS_TYPE_OPTION.getOpt(), opsType, NUM_TABLES_OPTION.getOpt(), numTables, REGIONS_PER_TABLE_OPTION.getOpt(), regionsPerTable, NUM_THREADS_OPTION.getOpt(), numThreads, yield.get(), addBackTime, pollTime)); return 0; }
/** * Start the procedure executor. * It calls ProcedureStore.recoverLease() and ProcedureStore.load() to * recover the lease, and ensure a single executor, and start the procedure * replay to resume and recover the previous pending and in-progress perocedures. * * @param numThreads number of threads available for procedure execution. * @param abortOnCorruption true if you want to abort your service in case * a corrupted procedure is found on replay. otherwise false. */ public void start(int numThreads, boolean abortOnCorruption) throws IOException { if (running.getAndSet(true)) { LOG.warn("Already running"); return; } // We have numThreads executor + one timer thread used for timing out // procedures and triggering periodic procedures. this.corePoolSize = numThreads; LOG.info("Starting {} Workers (bigger of cpus/4 or 16)", corePoolSize); // Create the Thread Group for the executors threadGroup = new ThreadGroup("PEWorkerGroup"); // Create the timeout executor timeoutExecutor = new TimeoutExecutorThread(threadGroup); // Create the workers workerId.set(0); workerThreads = new CopyOnWriteArrayList<>(); for (int i = 0; i < corePoolSize; ++i) { workerThreads.add(new WorkerThread(threadGroup)); } long st, et; // Acquire the store lease. st = EnvironmentEdgeManager.currentTime(); store.recoverLease(); et = EnvironmentEdgeManager.currentTime(); LOG.info(String.format("Recovered %s lease in %s", store.getClass().getSimpleName(), StringUtils.humanTimeDiff(et - st))); // start the procedure scheduler scheduler.start(); // TODO: Split in two steps. // TODO: Handle corrupted procedures (currently just a warn) // The first one will make sure that we have the latest id, // so we can start the threads and accept new procedures. // The second step will do the actual load of old procedures. st = EnvironmentEdgeManager.currentTime(); load(abortOnCorruption); et = EnvironmentEdgeManager.currentTime(); LOG.info(String.format("Loaded %s in %s, um pid=", store.getClass().getSimpleName(), StringUtils.humanTimeDiff(et - st))); // Start the executors. Here we must have the lastProcId set. if (LOG.isTraceEnabled()) { LOG.trace("Start workers " + workerThreads.size()); } timeoutExecutor.start(); for (WorkerThread worker: workerThreads) { worker.start(); } // Internal chores timeoutExecutor.add(new WorkerMonitor()); // Add completed cleaner chore addChore(new CompletedProcedureCleaner(conf, store, completed, nonceKeysToProcIdsMap)); }