/** * Get region info from local cluster. */ Map<ServerName, List<String>> getDeployedHRIs(final HBaseAdmin admin) throws IOException { ClusterStatus status = admin.getClusterStatus(); Collection<ServerName> regionServers = status.getServers(); Map<ServerName, List<String>> mm = new HashMap<ServerName, List<String>>(); for (ServerName hsi : regionServers) { AdminProtos.AdminService.BlockingInterface server = ((HConnection) connection).getAdmin(hsi); // list all online regions from this region server List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server); List<String> regionNames = new ArrayList<String>(); for (HRegionInfo hri : regions) { regionNames.add(hri.getRegionNameAsString()); } mm.put(hsi, regionNames); } return mm; }
/**
 * Builds a Mockito-backed {@link ClusterStatus} that exposes one mocked
 * {@link ServerName} per entry of {@code servers}.
 *
 * @param servers loads to expose; the i-th load is returned by {@code getLoad}
 *                for the mocked server whose name is {@code "server" + i}
 * @return the mocked cluster status
 */
private ClusterStatus mockCluster(ServerLoad[] servers) {
  List<ServerName> mockedNames = new ArrayList<ServerName>();
  ClusterStatus mockedStatus = Mockito.mock(ClusterStatus.class);
  // The list is handed to the mock first and filled in below; the mock returns
  // this same list instance from getServers().
  when(mockedStatus.getServers()).thenReturn(mockedNames);
  for (int idx = 0; idx < servers.length; idx++) {
    ServerName mockedName = mock(ServerName.class);
    when(mockedName.getServerName()).thenReturn("server" + idx);
    mockedNames.add(mockedName);
    when(mockedStatus.getLoad(mockedName)).thenReturn(servers[idx]);
  }
  return mockedStatus;
}
/**
 * Returns the servers listed in the current cluster status, leaving out the
 * active master when it appears in that list.
 *
 * @return the remaining servers; an empty array when the status lists none or
 *         lists only the master
 * @throws IOException propagated from fetching the cluster status
 */
protected ServerName[] getCurrentServers() throws IOException {
  ClusterStatus status = cluster.getClusterStatus();
  Collection<ServerName> servers = status.getServers();
  final int total = (servers == null) ? 0 : servers.size();
  if (total <= 0) {
    return new ServerName[] {};
  }

  ServerName activeMaster = status.getMaster();
  boolean masterListed = activeMaster != null && servers.contains(activeMaster);
  if (!masterListed) {
    // Nothing to filter out: hand back every listed server.
    return servers.toArray(new ServerName[total]);
  }
  if (total == 1) {
    // The only listed server is the master itself.
    return new ServerName[] {};
  }

  ArrayList<ServerName> withoutMaster = new ArrayList<ServerName>(total);
  withoutMaster.addAll(servers);
  withoutMaster.remove(activeMaster);
  return withoutMaster.toArray(new ServerName[total - 1]);
}
@Override public void perform() throws Exception { LOG.info("Performing action: Restart region server holding META"); ServerName server = cluster.getServerHoldingMeta(); if (server == null) { LOG.warn("No server is holding hbase:meta right now."); return; } ClusterStatus clusterStatus = cluster.getClusterStatus(); if (server.equals(clusterStatus.getMaster())) { // Master holds the meta, so restart the master. restartMaster(server, sleepTime); } else { restartRs(server, sleepTime); } }
/** * Get region info from local cluster. */ Map<ServerName, List<String>> getDeployedHRIs(HBaseAdmin admin) throws IOException { ClusterStatus status = admin.getMaster().getClusterStatus(); Collection<ServerName> regionServers = status.getServers(); Map<ServerName, List<String>> mm = new HashMap<ServerName, List<String>>(); HConnection connection = admin.getConnection(); for (ServerName hsi : regionServers) { HRegionInterface server = connection.getHRegionConnection(hsi.getHostname(), hsi.getPort()); // list all online regions from this region server List<HRegionInfo> regions = server.getOnlineRegions(); List<String> regionNames = new ArrayList<String>(); for (HRegionInfo hri : regions) { regionNames.add(hri.getRegionNameAsString()); } mm.put(hsi, regionNames); } return mm; }
/** * Get region info from local cluster. */ Map<ServerName, List<String>> getDeployedHRIs( final HBaseAdmin admin) throws IOException { ClusterStatus status = admin.getClusterStatus(); Collection<ServerName> regionServers = status.getServers(); Map<ServerName, List<String>> mm = new HashMap<ServerName, List<String>>(); HConnection connection = admin.getConnection(); for (ServerName hsi : regionServers) { AdminProtos.AdminService.BlockingInterface server = connection.getAdmin(hsi); // list all online regions from this region server List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server); List<String> regionNames = new ArrayList<String>(); for (HRegionInfo hri : regions) { regionNames.add(hri.getRegionNameAsString()); } mm.put(hsi, regionNames); } return mm; }
protected void unbalanceRegions(ClusterStatus clusterStatus, List<ServerName> fromServers, List<ServerName> toServers, double fractionOfRegions) throws Exception { List<byte[]> victimRegions = new LinkedList<byte[]>(); for (ServerName server : fromServers) { ServerLoad serverLoad = clusterStatus.getLoad(server); // Ugh. List<byte[]> regions = new LinkedList<byte[]>(serverLoad.getRegionsLoad().keySet()); int victimRegionCount = (int)Math.ceil(fractionOfRegions * regions.size()); LOG.debug("Removing " + victimRegionCount + " regions from " + server.getServerName()); for (int i = 0; i < victimRegionCount; ++i) { int victimIx = RandomUtils.nextInt(regions.size()); String regionId = HRegionInfo.encodeRegionName(regions.remove(victimIx)); victimRegions.add(Bytes.toBytes(regionId)); } } LOG.info("Moving " + victimRegions.size() + " regions from " + fromServers.size() + " servers to " + toServers.size() + " different servers"); HBaseAdmin admin = this.context.getHBaseIntegrationTestingUtility().getHBaseAdmin(); for (byte[] victimRegion : victimRegions) { int targetIx = RandomUtils.nextInt(toServers.size()); admin.move(victimRegion, Bytes.toBytes(toServers.get(targetIx).getServerName())); } }
/**
 * Splits the current servers at random into a "hoarder" group and the rest,
 * moves a fraction of the regions from the rest onto the hoarders, kills some
 * of the hoarders, forces a balancer run and finally starts the killed
 * servers again, sleeping between the phases.
 */
@Override
public void perform() throws Exception {
  ClusterStatus status = this.cluster.getClusterStatus();
  List<ServerName> victimServers = new LinkedList<ServerName>(status.getServers());
  int liveCount = (int) Math.ceil(FRC_SERVERS_THAT_HOARD_AND_LIVE * victimServers.size());
  int deadCount = (int) Math.ceil(FRC_SERVERS_THAT_HOARD_AND_DIE * victimServers.size());
  final int hoarderCount = liveCount + deadCount;
  // At least one server must be left over to take regions away from.
  Assert.assertTrue(hoarderCount < victimServers.size());

  // Draw the hoarders at random, removing them from the victim list.
  List<ServerName> targetServers = new ArrayList<ServerName>(liveCount);
  while (targetServers.size() < hoarderCount) {
    int pick = RandomUtils.nextInt(victimServers.size());
    targetServers.add(victimServers.remove(pick));
  }

  unbalanceRegions(status, victimServers, targetServers, HOARD_FRC_OF_REGIONS);
  Thread.sleep(WAIT_FOR_UNBALANCE_MS);

  // NOTE(review): the kill and restart loops are bounded by liveCount rather
  // than deadCount -- confirm this is intended.
  for (int k = 0; k < liveCount; k++) {
    killRs(targetServers.get(k));
  }
  Thread.sleep(WAIT_FOR_KILLS_MS);

  forceBalancer();
  Thread.sleep(WAIT_AFTER_BALANCE_MS);

  for (int k = 0; k < liveCount; k++) {
    startRs(targetServers.get(k));
  }
}
/** * @return cluster status */ public ClusterStatus getClusterStatus() { // Build Set of backup masters from ZK nodes List<String> backupMasterStrings; try { backupMasterStrings = ZKUtil.listChildrenNoWatch(this.zooKeeper, this.zooKeeper.backupMasterAddressesZNode); } catch (KeeperException e) { LOG.warn(this.zooKeeper.prefix("Unable to list backup servers"), e); backupMasterStrings = new ArrayList<String>(0); } List<ServerName> backupMasters = new ArrayList<ServerName>( backupMasterStrings.size()); for (String s: backupMasterStrings) { backupMasters.add(new ServerName(s)); } return new ClusterStatus(VersionInfo.getVersion(), this.fileSystemManager.getClusterId(), this.serverManager.getOnlineServers(), this.serverManager.getDeadServers(), this.serverName, backupMasters, this.assignmentManager.getRegionsInTransition(), this.getCoprocessors()); }
/**
 * Checks that the status returned by {@code remoteAdmin} agrees with the
 * status read directly from the test cluster on region count, dead server
 * count and request count.
 */
@Test
public void testClusterStatus() throws Exception {
  ClusterStatus expected = TEST_UTIL.getHBaseClusterInterface().getClusterStatus();
  StorageClusterStatusModel actual = remoteAdmin.getClusterStatus();

  assertEquals(
      "Region count from cluster status and returned status did not match up. ",
      expected.getRegionsCount(), actual.getRegions());
  assertEquals(
      "Dead server count from cluster status and returned status did not match up. ",
      expected.getDeadServers(), actual.getDeadNodes().size());
  assertEquals(
      "Number of requests from cluster status and returned status did not match up. ",
      expected.getRequestsCount(), actual.getRequests());
}
/** * Get region info from local cluster. */ Map<ServerName, List<String>> getDeployedHRIs( final HBaseAdmin admin) throws IOException { ClusterStatus status = admin.getClusterStatus(); Collection<ServerName> regionServers = status.getServers(); Map<ServerName, List<String>> mm = new HashMap<ServerName, List<String>>(); HConnection connection = admin.getConnection(); for (ServerName hsi : regionServers) { AdminProtocol server = connection.getAdmin(hsi.getHostname(), hsi.getPort()); // list all online regions from this region server List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server); List<String> regionNames = new ArrayList<String>(); for (HRegionInfo hri : regions) { regionNames.add(hri.getRegionNameAsString()); } mm.put(hsi, regionNames); } return mm; }
protected void unbalanceRegions(ClusterStatus clusterStatus, List<ServerName> fromServers, List<ServerName> toServers, double fractionOfRegions) throws Exception { List<byte[]> victimRegions = new LinkedList<byte[]>(); for (ServerName server : fromServers) { ServerLoad serverLoad = clusterStatus.getLoad(server); // Ugh. List<byte[]> regions = new LinkedList<byte[]>(serverLoad.getRegionsLoad().keySet()); int victimRegionCount = (int)Math.ceil(fractionOfRegions * regions.size()); LOG.debug("Removing " + victimRegionCount + " regions from " + server.getServerName()); for (int i = 0; i < victimRegionCount; ++i) { int victimIx = random.nextInt(regions.size()); String regionId = HRegionInfo.encodeRegionName(regions.remove(victimIx)); victimRegions.add(Bytes.toBytes(regionId)); } } LOG.info("Moving " + victimRegions.size() + " regions from " + fromServers.size() + " servers to " + toServers.size() + " different servers"); HBaseAdmin admin = this.context.getHaseIntegrationTestingUtility().getHBaseAdmin(); for (byte[] victimRegion : victimRegions) { int targetIx = random.nextInt(toServers.size()); admin.move(victimRegion, Bytes.toBytes(toServers.get(targetIx).getServerName())); } }
public void setClusterStatus(ClusterStatus status) { long currentTime = EnvironmentEdgeManager.currentTime(); this.status = status; if (currentTime > lastFullRefresh + (CACHE_TIME / 2)) { // Only count the refresh if it includes user tables ( eg more than meta and namespace ). lastFullRefresh = scheduleFullRefresh()?currentTime:lastFullRefresh; } }
/**
 * Passes the new status to the superclass, refreshes the region load and then
 * hands the status to every region-load-based cost function.
 *
 * @param st the new cluster status
 */
@Override
public synchronized void setClusterStatus(ClusterStatus st) {
  super.setClusterStatus(st);
  updateRegionLoad();
  for (CostFromRegionLoadFunction loadBasedCost : regionLoadFunctions) {
    loadBasedCost.setClusterStatus(st);
  }
}
@Override protected void chore() { if (!connected) { return; } List<ServerName> sns = generateDeadServersListToSend(); if (sns.isEmpty()) { // Nothing to send. Done. return; } final long curTime = EnvironmentEdgeManager.currentTime(); if (lastMessageTime > curTime - messagePeriod) { // We already sent something less than 10 second ago. Done. return; } // Ok, we're going to send something then. lastMessageTime = curTime; // We're reusing an existing protobuf message, but we don't send everything. // This could be extended in the future, for example if we want to send stuff like the // hbase:meta server name. ClusterStatus cs = new ClusterStatus(VersionInfo.getVersion(), master.getMasterFileSystem().getClusterId().toString(), null, sns, master.getServerName(), null, null, null, null); publisher.publish(cs); }
private void init(RegionLocator regionLocator, Admin admin) throws IOException { if (!enabled(admin.getConfiguration())) { LOG.info("Region size calculation disabled."); return; } LOG.info("Calculating region sizes for table \"" + regionLocator.getName() + "\"."); //get regions for table List<HRegionLocation> tableRegionInfos = regionLocator.getAllRegionLocations(); Set<byte[]> tableRegions = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR); for (HRegionLocation regionInfo : tableRegionInfos) { tableRegions.add(regionInfo.getRegionInfo().getRegionName()); } ClusterStatus clusterStatus = admin.getClusterStatus(); Collection<ServerName> servers = clusterStatus.getServers(); final long megaByte = 1024L * 1024L; //iterate all cluster regions, filter regions from our table and compute their size for (ServerName serverName: servers) { ServerLoad serverLoad = clusterStatus.getLoad(serverName); for (RegionLoad regionLoad: serverLoad.getRegionsLoad().values()) { byte[] regionId = regionLoad.getName(); if (tableRegions.contains(regionId)) { long regionSizeBytes = regionLoad.getStorefileSizeMB() * megaByte; sizeMap.put(regionId, regionSizeBytes); if (LOG.isDebugEnabled()) { LOG.debug("Region " + regionLoad.getNameAsString() + " has size " + regionSizeBytes); } } } } LOG.debug("Region sizes calculated"); }
/**
 * Alternative to getCurrentNrHRS, which is no longer available.
 *
 * @param connection connection from which a short-lived {@link Admin} is obtained
 * @return Rough count of regionservers out on cluster; 0 when the cluster
 *         status lists no servers.
 * @throws IOException propagated from the admin calls
 */
private static int getRegionServerCount(final Connection connection) throws IOException {
  try (Admin admin = connection.getAdmin()) {
    Collection<ServerName> liveServers = admin.getClusterStatus().getServers();
    if (liveServers == null || liveServers.isEmpty()) {
      return 0;
    }
    return liveServers.size();
  }
}
/** * Test that if the master fails, the load balancer maintains its * state (running or not) when the next master takes over * * @throws Exception */ @Test(timeout = 240000) public void testMasterFailoverBalancerPersistence() throws Exception { final int NUM_MASTERS = 3; final int NUM_RS = 1; // Start the cluster HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS); MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); assertTrue(cluster.waitForActiveAndReadyMaster()); HMaster active = cluster.getMaster(); // check that the balancer is on by default for the active master ClusterStatus clusterStatus = active.getClusterStatus(); assertTrue(clusterStatus.isBalancerOn()); active = killActiveAndWaitForNewActive(cluster); // ensure the load balancer is still running on new master clusterStatus = active.getClusterStatus(); assertTrue(clusterStatus.isBalancerOn()); // turn off the load balancer active.balanceSwitch(false); // once more, kill active master and wait for new active master to show up active = killActiveAndWaitForNewActive(cluster); // ensure the load balancer is not running on the new master clusterStatus = active.getClusterStatus(); assertFalse(clusterStatus.isBalancerOn()); // Stop the cluster TEST_UTIL.shutdownMiniCluster(); }
protected void unbalanceRegions(ClusterStatus clusterStatus, List<ServerName> fromServers, List<ServerName> toServers, double fractionOfRegions) throws Exception { List<byte[]> victimRegions = new LinkedList<byte[]>(); for (ServerName server : fromServers) { ServerLoad serverLoad = clusterStatus.getLoad(server); // Ugh. List<byte[]> regions = new LinkedList<byte[]>(serverLoad.getRegionsLoad().keySet()); int victimRegionCount = (int)Math.ceil(fractionOfRegions * regions.size()); LOG.debug("Removing " + victimRegionCount + " regions from " + server.getServerName()); for (int i = 0; i < victimRegionCount; ++i) { int victimIx = RandomUtils.nextInt(regions.size()); String regionId = HRegionInfo.encodeRegionName(regions.remove(victimIx)); victimRegions.add(Bytes.toBytes(regionId)); } } LOG.info("Moving " + victimRegions.size() + " regions from " + fromServers.size() + " servers to " + toServers.size() + " different servers"); Admin admin = this.context.getHBaseIntegrationTestingUtility().getHBaseAdmin(); for (byte[] victimRegion : victimRegions) { // Don't keep moving regions if we're // trying to stop the monkey. if (context.isStopping()) { break; } int targetIx = RandomUtils.nextInt(toServers.size()); admin.move(victimRegion, Bytes.toBytes(toServers.get(targetIx).getServerName())); } }
@Override public void perform() throws Exception { ClusterStatus status = this.cluster.getClusterStatus(); List<ServerName> victimServers = new LinkedList<ServerName>(status.getServers()); Set<ServerName> killedServers = new HashSet<ServerName>(); int liveCount = (int)Math.ceil(FRC_SERVERS_THAT_HOARD_AND_LIVE * victimServers.size()); int deadCount = (int)Math.ceil(FRC_SERVERS_THAT_HOARD_AND_DIE * victimServers.size()); Assert.assertTrue((liveCount + deadCount) < victimServers.size()); List<ServerName> targetServers = new ArrayList<ServerName>(liveCount); for (int i = 0; i < liveCount + deadCount; ++i) { int victimIx = RandomUtils.nextInt(victimServers.size()); targetServers.add(victimServers.remove(victimIx)); } unbalanceRegions(status, victimServers, targetServers, HOARD_FRC_OF_REGIONS); Thread.sleep(waitForUnbalanceMilliSec); for (int i = 0; i < liveCount; ++i) { // Don't keep killing servers if we're // trying to stop the monkey. if (context.isStopping()) { break; } killRs(targetServers.get(i)); killedServers.add(targetServers.get(i)); } Thread.sleep(waitForKillsMilliSec); forceBalancer(); Thread.sleep(waitAfterBalanceMilliSec); for (ServerName server:killedServers) { startRs(server); } }
/**
 * Draws a random fraction of the current servers as targets and moves a
 * fraction of the regions from the remaining servers onto them.
 */
@Override
public void perform() throws Exception {
  LOG.info("Unbalancing regions");
  ClusterStatus status = this.cluster.getClusterStatus();
  List<ServerName> victimServers = new LinkedList<ServerName>(status.getServers());
  final int targetServerCount = (int) Math.ceil(fractionOfServers * victimServers.size());

  // Every server drawn as a target is removed from the victim list.
  List<ServerName> targetServers = new ArrayList<ServerName>(targetServerCount);
  while (targetServers.size() < targetServerCount) {
    int pick = RandomUtils.nextInt(victimServers.size());
    targetServers.add(victimServers.remove(pick));
  }

  unbalanceRegions(status, victimServers, targetServers, fractionOfRegions);
}
/**
 * Fetches the cluster status from the master through a {@code MasterCallable}
 * and converts the response into a {@link ClusterStatus}.
 *
 * @return cluster status
 * @throws IOException if a remote or network exception occurs
 */
@Override
public ClusterStatus getClusterStatus() throws IOException {
  MasterCallable<ClusterStatus> statusCall =
      new MasterCallable<ClusterStatus>(getConnection()) {
        @Override
        public ClusterStatus call(int callTimeout) throws ServiceException {
          // A fresh controller per call, carrying the timeout passed in.
          PayloadCarryingRpcController rpcController = rpcControllerFactory.newController();
          rpcController.setCallTimeout(callTimeout);
          GetClusterStatusRequest request = RequestConverter.buildGetClusterStatusRequest();
          return ClusterStatus.convert(
              master.getClusterStatus(rpcController, request).getClusterStatus());
        }
      };
  return executeCallable(statusCall);
}