/** * A test that intentionally has master fail the processing of the split message. * Tests that the regionserver split ephemeral node gets cleaned up if it * crashes and that after we process server shutdown, the daughters are up on * line. * @throws IOException * @throws InterruptedException * @throws NodeExistsException * @throws KeeperException */ @Test (timeout = 300000) public void testRSSplitEphemeralsDisappearButDaughtersAreOnlinedAfterShutdownHandling() throws IOException, InterruptedException, NodeExistsException, KeeperException { final byte [] tableName = Bytes.toBytes("ephemeral"); // Create table then get the single region for our new table. HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY); List<HRegion> regions = cluster.getRegions(tableName); HRegionInfo hri = getAndCheckSingleTableRegion(regions); int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri); // Turn off balancer so it doesn't cut in and mess up our placements. this.admin.setBalancerRunning(false, true); // Turn off the meta scanner so it don't remove parent on us. cluster.getMaster().setCatalogJanitorEnabled(false); try { // Add a bit of load up into the table so splittable. TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY); // Get region pre-split. HRegionServer server = cluster.getRegionServer(tableRegionIndex); printOutRegions(server, "Initial regions: "); int regionCount = server.getOnlineRegions().size(); // Now, before we split, set special flag in master, a flag that has // it FAIL the processing of split. SplitRegionHandler.TEST_SKIP = true; // Now try splitting and it should work. split(hri, server, regionCount); // Get daughters List<HRegion> daughters = checkAndGetDaughters(tableName); // Assert the ephemeral node is up in zk. String path = ZKAssign.getNodeName(t.getConnection().getZooKeeperWatcher(), hri.getEncodedName()); Stat stats = t.getConnection().getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false); LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats=" + stats); RegionTransitionData rtd = ZKAssign.getData(t.getConnection().getZooKeeperWatcher(), hri.getEncodedName()); // State could be SPLIT or SPLITTING. assertTrue(rtd.getEventType().equals(EventType.RS_ZK_REGION_SPLIT) || rtd.getEventType().equals(EventType.RS_ZK_REGION_SPLITTING)); // Now crash the server cluster.abortRegionServer(tableRegionIndex); waitUntilRegionServerDead(); awaitDaughters(tableName, daughters.size()); // Assert daughters are online. regions = cluster.getRegions(tableName); for (HRegion r: regions) { assertTrue(daughters.contains(r)); } // Finally assert that the ephemeral SPLIT znode was cleaned up. for (int i=0; i<100; i++) { // wait a bit (10s max) for the node to disappear stats = t.getConnection().getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false); if (stats == null) break; Thread.sleep(100); } LOG.info("EPHEMERAL NODE AFTER SERVER ABORT, path=" + path + ", stats=" + stats); assertTrue(stats == null); } finally { // Set this flag back. SplitRegionHandler.TEST_SKIP = false; admin.setBalancerRunning(true, false); cluster.getMaster().setCatalogJanitorEnabled(true); t.close(); } }
/** * Verifies HBASE-5806. When splitting is partially done and the master goes down * when the SPLIT node is in either SPLIT or SPLITTING state. * * @throws IOException * @throws InterruptedException * @throws NodeExistsException * @throws KeeperException */ @Test(timeout = 300000) public void testMasterRestartWhenSplittingIsPartial() throws IOException, InterruptedException, NodeExistsException, KeeperException { final byte[] tableName = Bytes.toBytes("testMasterRestartWhenSplittingIsPartial"); // Create table then get the single region for our new table. HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY); List<HRegion> regions = cluster.getRegions(tableName); HRegionInfo hri = getAndCheckSingleTableRegion(regions); int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri); // Turn off the meta scanner so it don't remove parent on us. cluster.getMaster().setCatalogJanitorEnabled(false); // Turn off balancer so it doesn't cut in and mess up our placements. this.admin.setBalancerRunning(false, true); try { // Add a bit of load up into the table so splittable. TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY); // Get region pre-split. HRegionServer server = cluster.getRegionServer(tableRegionIndex); printOutRegions(server, "Initial regions: "); int regionCount = server.getOnlineRegions().size(); // Now, before we split, set special flag in master, a flag that has // it FAIL the processing of split. SplitRegionHandler.TEST_SKIP = true; // Now try splitting and it should work. split(hri, server, regionCount); // Get daughters checkAndGetDaughters(tableName); // Assert the ephemeral node is up in zk. String path = ZKAssign.getNodeName(t.getConnection() .getZooKeeperWatcher(), hri.getEncodedName()); Stat stats = t.getConnection().getZooKeeperWatcher() .getRecoverableZooKeeper().exists(path, false); LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats=" + stats); RegionTransitionData rtd = ZKAssign.getData(t.getConnection() .getZooKeeperWatcher(), hri.getEncodedName()); // State could be SPLIT or SPLITTING. assertTrue(rtd.getEventType().equals(EventType.RS_ZK_REGION_SPLIT) || rtd.getEventType().equals(EventType.RS_ZK_REGION_SPLITTING)); // abort and wait for new master. MockMasterWithoutCatalogJanitor master = abortAndWaitForMaster(); this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration()); // update the hri to be offlined and splitted. hri.setOffline(true); hri.setSplit(true); ServerName regionServerOfRegion = master.getAssignmentManager() .getRegionServerOfRegion(hri); assertTrue(regionServerOfRegion != null); } finally { // Set this flag back. SplitRegionHandler.TEST_SKIP = false; admin.setBalancerRunning(true, false); cluster.getMaster().setCatalogJanitorEnabled(true); t.close(); } }
/** * Verifies HBASE-5806. Here the case is that splitting is completed but before the * CJ could remove the parent region the master is killed and restarted. * @throws IOException * @throws InterruptedException * @throws NodeExistsException * @throws KeeperException */ @Test (timeout = 300000) public void testMasterRestartAtRegionSplitPendingCatalogJanitor() throws IOException, InterruptedException, NodeExistsException, KeeperException { final byte[] tableName = Bytes.toBytes("testMasterRestartAtRegionSplitPendingCatalogJanitor"); // Create table then get the single region for our new table. this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration()); HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY); List<HRegion> regions = cluster.getRegions(tableName); HRegionInfo hri = getAndCheckSingleTableRegion(regions); int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri); // Turn off balancer so it doesn't cut in and mess up our placements. this.admin.setBalancerRunning(false, true); // Turn off the meta scanner so it don't remove parent on us. cluster.getMaster().setCatalogJanitorEnabled(false); try { // Add a bit of load up into the table so splittable. TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY); // Get region pre-split. HRegionServer server = cluster.getRegionServer(tableRegionIndex); printOutRegions(server, "Initial regions: "); int regionCount = server.getOnlineRegions().size(); split(hri, server, regionCount); // Get daughters checkAndGetDaughters(tableName); // Assert the ephemeral node is up in zk. String path = ZKAssign.getNodeName(t.getConnection() .getZooKeeperWatcher(), hri.getEncodedName()); Stat stats = t.getConnection().getZooKeeperWatcher() .getRecoverableZooKeeper().exists(path, false); LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats=" + stats); String node = ZKAssign.getNodeName(t.getConnection() .getZooKeeperWatcher(), hri.getEncodedName()); Stat stat = new Stat(); byte[] data = ZKUtil.getDataNoWatch(t.getConnection() .getZooKeeperWatcher(), node, stat); // ZKUtil.create for (int i=0; data != null && i<60; i++) { Thread.sleep(1000); data = ZKUtil.getDataNoWatch(t.getConnection().getZooKeeperWatcher(), node, stat); } assertNull("Waited too long for ZK node to be removed: "+node, data); MockMasterWithoutCatalogJanitor master = abortAndWaitForMaster(); this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration()); hri.setOffline(true); hri.setSplit(true); ServerName regionServerOfRegion = master.getAssignmentManager() .getRegionServerOfRegion(hri); assertTrue(regionServerOfRegion == null); } finally { // Set this flag back. SplitRegionHandler.TEST_SKIP = false; this.admin.setBalancerRunning(true, false); cluster.getMaster().setCatalogJanitorEnabled(true); t.close(); } }
/** * A test that intentionally has master fail the processing of the split message. * Tests that the regionserver split ephemeral node gets cleaned up if it * crashes and that after we process server shutdown, the daughters are up on * line. * @throws IOException * @throws InterruptedException * @throws NodeExistsException * @throws KeeperException * @throws DeserializationException */ @Test (timeout = 300000) public void testRSSplitEphemeralsDisappearButDaughtersAreOnlinedAfterShutdownHandling() throws IOException, InterruptedException, NodeExistsException, KeeperException, DeserializationException, ServiceException { final byte [] tableName = Bytes.toBytes("ephemeral"); // Create table then get the single region for our new table. HTable t = TESTING_UTIL.createTable(tableName, HConstants.CATALOG_FAMILY); List<HRegion> regions = cluster.getRegions(tableName); HRegionInfo hri = getAndCheckSingleTableRegion(regions); int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri); // Turn off balancer so it doesn't cut in and mess up our placements. this.admin.setBalancerRunning(false, true); // Turn off the meta scanner so it don't remove parent on us. cluster.getMaster().setCatalogJanitorEnabled(false); try { // Add a bit of load up into the table so splittable. TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY); // Get region pre-split. HRegionServer server = cluster.getRegionServer(tableRegionIndex); printOutRegions(server, "Initial regions: "); int regionCount = ProtobufUtil.getOnlineRegions(server).size(); // Now, before we split, set special flag in master, a flag that has // it FAIL the processing of split. SplitRegionHandler.TEST_SKIP = true; // Now try splitting and it should work. split(hri, server, regionCount); // Get daughters List<HRegion> daughters = checkAndGetDaughters(tableName); // Assert the ephemeral node is up in zk. String path = ZKAssign.getNodeName(TESTING_UTIL.getZooKeeperWatcher(), hri.getEncodedName()); Stat stats = TESTING_UTIL.getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false); LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats=" + stats); RegionTransition rt = RegionTransition.parseFrom(ZKAssign.getData(TESTING_UTIL.getZooKeeperWatcher(), hri.getEncodedName())); // State could be SPLIT or SPLITTING. assertTrue(rt.getEventType().equals(EventType.RS_ZK_REGION_SPLIT) || rt.getEventType().equals(EventType.RS_ZK_REGION_SPLITTING)); // Now crash the server cluster.abortRegionServer(tableRegionIndex); waitUntilRegionServerDead(); awaitDaughters(tableName, daughters.size()); // Assert daughters are online. regions = cluster.getRegions(tableName); for (HRegion r: regions) { assertTrue(daughters.contains(r)); } // Finally assert that the ephemeral SPLIT znode was cleaned up. for (int i=0; i<100; i++) { // wait a bit (10s max) for the node to disappear stats = TESTING_UTIL.getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false); if (stats == null) break; Thread.sleep(100); } LOG.info("EPHEMERAL NODE AFTER SERVER ABORT, path=" + path + ", stats=" + stats); assertTrue(stats == null); } finally { // Set this flag back. SplitRegionHandler.TEST_SKIP = false; admin.setBalancerRunning(true, false); cluster.getMaster().setCatalogJanitorEnabled(true); } }
/** * Verifies HBASE-5806. When splitting is partially done and the master goes down * when the SPLIT node is in either SPLIT or SPLITTING state. * * @throws IOException * @throws InterruptedException * @throws NodeExistsException * @throws KeeperException * @throws DeserializationException */ @Test(timeout = 300000) public void testMasterRestartWhenSplittingIsPartial() throws IOException, InterruptedException, NodeExistsException, KeeperException, DeserializationException, ServiceException { final byte[] tableName = Bytes.toBytes("testMasterRestartWhenSplittingIsPartial"); // Create table then get the single region for our new table. HTable t = TESTING_UTIL.createTable(tableName, HConstants.CATALOG_FAMILY); List<HRegion> regions = cluster.getRegions(tableName); HRegionInfo hri = getAndCheckSingleTableRegion(regions); int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri); // Turn off balancer so it doesn't cut in and mess up our placements. this.admin.setBalancerRunning(false, true); // Turn off the meta scanner so it don't remove parent on us. cluster.getMaster().setCatalogJanitorEnabled(false); try { // Add a bit of load up into the table so splittable. TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY); // Get region pre-split. HRegionServer server = cluster.getRegionServer(tableRegionIndex); printOutRegions(server, "Initial regions: "); // Now, before we split, set special flag in master, a flag that has // it FAIL the processing of split. SplitRegionHandler.TEST_SKIP = true; // Now try splitting and it should work. this.admin.split(hri.getRegionNameAsString()); checkAndGetDaughters(tableName); // Assert the ephemeral node is up in zk. String path = ZKAssign.getNodeName(t.getConnection() .getZooKeeperWatcher(), hri.getEncodedName()); Stat stats = t.getConnection().getZooKeeperWatcher() .getRecoverableZooKeeper().exists(path, false); LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats=" + stats); byte[] bytes = ZKAssign.getData(t.getConnection() .getZooKeeperWatcher(), hri.getEncodedName()); RegionTransition rtd = RegionTransition.parseFrom(bytes); // State could be SPLIT or SPLITTING. assertTrue(rtd.getEventType().equals(EventType.RS_ZK_REGION_SPLIT) || rtd.getEventType().equals(EventType.RS_ZK_REGION_SPLITTING)); // abort and wait for new master. MockMasterWithoutCatalogJanitor master = abortAndWaitForMaster(); this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration()); // update the hri to be offlined and splitted. hri.setOffline(true); hri.setSplit(true); ServerName regionServerOfRegion = master.getAssignmentManager() .getRegionStates().getRegionServerOfRegion(hri); assertTrue(regionServerOfRegion != null); } finally { // Set this flag back. SplitRegionHandler.TEST_SKIP = false; admin.setBalancerRunning(true, false); cluster.getMaster().setCatalogJanitorEnabled(true); } }
/** * Verifies HBASE-5806. Here the case is that splitting is completed but before the * CJ could remove the parent region the master is killed and restarted. * @throws IOException * @throws InterruptedException * @throws NodeExistsException * @throws KeeperException */ @Test (timeout = 300000) public void testMasterRestartAtRegionSplitPendingCatalogJanitor() throws IOException, InterruptedException, NodeExistsException, KeeperException, ServiceException { final byte[] tableName = Bytes.toBytes("testMasterRestartAtRegionSplitPendingCatalogJanitor"); // Create table then get the single region for our new table. HTable t = TESTING_UTIL.createTable(tableName, HConstants.CATALOG_FAMILY); List<HRegion> regions = cluster.getRegions(tableName); HRegionInfo hri = getAndCheckSingleTableRegion(regions); int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri); // Turn off balancer so it doesn't cut in and mess up our placements. this.admin.setBalancerRunning(false, true); // Turn off the meta scanner so it don't remove parent on us. cluster.getMaster().setCatalogJanitorEnabled(false); try { // Add a bit of load up into the table so splittable. TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY); // Get region pre-split. HRegionServer server = cluster.getRegionServer(tableRegionIndex); printOutRegions(server, "Initial regions: "); this.admin.split(hri.getRegionNameAsString()); checkAndGetDaughters(tableName); // Assert the ephemeral node is up in zk. String path = ZKAssign.getNodeName(t.getConnection() .getZooKeeperWatcher(), hri.getEncodedName()); Stat stats = t.getConnection().getZooKeeperWatcher() .getRecoverableZooKeeper().exists(path, false); LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats=" + stats); String node = ZKAssign.getNodeName(t.getConnection() .getZooKeeperWatcher(), hri.getEncodedName()); Stat stat = new Stat(); byte[] data = ZKUtil.getDataNoWatch(t.getConnection() .getZooKeeperWatcher(), node, stat); // ZKUtil.create for (int i=0; data != null && i<60; i++) { Thread.sleep(1000); data = ZKUtil.getDataNoWatch(t.getConnection().getZooKeeperWatcher(), node, stat); } assertNull("Waited too long for ZK node to be removed: "+node, data); MockMasterWithoutCatalogJanitor master = abortAndWaitForMaster(); this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration()); hri.setOffline(true); hri.setSplit(true); ServerName regionServerOfRegion = master.getAssignmentManager() .getRegionStates().getRegionServerOfRegion(hri); assertTrue(regionServerOfRegion == null); } finally { // Set this flag back. SplitRegionHandler.TEST_SKIP = false; this.admin.setBalancerRunning(true, false); cluster.getMaster().setCatalogJanitorEnabled(true); } }