/**
 * Announces a split request by creating an ephemeral znode in the PENDING_SPLIT state for the
 * parent region. The znode is ephemeral in case the regionserver dies mid-split.
 * <p>
 * Nodes in other states are not transitioned. If the znode cannot be created an
 * {@link IOException} is thrown.
 * @param parent region being split; its encoded name identifies the znode
 * @param serverName server event originates from
 * @param hri_a daughter region, serialized into the transition payload
 * @param hri_b daughter region, serialized into the transition payload
 * @throws IOException if the znode was not created, or wrapping any ZooKeeper failure
 */
@Override
public void startSplitTransaction(HRegion parent, ServerName serverName, HRegionInfo hri_a,
    HRegionInfo hri_b) throws IOException {
  final HRegionInfo parentInfo = parent.getRegionInfo();
  try {
    String announcement = "Creating ephemeral node for " + parentInfo.getEncodedName()
        + " in PENDING_SPLIT state";
    LOG.debug(watcher.prefix(announcement));

    byte[] daughters = HRegionInfo.toDelimitedByteArray(hri_a, hri_b);
    RegionTransition transition = RegionTransition.createRegionTransition(
        RS_ZK_REQUEST_REGION_SPLIT, parentInfo.getRegionName(), serverName, daughters);
    String znode = ZKAssign.getNodeName(watcher, parentInfo.getEncodedName());

    boolean created =
        ZKUtil.createEphemeralNodeAndWatch(watcher, znode, transition.toByteArray());
    if (!created) {
      throw new IOException("Failed create of ephemeral " + znode);
    }
  } catch (KeeperException e) {
    throw new IOException("Failed creating PENDING_SPLIT znode on "
        + parent.getRegionInfo().getRegionNameAsString(), e);
  }
}
/**
 * Creates a new ephemeral node in the PENDING_MERGE state for the merged region.
 * Create it ephemeral in case regionserver dies mid-merge.
 *
 * <p>
 * Does not transition nodes from other states. Any
 * {@link org.apache.zookeeper.KeeperException} raised while creating the node is
 * rethrown wrapped in an {@link IOException} that names the region, so callers only
 * ever see an {@code IOException}.
 *
 * @param region merged region; its encoded name identifies the znode
 * @param serverName server event originates from
 * @param a first region being merged, serialized into the transition payload
 * @param b second region being merged, serialized into the transition payload
 * @throws IOException if the znode was not created, or wrapping any ZooKeeper failure
 */
@Override
public void startRegionMergeTransaction(final HRegionInfo region, final ServerName serverName,
    final HRegionInfo a, final HRegionInfo b) throws IOException {
  LOG.debug(watcher.prefix("Creating ephemeral node for " + region.getEncodedName()
      + " in PENDING_MERGE state"));
  byte[] payload = HRegionInfo.toDelimitedByteArray(region, a, b);
  RegionTransition rt = RegionTransition.createRegionTransition(
      RS_ZK_REQUEST_REGION_MERGE, region.getRegionName(), serverName, payload);
  String node = ZKAssign.getNodeName(watcher, region.getEncodedName());
  try {
    if (!ZKUtil.createEphemeralNodeAndWatch(watcher, node, rt.toByteArray())) {
      throw new IOException("Failed create of ephemeral " + node);
    }
  } catch (KeeperException e) {
    // Keep the cause, but say which region failed (same shape as the split path).
    throw new IOException("Failed creating PENDING_MERGE znode on "
        + region.getRegionNameAsString(), e);
  }
}
/** * Set region as OFFLINED up in zookeeper * * @param state * @return the version of the offline node if setting of the OFFLINE node was * successful, -1 otherwise. */ private int setOfflineInZooKeeper(final RegionState state, final ServerName destination) { if (!state.isClosed() && !state.isOffline()) { String msg = "Unexpected state : " + state + " .. Cannot transit it to OFFLINE."; this.server.abort(msg, new IllegalStateException(msg)); return -1; } regionStates.updateRegionState(state.getRegion(), State.OFFLINE); int versionOfOfflineNode; try { // get the version after setting the znode to OFFLINE versionOfOfflineNode = ZKAssign.createOrForceNodeOffline(watcher, state.getRegion(), destination); if (versionOfOfflineNode == -1) { LOG.warn("Attempted to create/force node into OFFLINE state before " + "completing assignment but failed to do so for " + state); return -1; } } catch (KeeperException e) { server.abort("Unexpected ZK exception creating/setting node OFFLINE", e); return -1; } return versionOfOfflineNode; }
/**
 * Marks the region OFFLINE in the in-memory region states and asks ZooKeeper to create the
 * OFFLINE znode asynchronously.
 *
 * @param state region state to take offline; must be closed or offline
 * @param cb callback handed to the asynchronous znode creation
 * @param destination server passed through to the znode creation
 * @return True if we succeeded, false otherwise (State was incorrect or failed
 *         updating zk).
 */
private boolean asyncSetOfflineInZooKeeper(final RegionState state,
    final AsyncCallback.StringCallback cb, final ServerName destination) {
  final boolean canGoOffline = state.isClosed() || state.isOffline();
  if (!canGoOffline) {
    this.server.abort("Unexpected state trying to OFFLINE; " + state,
        new IllegalStateException());
    return false;
  }

  regionStates.updateRegionState(state.getRegion(), State.OFFLINE);

  try {
    ZKAssign.asyncCreateNodeOffline(watcher, state.getRegion(), destination, cb, state);
    return true;
  } catch (NodeExistsException e) {
    // An already-existing node is tolerated: warn but do not abort.
    LOG.warn("Node for " + state.getRegion() + " already exists");
    return false;
  } catch (KeeperException e) {
    server.abort("Unexpected ZK exception creating/setting node OFFLINE", e);
    return false;
  }
}
/**
 * Tries to delete the region's znode, attempting each given event type in order and
 * stopping at the first successful delete.
 *
 * @param encodedName encoded region name identifying the znode
 * @param desc short description of the node, used only in log and abort messages
 * @param sn server name passed to the delete call
 * @param types event types to try, in order
 * @return true if a delete succeeded; false if none did, the node did not exist, or
 *         ZooKeeper failed (in which case the server is aborted)
 */
private boolean deleteNodeInStates(String encodedName, String desc, ServerName sn,
    EventType... types) {
  try {
    for (int i = 0; i < types.length; i++) {
      boolean deleted = ZKAssign.deleteNode(watcher, encodedName, types[i], sn);
      if (deleted) {
        return true;
      }
    }
    // None of the candidate states matched.
    LOG.info("Failed to delete the " + desc + " node for " + encodedName
        + ". The node type may not match");
  } catch (NoNodeException e) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("The " + desc + " node for " + encodedName + " already deleted");
    }
  } catch (KeeperException ke) {
    server.abort("Unexpected ZK exception deleting " + desc
        + " node for the region " + encodedName, ke);
  }
  return false;
}
private void onRegionOpen(final HRegionInfo hri, final ServerName sn, long openSeqNum) { regionOnline(hri, sn, openSeqNum); if (useZKForAssignment) { try { // Delete the ZNode if exists ZKAssign.deleteNodeFailSilent(watcher, hri); } catch (KeeperException ke) { server.abort("Unexpected ZK exception deleting node " + hri, ke); } } // reset the count, if any failedOpenTracker.remove(hri.getEncodedName()); if (getTableStateManager().isTableState(hri.getTable(), ZooKeeperProtos.Table.State.DISABLED, ZooKeeperProtos.Table.State.DISABLING)) { invokeUnAssign(hri); } }
private MiniHBaseCluster createRegions(String tableName) throws InterruptedException, ZooKeeperConnectionException, IOException, KeeperException { MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); log("Waiting for active/ready master"); cluster.waitForActiveAndReadyMaster(); zkw = new ZooKeeperWatcher(conf, "testOpenedRegionHandler", null); // Create a table with regions byte[] table = Bytes.toBytes(tableName); byte[] family = Bytes.toBytes("family"); TEST_UTIL.createTable(table, family); //wait till the regions are online log("Waiting for no more RIT"); ZKAssign.blockUntilNoRIT(zkw); return cluster; }
/** * Verifies that the specified region is in the specified state in ZooKeeper. * <p> * Returns true if region is in transition and in the specified state in * ZooKeeper. Returns false if the region does not exist in ZK or is in * a different state. * <p> * Method synchronizes() with ZK so will yield an up-to-date result but is * a slow read. * @param zkw * @param region * @param expectedState * @return true if region exists and is in expected state * @throws DeserializationException */ static boolean verifyRegionState(ZooKeeperWatcher zkw, HRegionInfo region, EventType expectedState) throws KeeperException, DeserializationException { String encoded = region.getEncodedName(); String node = ZKAssign.getNodeName(zkw, encoded); zkw.sync(node); // Read existing data of the node byte [] existingBytes = null; try { existingBytes = ZKUtil.getDataAndWatch(zkw, node); } catch (KeeperException.NoNodeException nne) { return false; } catch (KeeperException e) { throw e; } if (existingBytes == null) return false; RegionTransition rt = RegionTransition.parseFrom(existingBytes); return rt.getEventType().equals(expectedState); }
/**
 * Creates a CLOSING znode, closes the region through the regionserver RPC services, checks
 * that it is closed, deletes the CLOSED znode and finally reopens the region.
 */
@Test(timeout = 60000)
public void testOpenCloseByMasterWithZNode() throws Exception {
  // Create the CLOSING znode before sending the close request.
  ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());

  AdminProtos.CloseRegionRequest closeRequest =
      RequestConverter.buildCloseRegionRequest(getRS().getServerName(), regionName, true);
  AdminProtos.CloseRegionResponse closeResponse =
      getRS().rpcServices.closeRegion(null, closeRequest);
  Assert.assertTrue(closeResponse.getClosed());

  checkRegionIsClosed(HTU, getRS(), hri);

  // Remove the CLOSED znode, then bring the region back.
  ZKAssign.deleteClosedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(),
      getRS().getServerName());
  openRegion(HTU, getRS(), hri);
}
private void OpenRegion(Server server, RegionServerServices rss, HTableDescriptor htd, HRegionInfo hri, OpenRegionCoordination coordination) throws IOException, NodeExistsException, KeeperException, DeserializationException { // Create it OFFLINE node, which is what Master set before sending OPEN RPC ZKAssign.createNodeOffline(server.getZooKeeper(), hri, server.getServerName()); OpenRegionCoordination.OpenRegionDetails ord = coordination.getDetailsForNonCoordinatedOpening(); OpenRegionHandler openHandler = new OpenRegionHandler(server, rss, hri, htd, -1, coordination, ord); rss.getRegionsInTransitionInRS().put(hri.getEncodedNameAsBytes(), Boolean.TRUE); openHandler.process(); // This parse is not used? RegionTransition.parseFrom(ZKAssign.getData(server.getZooKeeper(), hri.getEncodedName())); // delete the node, which is what Master do after the region is opened ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(), EventType.RS_ZK_REGION_OPENED, server.getServerName()); }
private void setEncodingConf(DataBlockEncoding encoding, boolean onlineChange) throws Exception { LOG.debug("Setting CF encoding to " + encoding + " (ordinal=" + encoding.ordinal() + "), onlineChange=" + onlineChange); hcd.setDataBlockEncoding(encoding); try (Admin admin = TEST_UTIL.getConnection().getAdmin()) { if (!onlineChange) { admin.disableTable(tableName); } admin.modifyColumn(tableName, hcd); if (!onlineChange) { admin.enableTable(tableName); } } // This is a unit test, not integration test. So let's // wait for regions out of transition. Otherwise, for online // encoding change, verification phase may be flaky because // regions could be still in transition. ZKAssign.blockUntilNoRIT(TEST_UTIL.getZooKeeperWatcher()); }
public void offlineDisabledRegion(HRegionInfo regionInfo) { // Disabling so should not be reassigned, just delete the CLOSED node LOG.debug("Table being disabled so deleting ZK node and removing from " + "regions in transition, skipping assignment of region " + regionInfo.getRegionNameAsString()); try { if (!ZKAssign.deleteClosedNode(watcher, regionInfo.getEncodedName())) { // Could also be in OFFLINE mode ZKAssign.deleteOfflineNode(watcher, regionInfo.getEncodedName()); } } catch (KeeperException.NoNodeException nne) { LOG.debug("Tried to delete closed node for " + regionInfo + " but it " + "does not exist so just offlining"); } catch (KeeperException e) { this.master.abort("Error deleting CLOSED node in ZK", e); } regionOffline(regionInfo); }
/** * @param path * @return True if znode is in SPLIT or SPLITTING state. * @throws KeeperException Can happen if the znode went away in meantime. */ private boolean isSplitOrSplitting(final String path) throws KeeperException { boolean result = false; // This may fail if the SPLIT or SPLITTING znode gets cleaned up before we // can get data from it. RegionTransitionData data = ZKAssign.getData(master.getZooKeeper(), path); EventType evt = data.getEventType(); switch (evt) { case RS_ZK_REGION_SPLIT: case RS_ZK_REGION_SPLITTING: result = true; break; default: break; } return result; }
/** * Fakes the regionserver-side zk transitions of a region open. * @param w ZooKeeperWatcher to use. * @param sn Name of the regionserver doing the 'opening' * @param hri Region we're 'opening'. * @throws KeeperException * @throws DeserializationException */ static void fakeRegionServerRegionOpenInZK(HMaster master, final ZooKeeperWatcher w, final ServerName sn, final HRegionInfo hri) throws KeeperException, DeserializationException, InterruptedException { // Wait till the we region is ready to be open in RIT. waitForRegionPendingOpenInRIT(master.getAssignmentManager(), hri.getEncodedName()); // Get current versionid else will fail on transition from OFFLINE to OPENING below int versionid = ZKAssign.getVersion(w, hri); assertNotSame(-1, versionid); // This uglyness below is what the openregionhandler on RS side does. I // looked at exposing the method over in openregionhandler but its just a // one liner and its deep over in another package so just repeat it below. versionid = ZKAssign.transitionNode(w, hri, sn, EventType.M_ZK_REGION_OFFLINE, EventType.RS_ZK_REGION_OPENING, versionid); assertNotSame(-1, versionid); // Move znode from OPENING to OPENED as RS does on successful open. versionid = ZKAssign.transitionNodeOpened(w, hri, sn, versionid); assertNotSame(-1, versionid); // We should be done now. The master open handler will notice the // transition and remove this regions znode. }
private void setEncodingConf(DataBlockEncoding encoding, boolean onlineChange) throws Exception { LOG.debug("Setting CF encoding to " + encoding + " (ordinal=" + encoding.ordinal() + "), onlineChange=" + onlineChange); hcd.setDataBlockEncoding(encoding); if (!onlineChange) { admin.disableTable(tableName); } admin.modifyColumn(tableName, hcd); if (!onlineChange) { admin.enableTable(tableName); } // This is a unit test, not integration test. So let's // wait for regions out of transition. Otherwise, for online // encoding change, verification phase may be flaky because // regions could be still in transition. ZKAssign.blockUntilNoRIT(TEST_UTIL.getZooKeeperWatcher()); }
/** * @param r Region we're working on. * @return whether znode is successfully transitioned to OPENED state. * @throws IOException */ private boolean transitionToOpened(final HRegion r) throws IOException { boolean result = false; HRegionInfo hri = r.getRegionInfo(); final String name = hri.getRegionNameAsString(); // Finally, Transition ZK node to OPENED try { if (ZKAssign.transitionNodeOpened(this.server.getZooKeeper(), hri, this.server.getServerName(), this.version) == -1) { LOG.warn("Completed the OPEN of region " + name + " but when transitioning from " + " OPENING to OPENED got a version mismatch, someone else clashed " + "so now unassigning -- closing region on server: " + this.server.getServerName()); } else { LOG.debug("region transitioned to opened in zookeeper: " + r.getRegionInfo() + ", server: " + this.server.getServerName()); result = true; } } catch (KeeperException e) { LOG.error("Failed transitioning node " + name + " from OPENING to OPENED -- closing region", e); } return result; }
/** * Update our OPENING state in zookeeper. * Do this so master doesn't timeout this region-in-transition. * @param context Some context to add to logs if failure * @return True if successful transition. */ boolean tickleOpening(final String context) { // If previous checks failed... do not try again. if (!isGoodVersion()) return false; String encodedName = this.regionInfo.getEncodedName(); try { this.version = ZKAssign.retransitionNodeOpening(server.getZooKeeper(), this.regionInfo, this.server.getServerName(), this.version); } catch (KeeperException e) { server.abort("Exception refreshing OPENING; region=" + encodedName + ", context=" + context, e); this.version = -1; } boolean b = isGoodVersion(); if (!b) { LOG.warn("Failed refreshing OPENING; region=" + encodedName + ", context=" + context); } return b; }
/** * Run a simple server shutdown handler. * @throws KeeperException * @throws IOException */ @Test public void testShutdownHandler() throws KeeperException, IOException { // Create and startup an executor. This is used by AssignmentManager // handling zk callbacks. ExecutorService executor = startupMasterExecutor("testShutdownHandler"); // We need a mocked catalog tracker. CatalogTracker ct = Mockito.mock(CatalogTracker.class); LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server .getConfiguration()); // Create an AM. AssignmentManager am = new AssignmentManager(this.server, this.serverManager, ct, balancer, executor); try { processServerShutdownHandler(ct, am, false, null); } finally { executor.shutdown(); am.shutdown(); // Clean up all znodes ZKAssign.deleteAllNodes(this.watcher); } }
/** * Creates a new ephemeral node in the SPLITTING state for the specified region. * Create it ephemeral in case regionserver dies mid-split. * * <p>Does not transition nodes from other states. If a node already exists * for this region, a {@link NodeExistsException} will be thrown. * * @param zkw zk reference * @param region region to be created as offline * @param serverName server event originates from * @return Version of znode created. * @throws KeeperException * @throws IOException */ // Copied from SplitTransaction rather than open the method over there in // the regionserver package. private static int createNodeSplitting(final ZooKeeperWatcher zkw, final HRegionInfo region, final ServerName serverName) throws KeeperException, IOException { RegionTransition rt = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING, region.getRegionName(), serverName); String node = ZKAssign.getNodeName(zkw, region.getEncodedName()); if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, rt.toByteArray())) { throw new IOException("Failed create of ephemeral " + node); } // Transition node from SPLITTING to SPLITTING and pick up version so we // can be sure this znode is ours; version is needed deleting. return transitionNodeSplitting(zkw, region, serverName, -1); }
/**
 * Best-effort attempt to move the region's znode from OPENING to FAILED_OPEN using this
 * handler's current version. Failures are logged, never rethrown.
 * @param hri Region we're working on.
 * @return whether znode is successfully transitioned to FAILED_OPEN state.
 */
private boolean tryTransitionFromOpeningToFailedOpen(final HRegionInfo hri) {
  final String name = hri.getRegionNameAsString();
  try {
    LOG.info("Opening of region " + hri + " failed, transitioning"
        + " from OPENING to FAILED_OPEN in ZK, expecting version " + this.version);
    int newVersion = ZKAssign.transitionNode(
        this.server.getZooKeeper(), hri,
        this.server.getServerName(),
        EventType.RS_ZK_REGION_OPENING,
        EventType.RS_ZK_REGION_FAILED_OPEN,
        this.version);
    if (newVersion != -1) {
      return true;
    }
    LOG.warn("Unable to mark region " + hri + " as FAILED_OPEN. "
        + "It's likely that the master already timed out this open "
        + "attempt, and thus another RS already has the region.");
  } catch (KeeperException e) {
    LOG.error("Failed transitioning node " + name + " from OPENING to FAILED_OPEN", e);
  }
  return false;
}
@Test public void testFailedUpdateMeta() throws Exception { Server server = new MockServer(HTU); RegionServerServices rsServices = HTU.createMockRegionServerService(); // Create it OFFLINE, which is what it expects ZKAssign.createNodeOffline(server.getZooKeeper(), TEST_HRI, server.getServerName()); // Create the handler OpenRegionHandler handler = new OpenRegionHandler(server, rsServices, TEST_HRI, TEST_HTD) { @Override boolean updateMeta(final HRegion r) { // Fake failure of updating META return false; } }; rsServices.getRegionsInTransitionInRS().put( TEST_HRI.getEncodedNameAsBytes(), Boolean.TRUE); handler.process(); // Handler should have transitioned it to FAILED_OPEN RegionTransition rt = RegionTransition.parseFrom( ZKAssign.getData(server.getZooKeeper(), TEST_HRI.getEncodedName())); assertEquals(EventType.RS_ZK_REGION_FAILED_OPEN, rt.getEventType()); }
/**
 * Announces a split request by creating an ephemeral znode in the PENDING_SPLIT state for the
 * parent region. The znode is ephemeral in case the regionserver dies mid-split.
 * <p>
 * Nodes in other states are not transitioned. If the znode cannot be created an
 * {@link IOException} is thrown.
 * @param parent region being split; its encoded name identifies the znode
 * @param serverName server event originates from
 * @param hri_a daughter region, serialized into the transition payload
 * @param hri_b daughter region, serialized into the transition payload
 * @throws IOException if the znode was not created, or wrapping any ZooKeeper failure
 */
@Override
public void startSplitTransaction(HRegion parent, ServerName serverName, HRegionInfo hri_a,
    HRegionInfo hri_b) throws IOException {
  final HRegionInfo parentInfo = parent.getRegionInfo();
  try {
    LOG.debug(watcher.prefix(
        "Creating ephemeral node for " + parentInfo.getEncodedName()
            + " in PENDING_SPLIT state"));

    final byte[] daughterBytes = HRegionInfo.toDelimitedByteArray(hri_a, hri_b);
    final RegionTransition request = RegionTransition.createRegionTransition(
        RS_ZK_REQUEST_REGION_SPLIT, parentInfo.getRegionName(), serverName, daughterBytes);
    final String znodePath = ZKAssign.getNodeName(watcher, parentInfo.getEncodedName());

    if (ZKUtil.createEphemeralNodeAndWatch(watcher, znodePath, request.toByteArray())) {
      return;
    }
    throw new IOException("Failed create of ephemeral " + znodePath);
  } catch (KeeperException e) {
    throw new IOException(
        "Failed creating PENDING_SPLIT znode on " + parent.getRegionNameAsString(), e);
  }
}
/**
 * ZK-based check for a bad znode state, e.g. we are trying to delete the znode and it is
 * not ours (version doesn't match).
 * <p>
 * Returns true only when the close details ask for status to be published in ZK and
 * {@code ZKAssign.checkClosingState} returns false for the expected version. A ZooKeeper
 * exception aborts the server and is rethrown wrapped in a {@link RuntimeException}.
 *
 * @param regionInfo region being closed
 * @param crd close details; must be a {@code ZkCloseRegionDetails}
 */
@Override
public boolean checkClosingState(HRegionInfo regionInfo, CloseRegionDetails crd) {
  final ZkCloseRegionDetails zkCrd = (ZkCloseRegionDetails) crd;
  try {
    if (!zkCrd.isPublishStatusInZk()) {
      return false;
    }
    boolean closingStateOk =
        ZKAssign.checkClosingState(watcher, regionInfo, zkCrd.getExpectedVersion());
    return !closingStateOk;
  } catch (KeeperException ke) {
    csm.getServer().abort("Unrecoverable exception while checking state with zk "
        + regionInfo.getRegionNameAsString() + ", still finishing close", ke);
    throw new RuntimeException(ke);
  }
}
/** * @param r Region we're working on. * @return whether znode is successfully transitioned to OPENED state. * @throws java.io.IOException */ @Override public boolean transitionToOpened(final HRegion r, OpenRegionDetails ord) throws IOException { ZkOpenRegionDetails zkOrd = (ZkOpenRegionDetails) ord; boolean result = false; HRegionInfo hri = r.getRegionInfo(); final String name = hri.getRegionNameAsString(); // Finally, Transition ZK node to OPENED try { if (ZKAssign.transitionNodeOpened(watcher, hri, zkOrd.getServerName(), zkOrd.getVersion()) == -1) { String warnMsg = "Completed the OPEN of region " + name + " but when transitioning from " + " OPENING to OPENED "; try { String node = ZKAssign.getNodeName(watcher, hri.getEncodedName()); if (ZKUtil.checkExists(watcher, node) < 0) { // if the znode coordination.getServer().abort(warnMsg + "the znode disappeared", null); } else { LOG.warn(warnMsg + "got a version mismatch, someone else clashed; " + "so now unassigning -- closing region on server: " + zkOrd.getServerName()); } } catch (KeeperException ke) { coordination.getServer().abort(warnMsg, ke); } } else { LOG.debug("Transitioned " + r.getRegionInfo().getEncodedName() + " to OPENED in zk on " + zkOrd.getServerName()); result = true; } } catch (KeeperException e) { LOG.error("Failed transitioning node " + name + " from OPENING to OPENED -- closing region", e); } return result; }
/** * Transition ZK node from OFFLINE to OPENING. * @param regionInfo region info instance * @param ord - instance of open region details, for ZK implementation * will include version Of OfflineNode that needs to be compared * before changing the node's state from OFFLINE * @return True if successful transition. */ @Override public boolean transitionFromOfflineToOpening(HRegionInfo regionInfo, OpenRegionDetails ord) { ZkOpenRegionDetails zkOrd = (ZkOpenRegionDetails) ord; // encoded name is used as znode encoded name in ZK final String encodedName = regionInfo.getEncodedName(); // TODO: should also handle transition from CLOSED? try { // Initialize the znode version. zkOrd.setVersion(ZKAssign.transitionNode(watcher, regionInfo, zkOrd.getServerName(), EventType.M_ZK_REGION_OFFLINE, EventType.RS_ZK_REGION_OPENING, zkOrd.getVersionOfOfflineNode())); } catch (KeeperException e) { LOG.error("Error transition from OFFLINE to OPENING for region=" + encodedName, e); zkOrd.setVersion(-1); return false; } boolean b = isGoodVersion(zkOrd); if (!b) { LOG.warn("Failed transition from OFFLINE to OPENING for region=" + encodedName); } return b; }
/** * Update our OPENING state in zookeeper. * Do this so master doesn't timeout this region-in-transition. * We may lose the znode ownership during the open. Currently its * too hard interrupting ongoing region open. Just let it complete * and check we still have the znode after region open. * * @param context Some context to add to logs if failure * @return True if successful transition. */ @Override public boolean tickleOpening(OpenRegionDetails ord, HRegionInfo regionInfo, RegionServerServices rsServices, final String context) { ZkOpenRegionDetails zkOrd = (ZkOpenRegionDetails) ord; if (!isRegionStillOpening(regionInfo, rsServices)) { LOG.warn("Open region aborted since it isn't opening any more"); return false; } // If previous checks failed... do not try again. if (!isGoodVersion(zkOrd)) return false; String encodedName = regionInfo.getEncodedName(); try { zkOrd.setVersion(ZKAssign.confirmNodeOpening(watcher, regionInfo, zkOrd.getServerName(), zkOrd.getVersion())); } catch (KeeperException e) { coordination.getServer().abort("Exception refreshing OPENING; region=" + encodedName + ", context=" + context, e); zkOrd.setVersion(-1); return false; } boolean b = isGoodVersion(zkOrd); if (!b) { LOG.warn("Failed refreshing OPENING; region=" + encodedName + ", context=" + context); } return b; }
/**
 * Best-effort attempt to move the region's znode from OFFLINE to FAILED_OPEN, expecting
 * the offline-node version carried in the open details. Failures are logged, never
 * rethrown.
 *
 * @param rsServices services supplying the ZooKeeper watcher and server name
 * @param hri Region we're working on.
 * @param ord Details about region open task
 * @return whether znode is successfully transitioned to FAILED_OPEN state.
 */
@Override
public boolean tryTransitionFromOfflineToFailedOpen(RegionServerServices rsServices,
    final HRegionInfo hri, OpenRegionDetails ord) {
  final ZkOpenRegionDetails zkOrd = (ZkOpenRegionDetails) ord;
  final String name = hri.getRegionNameAsString();
  try {
    LOG.info("Opening of region " + hri + " failed, transitioning"
        + " from OFFLINE to FAILED_OPEN in ZK, expecting version "
        + zkOrd.getVersionOfOfflineNode());
    int newVersion = ZKAssign.transitionNode(
        rsServices.getZooKeeper(), hri,
        rsServices.getServerName(),
        EventType.M_ZK_REGION_OFFLINE,
        EventType.RS_ZK_REGION_FAILED_OPEN,
        zkOrd.getVersionOfOfflineNode());
    if (newVersion != -1) {
      return true;
    }
    LOG.warn("Unable to mark region " + hri + " as FAILED_OPEN. "
        + "It's likely that the master already timed out this open "
        + "attempt, and thus another RS already has the region.");
  } catch (KeeperException e) {
    LOG.error("Failed transitioning node " + name + " from OFFLINE to FAILED_OPEN", e);
  }
  return false;
}
/**
 * Best-effort attempt to move the region's znode from OPENING to FAILED_OPEN, expecting
 * the version carried in the open details. Failures are logged, never rethrown.
 * @param hri Region we're working on.
 * @param ord open details; must be a {@code ZkOpenRegionDetails}
 * @return whether znode is successfully transitioned to FAILED_OPEN state.
 */
@Override
public boolean tryTransitionFromOpeningToFailedOpen(final HRegionInfo hri,
    OpenRegionDetails ord) {
  final ZkOpenRegionDetails zkOrd = (ZkOpenRegionDetails) ord;
  final String name = hri.getRegionNameAsString();
  try {
    LOG.info("Opening of region " + hri + " failed, transitioning"
        + " from OPENING to FAILED_OPEN in ZK, expecting version " + zkOrd.getVersion());
    int newVersion = ZKAssign.transitionNode(
        watcher, hri,
        zkOrd.getServerName(),
        EventType.RS_ZK_REGION_OPENING,
        EventType.RS_ZK_REGION_FAILED_OPEN,
        zkOrd.getVersion());
    if (newVersion != -1) {
      return true;
    }
    LOG.warn("Unable to mark region " + hri + " as FAILED_OPEN. "
        + "It's likely that the master already timed out this open "
        + "attempt, and thus another RS already has the region.");
  } catch (KeeperException e) {
    LOG.error("Failed transitioning node " + name + " from OPENING to FAILED_OPEN", e);
  }
  return false;
}
/** * New unassigned node has been created. * * <p>This happens when an RS begins the OPENING, SPLITTING or CLOSING of a * region by creating a znode. * * <p>When this happens we must: * <ol> * <li>Watch the node for further children changed events</li> * <li>Watch all new children for changed events</li> * </ol> */ @Override public void nodeChildrenChanged(String path) { if (path.equals(watcher.assignmentZNode)) { zkEventWorkers.submit(new Runnable() { @Override public void run() { try { // Just make sure we see the changes for the new znodes List<String> children = ZKUtil.listChildrenAndWatchForNewChildren( watcher, watcher.assignmentZNode); if (children != null) { Stat stat = new Stat(); for (String child : children) { // if region is in transition, we already have a watch // on it, so no need to watch it again. So, as I know for now, // this is needed to watch splitting nodes only. if (!regionStates.isRegionInTransition(child)) { ZKAssign.getDataAndWatch(watcher, child, stat); } } } } catch (KeeperException e) { server.abort("Unexpected ZK exception reading unassigned children", e); } } }); } }
private void processAlreadyOpenedRegion(HRegionInfo region, ServerName sn) { // Remove region from in-memory transition and unassigned node from ZK // While trying to enable the table the regions of the table were // already enabled. LOG.debug("ALREADY_OPENED " + region.getRegionNameAsString() + " to " + sn); String encodedName = region.getEncodedName(); //If use ZkForAssignment, region already Opened event should not be handled, //leave it to zk event. See HBase-14407. if(useZKForAssignment){ String node = ZKAssign.getNodeName(watcher, encodedName); Stat stat = new Stat(); try { byte[] existingBytes = ZKUtil.getDataNoWatch(watcher, node, stat); if(existingBytes!=null){ RegionTransition rt= RegionTransition.parseFrom(existingBytes); EventType et = rt.getEventType(); if (et.equals(EventType.RS_ZK_REGION_OPENED)) { LOG.debug("ALREADY_OPENED " + region.getRegionNameAsString() + " and node in "+et+" state"); return; } } } catch (KeeperException ke) { LOG.warn("Unexpected ZK exception getData " + node + " node for the region " + encodedName, ke); } catch (DeserializationException e) { LOG.warn("Get RegionTransition from zk deserialization failed! ", e); } deleteNodeInStates(encodedName, "offline", sn, EventType.M_ZK_REGION_OFFLINE); } regionStates.regionOnline(region, sn); }
/** * @param path * @return True if znode is in SPLIT or SPLITTING or MERGED or MERGING state. * @throws KeeperException Can happen if the znode went away in meantime. * @throws DeserializationException */ private boolean isSplitOrSplittingOrMergedOrMerging(final String path) throws KeeperException, DeserializationException { boolean result = false; // This may fail if the SPLIT or SPLITTING or MERGED or MERGING znode gets // cleaned up before we can get data from it. byte [] data = ZKAssign.getData(watcher, path); if (data == null) { LOG.info("Node " + path + " is gone"); return false; } RegionTransition rt = RegionTransition.parseFrom(data); switch (rt.getEventType()) { case RS_ZK_REQUEST_REGION_SPLIT: case RS_ZK_REGION_SPLIT: case RS_ZK_REGION_SPLITTING: case RS_ZK_REQUEST_REGION_MERGE: case RS_ZK_REGION_MERGED: case RS_ZK_REGION_MERGING: result = true; break; default: LOG.info("Node " + path + " is in " + rt.getEventType()); break; } return result; }
/**
 * Drives the region's znode from OFFLINE to OPENING and then on to OPENED, feeding the
 * version returned by each step into the next.
 *
 * @param zkWatcher watcher used to reach ZooKeeper
 * @param serverName server recorded as performing the transitions
 * @param hregionInfo region whose znode is transitioned
 */
private void setRegionOpenedOnZK(final ZooKeeperWatcher zkWatcher, final ServerName serverName,
    HRegionInfo hregionInfo) throws Exception {
  final int offlineVersion = ZKAssign.getVersion(zkWatcher, hregionInfo);

  // OFFLINE -> OPENING
  final int openingVersion = ZKAssign.transitionNode(zkWatcher, hregionInfo, serverName,
      EventType.M_ZK_REGION_OFFLINE, EventType.RS_ZK_REGION_OPENING, offlineVersion);

  // OPENING -> OPENED
  ZKAssign.transitionNodeOpened(zkWatcher, hregionInfo, serverName, openingVersion);
}
private void openRegion(HRegionInfo hri) throws Exception { try { if (isRegionOpened(hri)) return; } catch (Exception e){} ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, getRS().getServerName()); // first version is '0' AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest( getRS().getServerName(), hri, 0, null, null); AdminProtos.OpenRegionResponse responseOpen = getRS().getRSRpcServices().openRegion(null, orr); Assert.assertEquals(responseOpen.getOpeningStateCount(), 1); Assert.assertEquals(responseOpen.getOpeningState(0), AdminProtos.OpenRegionResponse.RegionOpeningState.OPENED); checkRegionIsOpened(hri); }