/**
 * Verifies that this region server can currently accept calls.
 *
 * <p>The checks run in a fixed order and the first one that fails decides which exception is
 * raised: aborted, stopped, file system unavailable, not yet online.
 *
 * @throws IOException if any of the checks fails
 */
protected void checkOpen() throws IOException {
  if (regionServer.isAborted()) {
    throw new RegionServerAbortedException("Server " + regionServer.serverName + " aborting");
  } else if (regionServer.isStopped()) {
    throw new RegionServerStoppedException("Server " + regionServer.serverName + " stopping");
  } else if (!regionServer.fsOk) {
    throw new RegionServerStoppedException("File system not available");
  } else if (!regionServer.isOnline()) {
    throw new ServerNotRunningYetException(
        "Server " + regionServer.serverName + " is not running yet");
  }
}
/**
 * Fakes the close-region call with a scripted sequence of outcomes, one per invocation.
 *
 * <p>Invocations 0 through 3 each throw a different exception. Invocation 4 returns
 * {@code null} and schedules {@code doCrash(server)} to run one second later. Every other
 * invocation delegates to the superclass.
 *
 * @param server the server the close is addressed to
 * @param regionName name of the region to close
 * @return {@code null} on invocation 4, otherwise the superclass response
 * @throws IOException one of the scripted failures, or whatever the superclass throws
 */
@Override
protected CloseRegionResponse execCloseRegion(ServerName server, byte[] regionName)
    throws IOException {
  // Capture the attempt number and advance the counter in one step.
  final int attempt = this.invocations++;
  if (attempt == 0) {
    throw new NotServingRegionException("Fake");
  }
  if (attempt == 1) {
    throw new RegionServerAbortedException("Fake!");
  }
  if (attempt == 2) {
    throw new RegionServerStoppedException("Fake!");
  }
  if (attempt == 3) {
    throw new ServerNotRunningYetException("Fake!");
  }
  if (attempt == 4) {
    LOG.info("Return null response from serverName=" + server + "; means STUCK...TODO timeout");
    final Runnable crashLater = new Runnable() {
      @Override
      public void run() {
        LOG.info("Sending in CRASH of " + server);
        doCrash(server);
      }
    };
    executor.schedule(crashLater, 1, TimeUnit.SECONDS);
    return null;
  }
  return super.execCloseRegion(server, regionName);
}
/**
 * Decides whether a failure belongs to the set of exception types that may be retried without
 * limit.
 *
 * <p>The root cause of {@code t} is extracted and passed through
 * {@code processPipelineException} before it is classified.
 *
 * @param t the failure to classify
 * @return {@code true} if the processed root cause is one of the listed exception types
 */
@Override
public boolean canInfinitelyRetry(Throwable t) {
  final Throwable cause = processPipelineException(Throwables.getRootCause(t));
  return cause instanceof NotServingPartitionException
      || cause instanceof WrongPartitionException
      || cause instanceof PipelineTooBusy
      || cause instanceof RegionBusyException
      || cause instanceof NoRouteToHostException
      || cause instanceof org.apache.hadoop.hbase.ipc.FailedServerException
      || cause instanceof FailedServerException
      || cause instanceof ServerNotRunningYetException
      || cause instanceof ConnectTimeoutException
      || cause instanceof IndexNotSetUpException;
}
private RegionServerStartupResponse reportForDuty() throws IOException { ServerName masterServerName = createRegionServerStatusStub(); if (masterServerName == null) return null; RegionServerStartupResponse result = null; try { rpcServices.requestCount.set(0); LOG.info( "reportForDuty to master=" + masterServerName + " with port=" + rpcServices.isa.getPort() + ", startcode=" + this.startcode); long now = EnvironmentEdgeManager.currentTime(); int port = rpcServices.isa.getPort(); RegionServerStartupRequest.Builder request = RegionServerStartupRequest.newBuilder(); if (shouldUseThisHostnameInstead()) { request.setUseThisHostnameInstead(useThisHostnameInstead); } request.setPort(port); request.setServerStartCode(this.startcode); request.setServerCurrentTime(now); result = this.rssStub.regionServerStartup(null, request.build()); } catch (ServiceException se) { IOException ioe = ProtobufUtil.getRemoteException(se); if (ioe instanceof ClockOutOfSyncException) { LOG.fatal("Master rejected startup because clock is out of sync", ioe); // Re-throw IOE will cause RS to abort throw ioe; } else if (ioe instanceof ServerNotRunningYetException) { LOG.debug("Master is not running yet"); } else { LOG.warn("error telling master we are up", se); } rssStub = null; } return result; }
/**
 * Verifies that this region server can currently accept calls.
 *
 * <p>The checks run in a fixed order and the first one that fails decides which exception is
 * raised: aborted, stopped, file system unavailable, not yet online.
 *
 * @throws IOException if any of the checks fails
 */
protected void checkOpen() throws IOException {
  if (regionServer.isAborted()) {
    throw new RegionServerAbortedException("Server " + regionServer.serverName + " aborting");
  } else if (regionServer.isStopped()) {
    throw new RegionServerStoppedException("Server " + regionServer.serverName + " stopping");
  } else if (!regionServer.fsOk) {
    throw new RegionServerStoppedException("File system not available");
  } else if (!regionServer.isOnline()) {
    throw new ServerNotRunningYetException("Server is not running yet");
  }
}
private RegionServerStartupResponse reportForDuty() throws IOException { ServerName masterServerName = createRegionServerStatusStub(); if (masterServerName == null) return null; RegionServerStartupResponse result = null; try { rpcServices.requestCount.set(0); LOG.info("reportForDuty to master=" + masterServerName + " with port=" + rpcServices.isa.getPort() + ", startcode=" + this.startcode); long now = EnvironmentEdgeManager.currentTime(); int port = rpcServices.isa.getPort(); RegionServerStartupRequest.Builder request = RegionServerStartupRequest.newBuilder(); request.setPort(port); request.setServerStartCode(this.startcode); request.setServerCurrentTime(now); result = this.rssStub.regionServerStartup(null, request.build()); } catch (ServiceException se) { IOException ioe = ProtobufUtil.getRemoteException(se); if (ioe instanceof ClockOutOfSyncException) { LOG.fatal("Master rejected startup because clock is out of sync", ioe); // Re-throw IOE will cause RS to abort throw ioe; } else if (ioe instanceof ServerNotRunningYetException) { LOG.debug("Master is not running yet"); } else { LOG.warn("error telling master we are up", se); } } return result; }
/**
 * Verifies that this region server can currently accept calls.
 *
 * <p>A stopped or aborted server is reported with the same exception type; the message gains an
 * {@code ", aborting"} suffix when the server is aborted.
 *
 * @throws IOException if the server is stopped or aborted, the file system is unavailable, or
 *         the server is not yet online
 */
protected void checkOpen() throws IOException {
  if (regionServer.isStopped() || regionServer.isAborted()) {
    final StringBuilder message =
        new StringBuilder("Server ").append(regionServer.serverName).append(" not running");
    if (regionServer.isAborted()) {
      message.append(", aborting");
    }
    throw new RegionServerStoppedException(message.toString());
  } else if (!regionServer.fsOk) {
    throw new RegionServerStoppedException("File system not available");
  } else if (!regionServer.isOnline()) {
    throw new ServerNotRunningYetException("Server is not running yet");
  }
}
private RegionServerStartupResponse reportForDuty() throws IOException { RegionServerStartupResponse result = null; Pair<ServerName, RegionServerStatusService.BlockingInterface> p = createRegionServerStatusStub(); this.rssStub = p.getSecond(); ServerName masterServerName = p.getFirst(); if (masterServerName == null) return result; try { this.requestCount.set(0); LOG.info("reportForDuty to master=" + masterServerName + " with port=" + this.isa.getPort() + ", startcode=" + this.startcode); long now = EnvironmentEdgeManager.currentTimeMillis(); int port = this.isa.getPort(); RegionServerStartupRequest.Builder request = RegionServerStartupRequest.newBuilder(); request.setPort(port); request.setServerStartCode(this.startcode); request.setServerCurrentTime(now); result = this.rssStub.regionServerStartup(null, request.build()); } catch (ServiceException se) { IOException ioe = ProtobufUtil.getRemoteException(se); if (ioe instanceof ClockOutOfSyncException) { LOG.fatal("Master rejected startup because clock is out of sync", ioe); // Re-throw IOE will cause RS to abort throw ioe; } else if (ioe instanceof ServerNotRunningYetException) { LOG.debug("Master is not running yet"); } else { LOG.warn("error telling master we are up", se); } } return result; }
@Override protected boolean remoteCallFailed(final MasterProcedureEnv env, final RegionStateNode regionNode, final IOException exception) { // TODO: Is there on-going rpc to cleanup? if (exception instanceof ServerCrashException) { // This exception comes from ServerCrashProcedure after log splitting. // SCP found this region as a RIT. Its call into here says it is ok to let this procedure go // on to a complete close now. This will release lock on this region so subsequent action on // region can succeed; e.g. the assign that follows this unassign when a move (w/o wait on SCP // the assign could run w/o logs being split so data loss). try { reportTransition(env, regionNode, TransitionCode.CLOSED, HConstants.NO_SEQNUM); } catch (UnexpectedStateException e) { // Should never happen. throw new RuntimeException(e); } } else if (exception instanceof RegionServerAbortedException || exception instanceof RegionServerStoppedException || exception instanceof ServerNotRunningYetException) { // TODO // RS is aborting, we cannot offline the region since the region may need to do WAL // recovery. Until we see the RS expiration, we should retry. // TODO: This should be suspend like the below where we call expire on server? LOG.info("Ignoring; waiting on ServerCrashProcedure", exception); } else if (exception instanceof NotServingRegionException) { LOG.info("IS THIS OK? ANY LOGS TO REPLAY; ACTING AS THOUGH ALL GOOD " + regionNode, exception); setTransitionState(RegionTransitionState.REGION_TRANSITION_FINISH); } else { LOG.warn("Expiring server " + this + "; " + regionNode.toShortString() + ", exception=" + exception); env.getMasterServices().getServerManager().expireServer(regionNode.getRegionLocation()); // Return false so this procedure stays in suspended state. It will be woken up by a // ServerCrashProcedure when it notices this RIT. // TODO: Add a SCP as a new subprocedure that we now come to depend on. return false; } return true; }
private RegionServerStartupResponse reportForDuty() throws IOException { if (this.masterless) return RegionServerStartupResponse.getDefaultInstance(); ServerName masterServerName = createRegionServerStatusStub(true); if (masterServerName == null) return null; RegionServerStartupResponse result = null; try { rpcServices.requestCount.reset(); rpcServices.rpcGetRequestCount.reset(); rpcServices.rpcScanRequestCount.reset(); rpcServices.rpcMultiRequestCount.reset(); rpcServices.rpcMutateRequestCount.reset(); LOG.info("reportForDuty to master=" + masterServerName + " with port=" + rpcServices.isa.getPort() + ", startcode=" + this.startcode); long now = EnvironmentEdgeManager.currentTime(); int port = rpcServices.isa.getPort(); RegionServerStartupRequest.Builder request = RegionServerStartupRequest.newBuilder(); if (!StringUtils.isBlank(useThisHostnameInstead)) { request.setUseThisHostnameInstead(useThisHostnameInstead); } request.setPort(port); request.setServerStartCode(this.startcode); request.setServerCurrentTime(now); result = this.rssStub.regionServerStartup(null, request.build()); } catch (ServiceException se) { IOException ioe = ProtobufUtil.getRemoteException(se); if (ioe instanceof ClockOutOfSyncException) { LOG.error(HBaseMarkers.FATAL, "Master rejected startup because clock is out of sync", ioe); // Re-throw IOE will cause RS to abort throw ioe; } else if (ioe instanceof ServerNotRunningYetException) { LOG.debug("Master is not running yet"); } else { LOG.warn("error telling master we are up", se); } rssStub = null; } return result; }
/**
 * Randomly fails the request before it reaches the superclass.
 *
 * <p>A value in {@code [0, 5)} is drawn from {@code rand}: 0, 1 and 2 each throw a different
 * exception; 3 and 4 pass the request through.
 *
 * @param server the target server
 * @param req the request to send
 * @return the superclass response when no failure was injected
 * @throws IOException an injected failure, or whatever the superclass throws
 */
@Override
public ExecuteProceduresResponse sendRequest(ServerName server, ExecuteProceduresRequest req)
    throws IOException {
  final int roll = rand.nextInt(5);
  if (roll == 0) {
    throw new ServerNotRunningYetException("wait on server startup");
  }
  if (roll == 1) {
    throw new SocketTimeoutException("simulate socket timeout");
  }
  if (roll == 2) {
    throw new RemoteException("java.io.IOException", "unexpected exception");
  }
  return super.sendRequest(server, req);
}
private RegionServerStartupResponse reportForDuty() throws IOException { RegionServerStartupResponse result = null; Pair<ServerName, RegionServerStatusService.BlockingInterface> p = createRegionServerStatusStub(); this.rssStub = p.getSecond(); ServerName masterServerName = p.getFirst(); if (masterServerName == null) return result; try { rpcServices.requestCount.set(0); LOG.info("reportForDuty to master=" + masterServerName + " with port=" + rpcServices.isa.getPort() + ", startcode=" + this.startcode); long now = EnvironmentEdgeManager.currentTimeMillis(); int port = rpcServices.isa.getPort(); RegionServerStartupRequest.Builder request = RegionServerStartupRequest.newBuilder(); request.setPort(port); request.setServerStartCode(this.startcode); request.setServerCurrentTime(now); result = this.rssStub.regionServerStartup(null, request.build()); } catch (ServiceException se) { IOException ioe = ProtobufUtil.getRemoteException(se); if (ioe instanceof ClockOutOfSyncException) { LOG.fatal("Master rejected startup because clock is out of sync", ioe); // Re-throw IOE will cause RS to abort throw ioe; } else if (ioe instanceof ServerNotRunningYetException) { LOG.debug("Master is not running yet"); } else { LOG.warn("error telling master we are up", se); } } return result; }
/**
 * Passes silently once the {@code serviceStarted} flag is set.
 *
 * @throws ServerNotRunningYetException if the flag is not set
 */
void checkServiceStarted() throws ServerNotRunningYetException {
  if (serviceStarted) {
    return;
  }
  throw new ServerNotRunningYetException("Server is not running yet");
}
/**
 * Requires the service to be started and {@code isInitialized()} to report {@code true}.
 *
 * @throws ServerNotRunningYetException if the service-started check fails
 * @throws PleaseHoldException if the master is still initializing
 */
void checkInitialized() throws PleaseHoldException, ServerNotRunningYetException {
  checkServiceStarted();
  if (isInitialized()) {
    return;
  }
  throw new PleaseHoldException("Master is initializing");
}
/** * Get the current master from ZooKeeper and open the RPC connection to it. * To get a fresh connection, the current rssStub must be null. * Method will block until a master is available. You can break from this * block by requesting the server stop. * * @return master + port, or null if server has been stopped */ @VisibleForTesting protected synchronized ServerName createRegionServerStatusStub() { if (rssStub != null) { return masterAddressTracker.getMasterAddress(); } ServerName sn = null; long previousLogTime = 0; boolean refresh = false; // for the first time, use cached data RegionServerStatusService.BlockingInterface intf = null; boolean interrupted = false; try { while (keepLooping()) { sn = this.masterAddressTracker.getMasterAddress(refresh); if (sn == null) { if (!keepLooping()) { // give up with no connection. LOG.debug("No master found and cluster is stopped; bailing out"); return null; } if (System.currentTimeMillis() > (previousLogTime + 1000)) { LOG.debug("No master found; retry"); previousLogTime = System.currentTimeMillis(); } refresh = true; // let's try pull it from ZK directly if (sleep(200)) { interrupted = true; } continue; } // If we are on the active master, use the shortcut if (this instanceof HMaster && sn.equals(getServerName())) { intf = ((HMaster) this).getMasterRpcServices(); break; } try { BlockingRpcChannel channel = this.rpcClient .createBlockingRpcChannel(sn, userProvider.getCurrent(), shortOperationTimeout); intf = RegionServerStatusService.newBlockingStub(channel); break; } catch (IOException e) { if (System.currentTimeMillis() > (previousLogTime + 1000)) { e = e instanceof RemoteException ? ((RemoteException) e).unwrapRemoteException() : e; if (e instanceof ServerNotRunningYetException) { LOG.info("Master isn't available yet, retrying"); } else { LOG.warn("Unable to connect to master. Retrying. 
Error was:", e); } previousLogTime = System.currentTimeMillis(); } if (sleep(200)) { interrupted = true; } } } } finally { if (interrupted) { Thread.currentThread().interrupt(); } } rssStub = intf; return sn; }
/** * Get the current master from ZooKeeper and open the RPC connection to it. Method will block * until a master is available. You can break from this block by requesting the server stop. * @return master + port, or null if server has been stopped */ private ServerName getMaster() { ServerName masterServerName = null; long previousLogTime = 0; HMasterRegionInterface master = null; InetSocketAddress masterIsa = null; while (keepLooping() && master == null) { masterServerName = this.masterAddressManager.getMasterAddress(); if (masterServerName == null) { if (!keepLooping()) { // give up with no connection. LOG.debug("No master found and cluster is stopped; bailing out"); return null; } LOG.debug("No master found; retry"); previousLogTime = System.currentTimeMillis(); sleeper.sleep(); continue; } masterIsa = new InetSocketAddress(masterServerName.getHostname(), masterServerName.getPort()); LOG.info("Attempting connect to Master server at " + masterServerName); try { // Do initial RPC setup. The final argument indicates that the RPC // should retry indefinitely. master = HBaseRPC.waitForProxy(this.rpcEngine, HMasterRegionInterface.class, HMasterRegionInterface.VERSION, masterIsa, this.conf, -1, this.rpcTimeout, this.rpcTimeout); } catch (IOException e) { e = e instanceof RemoteException ? ((RemoteException) e).unwrapRemoteException() : e; if (e instanceof ServerNotRunningYetException) { if (System.currentTimeMillis() > (previousLogTime + 1000)) { LOG.info("Master isn't available yet, retrying"); previousLogTime = System.currentTimeMillis(); } } else { if (System.currentTimeMillis() > (previousLogTime + 1000)) { LOG.warn("Unable to connect to master. Retrying. Error was:", e); previousLogTime = System.currentTimeMillis(); } } try { Thread.sleep(200); } catch (InterruptedException ignored) { } } } LOG.info("Connected to master at " + masterIsa); this.hbaseMaster = master; return masterServerName; }
/**
 * Requires the service to be started and the {@code initialized} flag to be set.
 *
 * @throws ServerNotRunningYetException if the service-started check fails
 * @throws PleaseHoldException if the master is still initializing
 */
void checkInitialized() throws PleaseHoldException, ServerNotRunningYetException {
  checkServiceStarted();
  if (this.initialized) {
    return;
  }
  throw new PleaseHoldException("Master is initializing");
}
/** * Get the current master from ZooKeeper and open the RPC connection to it. * <p/> * Method will block until a master is available. You can break from this * block by requesting the server stop. * * @return master + port, or null if server has been stopped */ private synchronized ServerName createRegionServerStatusStub() { if (rssStub != null) { return masterAddressTracker.getMasterAddress(); } ServerName sn = null; long previousLogTime = 0; boolean refresh = false; // for the first time, use cached data RegionServerStatusService.BlockingInterface intf = null; boolean interrupted = false; try { while (keepLooping()) { sn = this.masterAddressTracker.getMasterAddress(refresh); if (sn == null) { if (!keepLooping()) { // give up with no connection. LOG.debug("No master found and cluster is stopped; bailing out"); return null; } if (System.currentTimeMillis() > (previousLogTime + 1000)) { LOG.debug("No master found; retry"); previousLogTime = System.currentTimeMillis(); } refresh = true; // let's try pull it from ZK directly if (sleep(200)) { interrupted = true; } continue; } // If we are on the active master, use the shortcut if (this instanceof HMaster && sn.equals(getServerName())) { intf = ((HMaster) this).getMasterRpcServices(); break; } try { BlockingRpcChannel channel = this.rpcClient.createBlockingRpcChannel(sn, userProvider.getCurrent(), operationTimeout); intf = RegionServerStatusService.newBlockingStub(channel); break; } catch (IOException e) { if (System.currentTimeMillis() > (previousLogTime + 1000)) { e = e instanceof RemoteException ? ((RemoteException) e).unwrapRemoteException() : e; if (e instanceof ServerNotRunningYetException) { LOG.info("Master isn't available yet, retrying"); } else { LOG.warn("Unable to connect to master. Retrying. Error was:", e); } previousLogTime = System.currentTimeMillis(); } if (sleep(200)) { interrupted = true; } } } } finally { if (interrupted) { Thread.currentThread().interrupt(); } } rssStub = intf; return sn; }
/** * Get the current master from ZooKeeper and open the RPC connection to it. * * Method will block until a master is available. You can break from this * block by requesting the server stop. * * @return master + port, or null if server has been stopped */ private Pair<ServerName, RegionServerStatusService.BlockingInterface> createRegionServerStatusStub() { ServerName sn = null; long previousLogTime = 0; RegionServerStatusService.BlockingInterface master = null; boolean refresh = false; // for the first time, use cached data RegionServerStatusService.BlockingInterface intf = null; while (keepLooping() && master == null) { sn = this.masterAddressTracker.getMasterAddress(refresh); if (sn == null) { if (!keepLooping()) { // give up with no connection. LOG.debug("No master found and cluster is stopped; bailing out"); return null; } LOG.debug("No master found; retry"); previousLogTime = System.currentTimeMillis(); refresh = true; // let's try pull it from ZK directly sleeper.sleep(); continue; } new InetSocketAddress(sn.getHostname(), sn.getPort()); try { BlockingRpcChannel channel = this.rpcClient.createBlockingRpcChannel(sn, userProvider.getCurrent(), this.rpcTimeout); intf = RegionServerStatusService.newBlockingStub(channel); break; } catch (IOException e) { e = e instanceof RemoteException ? ((RemoteException)e).unwrapRemoteException() : e; if (e instanceof ServerNotRunningYetException) { if (System.currentTimeMillis() > (previousLogTime+1000)){ LOG.info("Master isn't available yet, retrying"); previousLogTime = System.currentTimeMillis(); } } else { if (System.currentTimeMillis() > (previousLogTime + 1000)) { LOG.warn("Unable to connect to master. Retrying. Error was:", e); previousLogTime = System.currentTimeMillis(); } } try { Thread.sleep(200); } catch (InterruptedException ignored) { } } } return new Pair<ServerName, RegionServerStatusService.BlockingInterface>(sn, intf); }
/** * Get the current master from ZooKeeper and open the RPC connection to it. * * Method will block until a master is available. You can break from this * block by requesting the server stop. * * @return master + port, or null if server has been stopped */ private ServerName getMaster() { ServerName masterServerName = null; long previousLogTime = 0; HMasterRegionInterface master = null; InetSocketAddress masterIsa = null; while (keepLooping() && master == null) { masterServerName = this.masterAddressManager.getMasterAddress(); if (masterServerName == null) { if (!keepLooping()) { // give up with no connection. LOG.debug("No master found and cluster is stopped; bailing out"); return null; } LOG.debug("No master found; retry"); previousLogTime = System.currentTimeMillis(); sleeper.sleep(); continue; } masterIsa = new InetSocketAddress(masterServerName.getHostname(), masterServerName.getPort()); LOG.info("Attempting connect to Master server at " + masterServerName); try { // Do initial RPC setup. The final argument indicates that the RPC // should retry indefinitely. master = HBaseRPC.waitForProxy(this.rpcEngine, HMasterRegionInterface.class, HMasterRegionInterface.VERSION, masterIsa, this.conf, -1, this.rpcTimeout, this.rpcTimeout); } catch (IOException e) { e = e instanceof RemoteException ? ((RemoteException)e).unwrapRemoteException() : e; if (e instanceof ServerNotRunningYetException) { if (System.currentTimeMillis() > (previousLogTime+1000)){ LOG.info("Master isn't available yet, retrying"); previousLogTime = System.currentTimeMillis(); } } else { if (System.currentTimeMillis() > (previousLogTime + 1000)) { LOG.warn("Unable to connect to master. Retrying. Error was:", e); previousLogTime = System.currentTimeMillis(); } } try { Thread.sleep(200); } catch (InterruptedException ignored) { } } } LOG.info("Connected to master at " + masterIsa); this.hbaseMaster = master; return masterServerName; }
/**
 * Requires the service to be started, the master to be initialized, and the master not to be
 * stopped. The checks run in that order.
 *
 * @throws ServerNotRunningYetException if the service-started check fails
 * @throws PleaseHoldException if the master is still initializing
 * @throws MasterNotRunningException if the master is stopped
 */
void checkInitialized()
    throws PleaseHoldException, ServerNotRunningYetException, MasterNotRunningException {
  checkServiceStarted();
  if (!isInitialized()) {
    throw new PleaseHoldException("Master is initializing");
  }
  if (isStopped()) {
    throw new MasterNotRunningException();
  }
}
protected boolean scheduleForRetry(final IOException e) { // Should we wait a little before retrying? If the server is starting it's yes. final boolean hold = (e instanceof ServerNotRunningYetException); if (hold) { LOG.warn(String.format("waiting a little before trying on the same server=%s try=%d", serverName, numberOfAttemptsSoFar), e); long now = EnvironmentEdgeManager.currentTime(); if (now < getMaxWaitTime()) { if (LOG.isDebugEnabled()) { LOG.debug(String.format("server is not yet up; waiting up to %dms", (getMaxWaitTime() - now)), e); } submitTask(this, 100, TimeUnit.MILLISECONDS); return true; } LOG.warn(String.format("server %s is not up for a while; try a new one", serverName), e); return false; } // In case socket is timed out and the region server is still online, // the openRegion RPC could have been accepted by the server and // just the response didn't go through. So we will retry to // open the region on the same server. final boolean retry = !hold && (e instanceof SocketTimeoutException && master.getServerManager().isServerOnline(serverName)); if (retry) { // we want to retry as many times as needed as long as the RS is not dead. if (LOG.isDebugEnabled()) { LOG.debug(String.format("Retrying to same RegionServer %s because: %s", serverName, e.getMessage()), e); } submitTask(this); return true; } // trying to send the request elsewhere instead LOG.warn(String.format("Failed dispatch to server=%s try=%d", serverName, numberOfAttemptsSoFar), e); return false; }
/** * Get the current master from ZooKeeper and open the RPC connection to it. To get a fresh * connection, the current rssStub must be null. Method will block until a master is available. * You can break from this block by requesting the server stop. * @param refresh If true then master address will be read from ZK, otherwise use cached data * @return master + port, or null if server has been stopped */ @VisibleForTesting protected synchronized ServerName createRegionServerStatusStub(boolean refresh) { if (rssStub != null) { return masterAddressTracker.getMasterAddress(); } ServerName sn = null; long previousLogTime = 0; RegionServerStatusService.BlockingInterface intRssStub = null; LockService.BlockingInterface intLockStub = null; boolean interrupted = false; try { while (keepLooping()) { sn = this.masterAddressTracker.getMasterAddress(refresh); if (sn == null) { if (!keepLooping()) { // give up with no connection. LOG.debug("No master found and cluster is stopped; bailing out"); return null; } if (System.currentTimeMillis() > (previousLogTime + 1000)) { LOG.debug("No master found; retry"); previousLogTime = System.currentTimeMillis(); } refresh = true; // let's try pull it from ZK directly if (sleep(200)) { interrupted = true; } continue; } // If we are on the active master, use the shortcut if (this instanceof HMaster && sn.equals(getServerName())) { intRssStub = ((HMaster)this).getMasterRpcServices(); intLockStub = ((HMaster)this).getMasterRpcServices(); break; } try { BlockingRpcChannel channel = this.rpcClient.createBlockingRpcChannel(sn, userProvider.getCurrent(), shortOperationTimeout); intRssStub = RegionServerStatusService.newBlockingStub(channel); intLockStub = LockService.newBlockingStub(channel); break; } catch (IOException e) { if (System.currentTimeMillis() > (previousLogTime + 1000)) { e = e instanceof RemoteException ? 
((RemoteException)e).unwrapRemoteException() : e; if (e instanceof ServerNotRunningYetException) { LOG.info("Master isn't available yet, retrying"); } else { LOG.warn("Unable to connect to master. Retrying. Error was:", e); } previousLogTime = System.currentTimeMillis(); } if (sleep(200)) { interrupted = true; } } } } finally { if (interrupted) { Thread.currentThread().interrupt(); } } this.rssStub = intRssStub; this.lockStub = intLockStub; return sn; }
/**
 * Always fails the request as if the target server had not finished starting.
 *
 * @param server the target server (unused)
 * @param req the request (unused)
 * @return never returns normally
 * @throws IOException always, as a {@code ServerNotRunningYetException}
 */
@Override
public ExecuteProceduresResponse sendRequest(ServerName server, ExecuteProceduresRequest req)
    throws IOException {
  final ServerNotRunningYetException notUpYet =
      new ServerNotRunningYetException("wait on server startup");
  throw notUpYet;
}
/** * Get the current master from ZooKeeper and open the RPC connection to it. * * Method will block until a master is available. You can break from this * block by requesting the server stop. * * @return master + port, or null if server has been stopped */ private Pair<ServerName, RegionServerStatusService.BlockingInterface> createRegionServerStatusStub() { ServerName sn = null; long previousLogTime = 0; RegionServerStatusService.BlockingInterface master = null; boolean refresh = false; // for the first time, use cached data RegionServerStatusService.BlockingInterface intf = null; boolean interrupted = false; try { while (keepLooping() && master == null) { sn = this.masterAddressTracker.getMasterAddress(refresh); if (sn == null) { if (!keepLooping()) { // give up with no connection. LOG.debug("No master found and cluster is stopped; bailing out"); return null; } LOG.debug("No master found; retry"); previousLogTime = System.currentTimeMillis(); refresh = true; // let's try pull it from ZK directly sleeper.sleep(); continue; } // If we are on the active master, use the shortcut if (this instanceof HMaster && sn.equals(getServerName())) { intf = ((HMaster)this).getMasterRpcServices(); break; } try { BlockingRpcChannel channel = this.rpcClient.createBlockingRpcChannel(sn, userProvider.getCurrent(), operationTimeout); intf = RegionServerStatusService.newBlockingStub(channel); break; } catch (IOException e) { e = e instanceof RemoteException ? ((RemoteException)e).unwrapRemoteException() : e; if (e instanceof ServerNotRunningYetException) { if (System.currentTimeMillis() > (previousLogTime+1000)){ LOG.info("Master isn't available yet, retrying"); previousLogTime = System.currentTimeMillis(); } } else { if (System.currentTimeMillis() > (previousLogTime + 1000)) { LOG.warn("Unable to connect to master. Retrying. 
Error was:", e); previousLogTime = System.currentTimeMillis(); } } try { Thread.sleep(200); } catch (InterruptedException ex) { interrupted = true; LOG.warn("Interrupted while sleeping"); } } } } finally { if (interrupted) { Thread.currentThread().interrupt(); } } return new Pair<ServerName, RegionServerStatusService.BlockingInterface>(sn, intf); }
/** * Get the current master from ZooKeeper and open the RPC connection to it. * * Method will block until a master is available. You can break from this * block by requesting the server stop. * * @return master + port, or null if server has been stopped */ private Pair<ServerName, RegionServerStatusService.BlockingInterface> createRegionServerStatusStub() { ServerName sn = null; long previousLogTime = 0; RegionServerStatusService.BlockingInterface master = null; boolean refresh = false; // for the first time, use cached data RegionServerStatusService.BlockingInterface intf = null; while (keepLooping() && master == null) { sn = this.masterAddressManager.getMasterAddress(refresh); if (sn == null) { if (!keepLooping()) { // give up with no connection. LOG.debug("No master found and cluster is stopped; bailing out"); return null; } LOG.debug("No master found; retry"); previousLogTime = System.currentTimeMillis(); refresh = true; // let's try pull it from ZK directly sleeper.sleep(); continue; } new InetSocketAddress(sn.getHostname(), sn.getPort()); try { BlockingRpcChannel channel = this.rpcClient.createBlockingRpcChannel(sn, userProvider.getCurrent(), this.rpcTimeout); intf = RegionServerStatusService.newBlockingStub(channel); break; } catch (IOException e) { e = e instanceof RemoteException ? ((RemoteException)e).unwrapRemoteException() : e; if (e instanceof ServerNotRunningYetException) { if (System.currentTimeMillis() > (previousLogTime+1000)){ LOG.info("Master isn't available yet, retrying"); previousLogTime = System.currentTimeMillis(); } } else { if (System.currentTimeMillis() > (previousLogTime + 1000)) { LOG.warn("Unable to connect to master. Retrying. Error was:", e); previousLogTime = System.currentTimeMillis(); } } try { Thread.sleep(200); } catch (InterruptedException ignored) { } } } return new Pair<ServerName, RegionServerStatusService.BlockingInterface>(sn, intf); }
/** * Get the current master from ZooKeeper and open the RPC connection to it. * * Method will block until a master is available. You can break from this * block by requesting the server stop. * * @return master + port, or null if server has been stopped */ private ServerName getMaster() { ServerName masterServerName = null; long previousLogTime = 0; HMasterRegionInterface master = null; InetSocketAddress masterIsa = null; while (keepLooping() && master == null) { masterServerName = this.masterAddressManager.getMasterAddress(); if (masterServerName == null) { if (!keepLooping()) { // give up with no connection. LOG.debug("No master found and cluster is stopped; bailing out"); return null; } LOG.debug("No master found; retry"); previousLogTime = System.currentTimeMillis(); sleeper.sleep(); continue; } masterIsa = new InetSocketAddress(masterServerName.getHostname(), masterServerName.getPort()); LOG.info("Attempting connect to Master server at " + this.masterAddressManager.getMasterAddress()); try { // Do initial RPC setup. The final argument indicates that the RPC // should retry indefinitely. master = HBaseRPC.waitForProxy(this.rpcEngine, HMasterRegionInterface.class, HMasterRegionInterface.VERSION, masterIsa, this.conf, -1, this.rpcTimeout, this.rpcTimeout); } catch (IOException e) { e = e instanceof RemoteException ? ((RemoteException)e).unwrapRemoteException() : e; if (e instanceof ServerNotRunningYetException) { if (System.currentTimeMillis() > (previousLogTime+1000)){ LOG.info("Master isn't available yet, retrying"); previousLogTime = System.currentTimeMillis(); } } else { if (System.currentTimeMillis() > (previousLogTime + 1000)) { LOG.warn("Unable to connect to master. Retrying. Error was:", e); previousLogTime = System.currentTimeMillis(); } } try { Thread.sleep(200); } catch (InterruptedException ignored) { } } } LOG.info("Connected to master at " + masterIsa); this.hbaseMaster = master; return masterServerName; }
/** * Get the current master from ZooKeeper and open the RPC connection to it. * * Method will block until a master is available. You can break from this * block by requesting the server stop. * * @return master + port, or null if server has been stopped */ private ServerName getMaster() { ServerName masterServerName = null; long previousLogTime = 0; RegionServerStatusProtocol master = null; boolean refresh = false; // for the first time, use cached data while (keepLooping() && master == null) { masterServerName = this.masterAddressManager.getMasterAddress(refresh); if (masterServerName == null) { if (!keepLooping()) { // give up with no connection. LOG.debug("No master found and cluster is stopped; bailing out"); return null; } LOG.debug("No master found; retry"); previousLogTime = System.currentTimeMillis(); refresh = true; // let's try pull it from ZK directly sleeper.sleep(); continue; } InetSocketAddress isa = new InetSocketAddress(masterServerName.getHostname(), masterServerName.getPort()); LOG.info("Attempting connect to Master server at " + this.masterAddressManager.getMasterAddress()); try { // Do initial RPC setup. The final argument indicates that the RPC // should retry indefinitely. master = (RegionServerStatusProtocol) HBaseClientRPC.waitForProxy( RegionServerStatusProtocol.class, isa, this.conf, -1, this.rpcTimeout, this.rpcTimeout); LOG.info("Connected to master at " + isa); } catch (IOException e) { e = e instanceof RemoteException ? ((RemoteException)e).unwrapRemoteException() : e; if (e instanceof ServerNotRunningYetException) { if (System.currentTimeMillis() > (previousLogTime+1000)){ LOG.info("Master isn't available yet, retrying"); previousLogTime = System.currentTimeMillis(); } } else { if (System.currentTimeMillis() > (previousLogTime + 1000)) { LOG.warn("Unable to connect to master. Retrying. 
Error was:", e); previousLogTime = System.currentTimeMillis(); } } try { Thread.sleep(200); } catch (InterruptedException ignored) { } } } this.hbaseMaster = master; return masterServerName; }
/**
 * Checks that the write configuration answers {@code WriteResponse.RETRY} for
 * four connectivity-related failures, however they are reported: as a failed
 * global result, as a failed row inside a partial result, or as an exception
 * handed to {@code globalError}.
 */
@Test
public void testRetryNoRouteToHostException() throws Throwable {
    final String[] retryableMessages = {
        "NoRouteToHostException:No route to host",
        "FailedServerException:This server is in the failed servers list",
        "ServerNotRunningYetException",
        "ConnectTimeoutException"
    };

    WriteConfiguration config = new DefaultWriteConfiguration(new Monitor(0, 0, 10, 10L, 0), pef);

    // 1) The whole bulk write failed with the given message.
    for (String message : retryableMessages) {
        WriteResult failed = new WriteResult(Code.FAILED, message);
        BulkWriteResult globalFailure = new BulkWriteResult(failed);
        Assert.assertEquals(WriteResponse.RETRY, config.processGlobalResult(globalFailure));
    }

    // 2) The bulk write was partial and row 1 failed with the given message.
    for (String message : retryableMessages) {
        WriteResult partial = new WriteResult(Code.PARTIAL);
        IntObjectOpenHashMap<WriteResult> rowFailures = new IntObjectOpenHashMap<>();
        rowFailures.put(1, new WriteResult(Code.FAILED, message));
        BulkWriteResult partialFailure = new BulkWriteResult(partial, new IntOpenHashSet(), rowFailures);
        Assert.assertEquals(WriteResponse.RETRY, config.partialFailure(partialFailure, null));
    }

    // 3) The failure surfaced as an exception object.
    Assert.assertEquals(WriteResponse.RETRY,
        config.globalError(new NoRouteToHostException()));
    Assert.assertEquals(WriteResponse.RETRY,
        config.globalError(new FailedServerException("Failed server")));
    Assert.assertEquals(WriteResponse.RETRY,
        config.globalError(new ServerNotRunningYetException("Server not running")));
    Assert.assertEquals(WriteResponse.RETRY,
        config.globalError(new ConnectTimeoutException("connect timeout")));
}
/**
 * Exercises verifyMetaRegionLocation when a
 * {@link ServerNotRunningYetException} is raised. See HBASE-4470.
 *
 * In HBASE-4470 the exception came out of getHConnection(), whereas this
 * test has it come out of get(). Both are called from the same function,
 * and raising it from get() is the less invasive way to set the scenario up.
 *
 * @throws IOException if the delegated check raises it
 * @throws InterruptedException if the delegated check raises it
 * @throws KeeperException if the delegated check raises it
 * @throws ServiceException if the delegated check raises it
 */
@Test
public void testVerifyMetaRegionServerNotRunning()
    throws IOException, InterruptedException, KeeperException, ServiceException {
  ServerNotRunningYetException notRunningYet = new ServerNotRunningYetException("mock");
  testVerifyMetaRegionLocationWithException(notRunningYet);
}
/**
 * Exercises verifyMetaRegionLocation when a
 * {@link ServerNotRunningYetException} is raised. See HBASE-4470.
 *
 * In HBASE-4470 the exception came out of getHConnection(), whereas this
 * test has it come out of get(). Both are called from the same function,
 * and raising it from get() is the less invasive way to set the scenario up.
 *
 * @throws IOException if the delegated check raises it
 * @throws InterruptedException if the delegated check raises it
 * @throws KeeperException if the delegated check raises it
 */
@Test
public void testVerifyMetaRegionServerNotRunning()
    throws IOException, InterruptedException, KeeperException {
  ServerNotRunningYetException notRunningYet = new ServerNotRunningYetException("mock");
  testVerifyMetaRegionLocationWithException(notRunningYet);
}