/** * Fail correctly if coordinator aborts the procedure. The subprocedure will not interrupt a * running {@link Subprocedure#prepare} -- prepare needs to finish first, and the the abort * is checked. Thus, the {@link Subprocedure#prepare} should succeed but later get rolled back * via {@link Subprocedure#cleanup}. */ @Test(timeout = 60000) public void testCoordinatorAbort() throws Exception { buildCohortMemberPair(); // mock that another node timed out or failed to prepare final TimeoutException oate = new TimeoutException("bogus timeout", 1,2,0); doAnswer( new Answer<Void>() { @Override public Void answer(InvocationOnMock invocation) throws Throwable { // inject a remote error (this would have come from an external thread) spySub.cancel("bogus message", oate); // sleep the wake frequency since that is what we promised Thread.sleep(WAKE_FREQUENCY); return null; } }).when(spySub).waitForReachedGlobalBarrier(); // run the operation // build a new operation Subprocedure subproc = member.createSubprocedure(op, data); member.submitSubprocedure(subproc); // if the operation doesn't die properly, then this will timeout member.closeAndWait(TIMEOUT); // make sure everything ran in order InOrder order = inOrder(mockMemberComms, spySub); order.verify(spySub).acquireBarrier(); order.verify(mockMemberComms).sendMemberAcquired(eq(spySub)); // Later phases not run order.verify(spySub, never()).insideBarrier(); order.verify(mockMemberComms, never()).sendMemberCompleted(eq(spySub), eq(data)); // error recovery path exercised order.verify(spySub).cancel(anyString(), any(Exception.class)); order.verify(spySub).cleanup(any(Exception.class)); }
/** * Handle Failures if a member's commit phase succeeds but notification to coordinator fails * * NOTE: This is the core difference that makes this different from traditional 2PC. In true * 2PC the transaction is committed just before the coordinator sends commit messages to the * member. Members are then responsible for reading its TX log. This implementation actually * rolls back, and thus breaks the normal TX guarantees. */ @Test(timeout = 60000) public void testMemberCommitCommsFailure() throws Exception { buildCohortMemberPair(); final TimeoutException oate = new TimeoutException("bogus timeout",1,2,0); doAnswer( new Answer<Void>() { @Override public Void answer(InvocationOnMock invocation) throws Throwable { // inject a remote error (this would have come from an external thread) spySub.cancel("commit comms fail", oate); // sleep the wake frequency since that is what we promised Thread.sleep(WAKE_FREQUENCY); return null; } }).when(mockMemberComms).sendMemberCompleted(any(Subprocedure.class), eq(data)); // run the operation // build a new operation Subprocedure subproc = member.createSubprocedure(op, data); member.submitSubprocedure(subproc); // if the operation doesn't die properly, then this will timeout member.closeAndWait(TIMEOUT); // make sure everything ran in order InOrder order = inOrder(mockMemberComms, spySub); order.verify(spySub).acquireBarrier(); order.verify(mockMemberComms).sendMemberAcquired(eq(spySub)); order.verify(spySub).insideBarrier(); order.verify(mockMemberComms).sendMemberCompleted(eq(spySub), eq(data)); // error recovery path exercised order.verify(spySub).cancel(anyString(), any(Exception.class)); order.verify(spySub).cleanup(any(Exception.class)); }
/** * Returns region Result by querying the META table for regionName. It will block until * the region is found in META. It will also check for parent in META to make sure that * if parent is deleted, we no longer have to wait, and should continue (HBASE-8590) * @return Result object is daughter is found, or null if parent is gone from META * @throws TimeoutException if timeout is reached */ private Result getRegionResultBlocking(HTable metaTable, long timeout, byte[] parentRegionName, byte[] regionName) throws IOException, TimeoutException { boolean logged = false; long start = System.currentTimeMillis(); while (System.currentTimeMillis() - start < timeout) { Get get = new Get(regionName); Result result = metaTable.get(get); HRegionInfo info = Writables.getHRegionInfoOrNull( result.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER)); if (info != null) { return result; } // check whether parent is still there, if not it means we do not need to wait Get parentGet = new Get(parentRegionName); Result parentResult = metaTable.get(parentGet); HRegionInfo parentInfo = Writables.getHRegionInfoOrNull( parentResult.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER)); if (parentInfo == null) { // this means that parent is no more (catalog janitor or somebody else deleted it) return null; } try { if (!logged) { if (LOG.isDebugEnabled()) { LOG.debug("blocking until region is in META: " + Bytes.toStringBinary(regionName)); } logged = true; } Thread.sleep(10); } catch (InterruptedException ex) { Thread.currentThread().interrupt(); break; } } throw new TimeoutException("getRegionResultBlocking", start, System.currentTimeMillis(), timeout); }
/** * Fail correctly if coordinator aborts the procedure. The subprocedure will not interrupt a * running {@link Subprocedure#prepare} -- prepare needs to finish first, and the the abort * is checked. Thus, the {@link Subprocedure#prepare} should succeed but later get rolled back * via {@link Subprocedure#cleanup}. */ @Test(timeout = 60000) public void testCoordinatorAbort() throws Exception { buildCohortMemberPair(); // mock that another node timed out or failed to prepare final TimeoutException oate = new TimeoutException("bogus timeout", 1,2,0); doAnswer( new Answer<Void>() { @Override public Void answer(InvocationOnMock invocation) throws Throwable { // inject a remote error (this would have come from an external thread) spySub.cancel("bogus message", oate); // sleep the wake frequency since that is what we promised Thread.sleep(WAKE_FREQUENCY); return null; } }).when(spySub).waitForReachedGlobalBarrier(); // run the operation // build a new operation Subprocedure subproc = member.createSubprocedure(op, data); member.submitSubprocedure(subproc); // if the operation doesn't die properly, then this will timeout member.closeAndWait(TIMEOUT); // make sure everything ran in order InOrder order = inOrder(mockMemberComms, spySub); order.verify(spySub).acquireBarrier(); order.verify(mockMemberComms).sendMemberAcquired(eq(spySub)); // Later phases not run order.verify(spySub, never()).insideBarrier(); order.verify(mockMemberComms, never()).sendMemberCompleted(eq(spySub)); // error recovery path exercised order.verify(spySub).cancel(anyString(), any(Exception.class)); order.verify(spySub).cleanup(any(Exception.class)); }
/** * Handle Failures if a member's commit phase succeeds but notification to coordinator fails * * NOTE: This is the core difference that makes this different from traditional 2PC. In true * 2PC the transaction is committed just before the coordinator sends commit messages to the * member. Members are then responsible for reading its TX log. This implementation actually * rolls back, and thus breaks the normal TX guarantees. */ @Test(timeout = 60000) public void testMemberCommitCommsFailure() throws Exception { buildCohortMemberPair(); final TimeoutException oate = new TimeoutException("bogus timeout",1,2,0); doAnswer( new Answer<Void>() { @Override public Void answer(InvocationOnMock invocation) throws Throwable { // inject a remote error (this would have come from an external thread) spySub.cancel("commit comms fail", oate); // sleep the wake frequency since that is what we promised Thread.sleep(WAKE_FREQUENCY); return null; } }).when(mockMemberComms).sendMemberCompleted(any(Subprocedure.class)); // run the operation // build a new operation Subprocedure subproc = member.createSubprocedure(op, data); member.submitSubprocedure(subproc); // if the operation doesn't die properly, then this will timeout member.closeAndWait(TIMEOUT); // make sure everything ran in order InOrder order = inOrder(mockMemberComms, spySub); order.verify(spySub).acquireBarrier(); order.verify(mockMemberComms).sendMemberAcquired(eq(spySub)); order.verify(spySub).insideBarrier(); order.verify(mockMemberComms).sendMemberCompleted(eq(spySub)); // error recovery path exercised order.verify(spySub).cancel(anyString(), any(Exception.class)); order.verify(spySub).cleanup(any(Exception.class)); }
/** * Fail correctly if coordinator aborts the procedure. The subprocedure will not interrupt a * running {@link Subprocedure#acquireBarrier()} -- prepare needs to finish first, and the the abort * is checked. Thus, the {@link Subprocedure#acquireBarrier()} should succeed but later get rolled back * via {@link Subprocedure#cleanup}. */ @Test(timeout = 60000) public void testCoordinatorAbort() throws Exception { buildCohortMemberPair(); // mock that another node timed out or failed to prepare final TimeoutException oate = new TimeoutException("bogus timeout", 1,2,0); doAnswer( new Answer<Void>() { @Override public Void answer(InvocationOnMock invocation) throws Throwable { // inject a remote error (this would have come from an external thread) spySub.cancel("bogus message", oate); // sleep the wake frequency since that is what we promised Thread.sleep(WAKE_FREQUENCY); return null; } }).when(spySub).waitForReachedGlobalBarrier(); // run the operation // build a new operation Subprocedure subproc = member.createSubprocedure(op, data); member.submitSubprocedure(subproc); // if the operation doesn't die properly, then this will timeout member.closeAndWait(TIMEOUT); // make sure everything ran in order InOrder order = inOrder(mockMemberComms, spySub); order.verify(spySub).acquireBarrier(); order.verify(mockMemberComms).sendMemberAcquired(eq(spySub)); // Later phases not run order.verify(spySub, never()).insideBarrier(); order.verify(mockMemberComms, never()).sendMemberCompleted(eq(spySub), eq(data)); // error recovery path exercised order.verify(spySub).cancel(anyString(), any()); order.verify(spySub).cleanup(any()); }
/** * Handle Failures if a member's commit phase succeeds but notification to coordinator fails * * NOTE: This is the core difference that makes this different from traditional 2PC. In true * 2PC the transaction is committed just before the coordinator sends commit messages to the * member. Members are then responsible for reading its TX log. This implementation actually * rolls back, and thus breaks the normal TX guarantees. */ @Test(timeout = 60000) public void testMemberCommitCommsFailure() throws Exception { buildCohortMemberPair(); final TimeoutException oate = new TimeoutException("bogus timeout",1,2,0); doAnswer( new Answer<Void>() { @Override public Void answer(InvocationOnMock invocation) throws Throwable { // inject a remote error (this would have come from an external thread) spySub.cancel("commit comms fail", oate); // sleep the wake frequency since that is what we promised Thread.sleep(WAKE_FREQUENCY); return null; } }).when(mockMemberComms).sendMemberCompleted(any(), eq(data)); // run the operation // build a new operation Subprocedure subproc = member.createSubprocedure(op, data); member.submitSubprocedure(subproc); // if the operation doesn't die properly, then this will timeout member.closeAndWait(TIMEOUT); // make sure everything ran in order InOrder order = inOrder(mockMemberComms, spySub); order.verify(spySub).acquireBarrier(); order.verify(mockMemberComms).sendMemberAcquired(eq(spySub)); order.verify(spySub).insideBarrier(); order.verify(mockMemberComms).sendMemberCompleted(eq(spySub), eq(data)); // error recovery path exercised order.verify(spySub).cancel(anyString(), any()); order.verify(spySub).cleanup(any()); }