/** * The given node has reported in. This method should: * 1) Record the heartbeat, so the datanode isn't timed out * 2) Adjust usage stats for future block allocation * * If a substantial amount of time passed since the last datanode * heartbeat then request an immediate block report. * * @return an array of datanode commands * @throws IOException */ HeartbeatResponse handleHeartbeat(DatanodeRegistration nodeReg, StorageReport[] reports, long cacheCapacity, long cacheUsed, int xceiverCount, int xmitsInProgress, int failedVolumes, VolumeFailureSummary volumeFailureSummary) throws IOException { readLock(); try { //get datanode commands final int maxTransfer = blockManager.getMaxReplicationStreams() - xmitsInProgress; DatanodeCommand[] cmds = blockManager.getDatanodeManager().handleHeartbeat( nodeReg, reports, blockPoolId, cacheCapacity, cacheUsed, xceiverCount, maxTransfer, failedVolumes, volumeFailureSummary); //create ha status final NNHAStatusHeartbeat haState = new NNHAStatusHeartbeat( haContext.getState().getServiceState(), getFSImage().getLastAppliedOrWrittenTxId()); return new HeartbeatResponse(cmds, haState, rollingUpgradeInfo); } finally { readUnlock(); } }
@Override // DatanodeProtocol public void errorReport(DatanodeRegistration nodeReg, int errorCode, String msg) throws IOException { checkNNStartup(); String dnName = (nodeReg == null) ? "Unknown DataNode" : nodeReg.toString(); if (errorCode == DatanodeProtocol.NOTIFY) { LOG.info("Error report from " + dnName + ": " + msg); return; } verifyRequest(nodeReg); if (errorCode == DatanodeProtocol.DISK_ERROR) { LOG.warn("Disk error on " + dnName + ": " + msg); } else if (errorCode == DatanodeProtocol.FATAL_DISK_ERROR) { LOG.warn("Fatal disk error on " + dnName + ": " + msg); namesystem.getBlockManager().getDatanodeManager().removeDatanode(nodeReg); } else { LOG.info("Error report from " + dnName + ": " + msg); } }
/** * Check that the registration returned from a NameNode is consistent * with the information in the storage. If the storage is fresh/unformatted, * sets the storage ID based on this registration. * Also updates the block pool's state in the secret manager. */ synchronized void bpRegistrationSucceeded(DatanodeRegistration bpRegistration, String blockPoolId) throws IOException { // Set the ID if we haven't already if (null == id) { id = bpRegistration; } if(!storage.getDatanodeUuid().equals(bpRegistration.getDatanodeUuid())) { throw new IOException("Inconsistent Datanode IDs. Name-node returned " + bpRegistration.getDatanodeUuid() + ". Expecting " + storage.getDatanodeUuid()); } registerBlockPoolWithSecretManager(bpRegistration, blockPoolId); }
@Override public void reportTo(DatanodeProtocolClientSideTranslatorPB bpNamenode, DatanodeRegistration bpRegistration) throws BPServiceActorActionException { if (bpRegistration == null) { return; } DatanodeInfo[] dnArr = { new DatanodeInfo(bpRegistration) }; String[] uuids = { storageUuid }; StorageType[] types = { storageType }; LocatedBlock[] locatedBlock = { new LocatedBlock(block, dnArr, uuids, types) }; try { bpNamenode.reportBadBlocks(locatedBlock); } catch (RemoteException re) { DataNode.LOG.info("reportBadBlock encountered RemoteException for " + "block: " + block , re); } catch (IOException e) { throw new BPServiceActorActionException("Failed to report bad block " + block + " to namenode: "); } }
@Override public DatanodeCommand cacheReport(DatanodeRegistration registration, String poolId, List<Long> blockIds) throws IOException { CacheReportRequestProto.Builder builder = CacheReportRequestProto.newBuilder() .setRegistration(PBHelper.convert(registration)) .setBlockPoolId(poolId); for (Long blockId : blockIds) { builder.addBlocks(blockId); } CacheReportResponseProto resp; try { resp = rpcProxy.cacheReport(NULL_CONTROLLER, builder.build()); } catch (ServiceException se) { throw ProtobufHelper.getRemoteException(se); } if (resp.hasCmd()) { return PBHelper.convert(resp.getCmd()); } return null; }
@Override public void blockReceivedAndDeleted(DatanodeRegistration registration, String poolId, StorageReceivedDeletedBlocks[] receivedAndDeletedBlocks) throws IOException { BlockReceivedAndDeletedRequestProto.Builder builder = BlockReceivedAndDeletedRequestProto.newBuilder() .setRegistration(PBHelper.convert(registration)) .setBlockPoolId(poolId); for (StorageReceivedDeletedBlocks storageBlock : receivedAndDeletedBlocks) { StorageReceivedDeletedBlocksProto.Builder repBuilder = StorageReceivedDeletedBlocksProto.newBuilder(); repBuilder.setStorageUuid(storageBlock.getStorage().getStorageID()); // Set for wire compatibility. repBuilder.setStorage(PBHelper.convert(storageBlock.getStorage())); for (ReceivedDeletedBlockInfo rdBlock : storageBlock.getBlocks()) { repBuilder.addBlocks(PBHelper.convert(rdBlock)); } builder.addBlocks(repBuilder.build()); } try { rpcProxy.blockReceivedAndDeleted(NULL_CONTROLLER, builder.build()); } catch (ServiceException se) { throw ProtobufHelper.getRemoteException(se); } }
void register() throws IOException { // get versions from the namenode nsInfo = nameNodeProto.versionRequest(); dnRegistration = new DatanodeRegistration( new DatanodeID(DNS.getDefaultIP("default"), DNS.getDefaultHost("default", "default"), DataNode.generateUuid(), getNodePort(dnIdx), DFSConfigKeys.DFS_DATANODE_HTTP_DEFAULT_PORT, DFSConfigKeys.DFS_DATANODE_HTTPS_DEFAULT_PORT, DFSConfigKeys.DFS_DATANODE_IPC_DEFAULT_PORT), new DataStorage(nsInfo), new ExportedBlockKeys(), VersionInfo.getVersion()); // register datanode dnRegistration = nameNodeProto.registerDatanode(dnRegistration); //first block reports storage = new DatanodeStorage(DatanodeStorage.generateUuid()); final StorageBlockReport[] reports = { new StorageBlockReport(storage, BlockListAsLongs.EMPTY) }; nameNodeProto.blockReport(dnRegistration, nameNode.getNamesystem().getBlockPoolId(), reports, new BlockReportContext(1, 0, System.nanoTime())); }
@Test public void testSafeModeIBRAfterIncremental() throws Exception { DatanodeDescriptor node = spy(nodes.get(0)); DatanodeStorageInfo ds = node.getStorageInfos()[0]; node.isAlive = true; DatanodeRegistration nodeReg = new DatanodeRegistration(node, null, null, ""); // pretend to be in safemode doReturn(true).when(fsn).isInStartupSafeMode(); // register new node bm.getDatanodeManager().registerDatanode(nodeReg); bm.getDatanodeManager().addDatanode(node); // swap in spy assertEquals(node, bm.getDatanodeManager().getDatanode(node)); assertEquals(0, ds.getBlockReportCount()); // send block report while pretending to already have blocks reset(node); doReturn(1).when(node).numBlocks(); bm.processReport(node, new DatanodeStorage(ds.getStorageID()), BlockListAsLongs.EMPTY, null, false); assertEquals(1, ds.getBlockReportCount()); }
/** * Set up a mock NN with the bare minimum for a DN to register to it. */ private DatanodeProtocolClientSideTranslatorPB setupNNMock(int nnIdx) throws Exception { DatanodeProtocolClientSideTranslatorPB mock = Mockito.mock(DatanodeProtocolClientSideTranslatorPB.class); Mockito.doReturn(new NamespaceInfo(1, FAKE_CLUSTERID, FAKE_BPID, 0)) .when(mock).versionRequest(); Mockito.doReturn(DFSTestUtil.getLocalDatanodeRegistration()) .when(mock).registerDatanode(Mockito.any(DatanodeRegistration.class)); Mockito.doAnswer(new HeartbeatAnswer(nnIdx)) .when(mock).sendHeartbeat( Mockito.any(DatanodeRegistration.class), Mockito.any(StorageReport[].class), Mockito.anyLong(), Mockito.anyLong(), Mockito.anyInt(), Mockito.anyInt(), Mockito.anyInt(), Mockito.any(VolumeFailureSummary.class)); mockHaStatuses[nnIdx] = new NNHAStatusHeartbeat(HAServiceState.STANDBY, 0); return mock; }
private void waitForBlockReport(final DatanodeProtocolClientSideTranslatorPB mockNN) throws Exception { GenericTestUtils.waitFor(new Supplier<Boolean>() { @Override public Boolean get() { try { Mockito.verify(mockNN).blockReport( Mockito.<DatanodeRegistration>anyObject(), Mockito.eq(FAKE_BPID), Mockito.<StorageBlockReport[]>anyObject(), Mockito.<BlockReportContext>anyObject()); return true; } catch (Throwable t) { LOG.info("waiting on block report: " + t.getMessage()); return false; } } }, 500, 10000); }
private void waitForBlockReport( final DatanodeProtocolClientSideTranslatorPB mockNN1, final DatanodeProtocolClientSideTranslatorPB mockNN2) throws Exception { GenericTestUtils.waitFor(new Supplier<Boolean>() { @Override public Boolean get() { return get(mockNN1) || get(mockNN2); } private Boolean get(DatanodeProtocolClientSideTranslatorPB mockNN) { try { Mockito.verify(mockNN).blockReport( Mockito.<DatanodeRegistration>anyObject(), Mockito.eq(FAKE_BPID), Mockito.<StorageBlockReport[]>anyObject(), Mockito.<BlockReportContext>anyObject()); return true; } catch (Throwable t) { LOG.info("waiting on block report: " + t.getMessage()); return false; } } }, 500, 10000); }
private ReceivedDeletedBlockInfo[] waitForBlockReceived( final ExtendedBlock fakeBlock, final DatanodeProtocolClientSideTranslatorPB mockNN) throws Exception { final String fakeBlockPoolId = fakeBlock.getBlockPoolId(); final ArgumentCaptor<StorageReceivedDeletedBlocks[]> captor = ArgumentCaptor.forClass(StorageReceivedDeletedBlocks[].class); GenericTestUtils.waitFor(new Supplier<Boolean>() { @Override public Boolean get() { try { Mockito.verify(mockNN).blockReceivedAndDeleted( Mockito.<DatanodeRegistration>anyObject(), Mockito.eq(fakeBlockPoolId), captor.capture()); return true; } catch (Throwable t) { return false; } } }, 100, 10000); return captor.getValue()[0].getBlocks(); }
/** * Ensure that an IBR is generated immediately for a block received by * the DN. * * @throws InterruptedException * @throws IOException */ @Test (timeout=60000) public void testReportBlockReceived() throws InterruptedException, IOException { try { DatanodeProtocolClientSideTranslatorPB nnSpy = spyOnDnCallsToNn(); injectBlockReceived(); // Sleep for a very short time, this is necessary since the IBR is // generated asynchronously. Thread.sleep(2000); // Ensure that the received block was reported immediately. Mockito.verify(nnSpy, times(1)).blockReceivedAndDeleted( any(DatanodeRegistration.class), anyString(), any(StorageReceivedDeletedBlocks[].class)); } finally { cluster.shutdown(); cluster = null; } }
/** * Test writes a file and closes it. * Block reported is generated with a bad GS for a single block. * Block report is forced and the check for # of corrupted blocks is performed. * * @throws IOException in case of an error */ @Test(timeout=300000) public void blockReport_03() throws IOException { final String METHOD_NAME = GenericTestUtils.getMethodName(); Path filePath = new Path("/" + METHOD_NAME + ".dat"); writeFile(METHOD_NAME, FILE_SIZE, filePath); // all blocks belong to the same file, hence same BP DataNode dn = cluster.getDataNodes().get(DN_N0); String poolId = cluster.getNamesystem().getBlockPoolId(); DatanodeRegistration dnR = dn.getDNRegistrationForBP(poolId); StorageBlockReport[] reports = getBlockReports(dn, poolId, true, false); sendBlockReports(dnR, poolId, reports); printStats(); assertThat("Wrong number of corrupt blocks", cluster.getNamesystem().getCorruptReplicaBlocks(), is(1L)); assertThat("Wrong number of PendingDeletion blocks", cluster.getNamesystem().getPendingDeletionBlocks(), is(0L)); }
/** * Test creates a file and closes it. * The second datanode is started in the cluster. * As soon as the replication process is completed test runs * Block report and checks that no underreplicated blocks are left * * @throws IOException in case of an error */ @Test(timeout=300000) public void blockReport_06() throws Exception { final String METHOD_NAME = GenericTestUtils.getMethodName(); Path filePath = new Path("/" + METHOD_NAME + ".dat"); final int DN_N1 = DN_N0 + 1; writeFile(METHOD_NAME, FILE_SIZE, filePath); startDNandWait(filePath, true); // all blocks belong to the same file, hence same BP DataNode dn = cluster.getDataNodes().get(DN_N1); String poolId = cluster.getNamesystem().getBlockPoolId(); DatanodeRegistration dnR = dn.getDNRegistrationForBP(poolId); StorageBlockReport[] reports = getBlockReports(dn, poolId, false, false); sendBlockReports(dnR, poolId, reports); printStats(); assertEquals("Wrong number of PendingReplication Blocks", 0, cluster.getNamesystem().getUnderReplicatedBlocks()); }
public long requestBlockReportLeaseId(DatanodeRegistration nodeReg) { assert namesystem.hasReadLock(); DatanodeDescriptor node = null; try { node = datanodeManager.getDatanode(nodeReg); } catch (UnregisteredNodeException e) { LOG.warn("Unregistered datanode {}", nodeReg); return 0; } if (node == null) { LOG.warn("Failed to find datanode {}", nodeReg); return 0; } // Request a new block report lease. The BlockReportLeaseManager has // its own internal locking. long leaseId = blockReportLeaseManager.requestLease(node); BlockManagerFaultInjector.getInstance(). requestBlockReportLease(node, leaseId); return leaseId; }
@Override public void reportTo(DatanodeProtocolClientSideTranslatorPB bpNamenode, DatanodeRegistration bpRegistration) throws BPServiceActorActionException { if (bpRegistration == null) { return; } DatanodeInfo[] dnArr = { new DatanodeInfo(bpRegistration) }; String[] uuids = { storageUuid }; StorageType[] types = { storageType }; LocatedBlock[] locatedBlock = { new LocatedBlock(block, dnArr, uuids, types) }; try { bpNamenode.reportBadBlocks(locatedBlock); } catch (RemoteException re) { DataNode.LOG.info("reportBadBlock encountered RemoteException for " + "block: " + block , re); } catch (IOException e) { throw new BPServiceActorActionException("Failed to report bad block " + block + " to namenode.", e); } }
@Override public void blockReceivedAndDeleted(DatanodeRegistration registration, String poolId, StorageReceivedDeletedBlocks[] receivedAndDeletedBlocks) throws IOException { BlockReceivedAndDeletedRequestProto.Builder builder = BlockReceivedAndDeletedRequestProto.newBuilder() .setRegistration(PBHelper.convert(registration)) .setBlockPoolId(poolId); for (StorageReceivedDeletedBlocks storageBlock : receivedAndDeletedBlocks) { StorageReceivedDeletedBlocksProto.Builder repBuilder = StorageReceivedDeletedBlocksProto.newBuilder(); repBuilder.setStorageUuid(storageBlock.getStorage().getStorageID()); // Set for wire compatibility. repBuilder.setStorage(PBHelperClient.convert(storageBlock.getStorage())); for (ReceivedDeletedBlockInfo rdBlock : storageBlock.getBlocks()) { repBuilder.addBlocks(PBHelper.convert(rdBlock)); } builder.addBlocks(repBuilder.build()); } try { rpcProxy.blockReceivedAndDeleted(NULL_CONTROLLER, builder.build()); } catch (ServiceException se) { throw ProtobufHelper.getRemoteException(se); } }
void register() throws IOException { // get versions from the namenode nsInfo = nameNodeProto.versionRequest(); dnRegistration = new DatanodeRegistration( new DatanodeID(DNS.getDefaultIP("default"), DNS.getDefaultHost("default", "default"), DataNode.generateUuid(), getNodePort(dnIdx), DFSConfigKeys.DFS_DATANODE_HTTP_DEFAULT_PORT, DFSConfigKeys.DFS_DATANODE_HTTPS_DEFAULT_PORT, DFSConfigKeys.DFS_DATANODE_IPC_DEFAULT_PORT), new DataStorage(nsInfo), new ExportedBlockKeys(), VersionInfo.getVersion()); // register datanode dnRegistration = dataNodeProto.registerDatanode(dnRegistration); dnRegistration.setNamespaceInfo(nsInfo); //first block reports storage = new DatanodeStorage(DatanodeStorage.generateUuid()); final StorageBlockReport[] reports = { new StorageBlockReport(storage, BlockListAsLongs.EMPTY) }; dataNodeProto.blockReport(dnRegistration, bpid, reports, new BlockReportContext(1, 0, System.nanoTime(), 0L)); }
@Test public void testSafeModeIBRAfterIncremental() throws Exception { DatanodeDescriptor node = spy(nodes.get(0)); DatanodeStorageInfo ds = node.getStorageInfos()[0]; node.setAlive(true); DatanodeRegistration nodeReg = new DatanodeRegistration(node, null, null, ""); // pretend to be in safemode doReturn(true).when(fsn).isInStartupSafeMode(); // register new node bm.getDatanodeManager().registerDatanode(nodeReg); bm.getDatanodeManager().addDatanode(node); // swap in spy assertEquals(node, bm.getDatanodeManager().getDatanode(node)); assertEquals(0, ds.getBlockReportCount()); // send block report while pretending to already have blocks reset(node); doReturn(1).when(node).numBlocks(); bm.processReport(node, new DatanodeStorage(ds.getStorageID()), BlockListAsLongs.EMPTY, null, false); assertEquals(1, ds.getBlockReportCount()); }
/** * Set up a mock NN with the bare minimum for a DN to register to it. */ private DatanodeProtocolClientSideTranslatorPB setupNNMock(int nnIdx) throws Exception { DatanodeProtocolClientSideTranslatorPB mock = Mockito.mock(DatanodeProtocolClientSideTranslatorPB.class); Mockito.doReturn(new NamespaceInfo(1, FAKE_CLUSTERID, FAKE_BPID, 0)) .when(mock).versionRequest(); Mockito.doReturn(DFSTestUtil.getLocalDatanodeRegistration()) .when(mock).registerDatanode(Mockito.any(DatanodeRegistration.class)); Mockito.doAnswer(new HeartbeatAnswer(nnIdx)) .when(mock).sendHeartbeat( Mockito.any(DatanodeRegistration.class), Mockito.any(StorageReport[].class), Mockito.anyLong(), Mockito.anyLong(), Mockito.anyInt(), Mockito.anyInt(), Mockito.anyInt(), Mockito.any(VolumeFailureSummary.class), Mockito.anyBoolean()); mockHaStatuses[nnIdx] = new NNHAStatusHeartbeat(HAServiceState.STANDBY, 0); return mock; }
@Override // DatanodeProtocol public DatanodeRegistration registerDatanode(DatanodeRegistration nodeReg) throws IOException { checkNNStartup(); verifySoftwareVersion(nodeReg); namesystem.registerDatanode(nodeReg); return nodeReg; }
@Override // DatanodeProtocol public HeartbeatResponse sendHeartbeat(DatanodeRegistration nodeReg, StorageReport[] report, long dnCacheCapacity, long dnCacheUsed, int xmitsInProgress, int xceiverCount, int failedVolumes, VolumeFailureSummary volumeFailureSummary) throws IOException { checkNNStartup(); verifyRequest(nodeReg); return namesystem.handleHeartbeat(nodeReg, report, dnCacheCapacity, dnCacheUsed, xceiverCount, xmitsInProgress, failedVolumes, volumeFailureSummary); }
@Override // DatanodeProtocol public DatanodeCommand blockReport(DatanodeRegistration nodeReg, String poolId, StorageBlockReport[] reports, BlockReportContext context) throws IOException { checkNNStartup(); verifyRequest(nodeReg); if(blockStateChangeLog.isDebugEnabled()) { blockStateChangeLog.debug("*BLOCK* NameNode.blockReport: " + "from " + nodeReg + ", reports.length=" + reports.length); } final BlockManager bm = namesystem.getBlockManager(); boolean noStaleStorages = false; for (int r = 0; r < reports.length; r++) { final BlockListAsLongs blocks = reports[r].getBlocks(); // // BlockManager.processReport accumulates information of prior calls // for the same node and storage, so the value returned by the last // call of this loop is the final updated value for noStaleStorage. // noStaleStorages = bm.processReport(nodeReg, reports[r].getStorage(), blocks, context, (r == reports.length - 1)); metrics.incrStorageBlockReportOps(); } if (nn.getFSImage().isUpgradeFinalized() && !namesystem.isRollingUpgrade() && !nn.isStandbyState() && noStaleStorages) { return new FinalizeCommand(poolId); } return null; }
@Override public DatanodeCommand cacheReport(DatanodeRegistration nodeReg, String poolId, List<Long> blockIds) throws IOException { checkNNStartup(); verifyRequest(nodeReg); if (blockStateChangeLog.isDebugEnabled()) { blockStateChangeLog.debug("*BLOCK* NameNode.cacheReport: " + "from " + nodeReg + " " + blockIds.size() + " blocks"); } namesystem.getCacheManager().processCacheReport(nodeReg, blockIds); return null; }