/** * The given node has reported in. This method should: * 1) Record the heartbeat, so the datanode isn't timed out * 2) Adjust usage stats for future block allocation * * If a substantial amount of time passed since the last datanode * heartbeat then request an immediate block report. * * @return an array of datanode commands * @throws IOException */ HeartbeatResponse handleHeartbeat(DatanodeRegistration nodeReg, StorageReport[] reports, long cacheCapacity, long cacheUsed, int xceiverCount, int xmitsInProgress, int failedVolumes, VolumeFailureSummary volumeFailureSummary) throws IOException { readLock(); try { //get datanode commands final int maxTransfer = blockManager.getMaxReplicationStreams() - xmitsInProgress; DatanodeCommand[] cmds = blockManager.getDatanodeManager().handleHeartbeat( nodeReg, reports, blockPoolId, cacheCapacity, cacheUsed, xceiverCount, maxTransfer, failedVolumes, volumeFailureSummary); //create ha status final NNHAStatusHeartbeat haState = new NNHAStatusHeartbeat( haContext.getState().getServiceState(), getFSImage().getLastAppliedOrWrittenTxId()); return new HeartbeatResponse(cmds, haState, rollingUpgradeInfo); } finally { readUnlock(); } }
HeartbeatResponse sendHeartBeat() throws IOException { StorageReport[] reports = dn.getFSDataset().getStorageReports(bpos.getBlockPoolId()); if (LOG.isDebugEnabled()) { LOG.debug("Sending heartbeat with " + reports.length + " storage reports from service actor: " + this); } VolumeFailureSummary volumeFailureSummary = dn.getFSDataset() .getVolumeFailureSummary(); int numFailedVolumes = volumeFailureSummary != null ? volumeFailureSummary.getFailedStorageLocations().length : 0; return bpNamenode.sendHeartbeat(bpRegistration, reports, dn.getFSDataset().getCacheCapacity(), dn.getFSDataset().getCacheUsed(), dn.getXmitsInProgress(), dn.getXceiverCount(), numFailedVolumes, volumeFailureSummary); }
@Override // FsDatasetSpi public VolumeFailureSummary getVolumeFailureSummary() { VolumeFailureInfo[] infos = volumes.getVolumeFailureInfos(); if (infos.length == 0) { return null; } List<String> failedStorageLocations = Lists.newArrayListWithCapacity( infos.length); long lastVolumeFailureDate = 0; long estimatedCapacityLostTotal = 0; for (VolumeFailureInfo info: infos) { failedStorageLocations.add(info.getFailedStorageLocation()); long failureDate = info.getFailureDate(); if (failureDate > lastVolumeFailureDate) { lastVolumeFailureDate = failureDate; } estimatedCapacityLostTotal += info.getEstimatedCapacityLost(); } return new VolumeFailureSummary( failedStorageLocations.toArray(new String[failedStorageLocations.size()]), lastVolumeFailureDate, estimatedCapacityLostTotal); }
/** * Set up a mock NN with the bare minimum for a DN to register to it. */ private DatanodeProtocolClientSideTranslatorPB setupNNMock(int nnIdx) throws Exception { DatanodeProtocolClientSideTranslatorPB mock = Mockito.mock(DatanodeProtocolClientSideTranslatorPB.class); Mockito.doReturn(new NamespaceInfo(1, FAKE_CLUSTERID, FAKE_BPID, 0)) .when(mock).versionRequest(); Mockito.doReturn(DFSTestUtil.getLocalDatanodeRegistration()) .when(mock).registerDatanode(Mockito.any(DatanodeRegistration.class)); Mockito.doAnswer(new HeartbeatAnswer(nnIdx)) .when(mock).sendHeartbeat( Mockito.any(DatanodeRegistration.class), Mockito.any(StorageReport[].class), Mockito.anyLong(), Mockito.anyLong(), Mockito.anyInt(), Mockito.anyInt(), Mockito.anyInt(), Mockito.any(VolumeFailureSummary.class)); mockHaStatuses[nnIdx] = new NNHAStatusHeartbeat(HAServiceState.STANDBY, 0); return mock; }
/** * Checks NameNode tracking of a particular DataNode for correct reporting of * failed volumes. * * @param dm DatanodeManager to check * @param dn DataNode to check * @param expectCapacityKnown if true, then expect that the capacities of the * volumes were known before the failures, and therefore the lost capacity * can be reported * @param expectedFailedVolumes expected locations of failed volumes * @throws Exception if there is any failure */ private void checkFailuresAtNameNode(DatanodeManager dm, DataNode dn, boolean expectCapacityKnown, String... expectedFailedVolumes) throws Exception { DatanodeDescriptor dd = cluster.getNamesystem().getBlockManager() .getDatanodeManager().getDatanode(dn.getDatanodeId()); assertEquals(expectedFailedVolumes.length, dd.getVolumeFailures()); VolumeFailureSummary volumeFailureSummary = dd.getVolumeFailureSummary(); if (expectedFailedVolumes.length > 0) { assertArrayEquals(expectedFailedVolumes, volumeFailureSummary .getFailedStorageLocations()); assertTrue(volumeFailureSummary.getLastVolumeFailureDate() > 0); long expectedCapacityLost = getExpectedCapacityLost(expectCapacityKnown, expectedFailedVolumes.length); assertEquals(expectedCapacityLost, volumeFailureSummary.getEstimatedCapacityLostTotal()); } else { assertNull(volumeFailureSummary); } }
HeartbeatResponse sendHeartBeat(boolean requestBlockReportLease) throws IOException { scheduler.scheduleNextHeartbeat(); StorageReport[] reports = dn.getFSDataset().getStorageReports(bpos.getBlockPoolId()); if (LOG.isDebugEnabled()) { LOG.debug("Sending heartbeat with " + reports.length + " storage reports from service actor: " + this); } VolumeFailureSummary volumeFailureSummary = dn.getFSDataset() .getVolumeFailureSummary(); int numFailedVolumes = volumeFailureSummary != null ? volumeFailureSummary.getFailedStorageLocations().length : 0; return bpNamenode.sendHeartbeat(bpRegistration, reports, dn.getFSDataset().getCacheCapacity(), dn.getFSDataset().getCacheUsed(), dn.getXmitsInProgress(), dn.getXceiverCount(), numFailedVolumes, volumeFailureSummary, requestBlockReportLease); }
/** * Set up a mock NN with the bare minimum for a DN to register to it. */ private DatanodeProtocolClientSideTranslatorPB setupNNMock(int nnIdx) throws Exception { DatanodeProtocolClientSideTranslatorPB mock = Mockito.mock(DatanodeProtocolClientSideTranslatorPB.class); Mockito.doReturn(new NamespaceInfo(1, FAKE_CLUSTERID, FAKE_BPID, 0)) .when(mock).versionRequest(); Mockito.doReturn(DFSTestUtil.getLocalDatanodeRegistration()) .when(mock).registerDatanode(Mockito.any(DatanodeRegistration.class)); Mockito.doAnswer(new HeartbeatAnswer(nnIdx)) .when(mock).sendHeartbeat( Mockito.any(DatanodeRegistration.class), Mockito.any(StorageReport[].class), Mockito.anyLong(), Mockito.anyLong(), Mockito.anyInt(), Mockito.anyInt(), Mockito.anyInt(), Mockito.any(VolumeFailureSummary.class), Mockito.anyBoolean()); mockHaStatuses[nnIdx] = new NNHAStatusHeartbeat(HAServiceState.STANDBY, 0); return mock; }
@Override // FSNamesystemMBean public long getEstimatedCapacityLostTotal() { List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>(); getBlockManager().getDatanodeManager().fetchDatanodes(live, null, true); long estimatedCapacityLostTotal = 0; for (DatanodeDescriptor node: live) { VolumeFailureSummary volumeFailureSummary = node.getVolumeFailureSummary(); if (volumeFailureSummary != null) { estimatedCapacityLostTotal += volumeFailureSummary.getEstimatedCapacityLostTotal(); } } return estimatedCapacityLostTotal; }
@Override // DatanodeProtocol public HeartbeatResponse sendHeartbeat(DatanodeRegistration nodeReg, StorageReport[] report, long dnCacheCapacity, long dnCacheUsed, int xmitsInProgress, int xceiverCount, int failedVolumes, VolumeFailureSummary volumeFailureSummary) throws IOException { checkNNStartup(); verifyRequest(nodeReg); return namesystem.handleHeartbeat(nodeReg, report, dnCacheCapacity, dnCacheUsed, xceiverCount, xmitsInProgress, failedVolumes, volumeFailureSummary); }
synchronized void updateHeartbeat(final DatanodeDescriptor node, StorageReport[] reports, long cacheCapacity, long cacheUsed, int xceiverCount, int failedVolumes, VolumeFailureSummary volumeFailureSummary) { stats.subtract(node); node.updateHeartbeat(reports, cacheCapacity, cacheUsed, xceiverCount, failedVolumes, volumeFailureSummary); stats.add(node); }
/** * Updates stats from datanode heartbeat. */ public void updateHeartbeat(StorageReport[] reports, long cacheCapacity, long cacheUsed, int xceiverCount, int volFailures, VolumeFailureSummary volumeFailureSummary) { updateHeartbeatState(reports, cacheCapacity, cacheUsed, xceiverCount, volFailures, volumeFailureSummary); heartbeatedSinceRegistration = true; }
@Override public HeartbeatResponse sendHeartbeat(DatanodeRegistration registration, StorageReport[] reports, long cacheCapacity, long cacheUsed, int xmitsInProgress, int xceiverCount, int failedVolumes, VolumeFailureSummary volumeFailureSummary) throws IOException { HeartbeatRequestProto.Builder builder = HeartbeatRequestProto.newBuilder() .setRegistration(PBHelper.convert(registration)) .setXmitsInProgress(xmitsInProgress).setXceiverCount(xceiverCount) .setFailedVolumes(failedVolumes); builder.addAllReports(PBHelper.convertStorageReports(reports)); if (cacheCapacity != 0) { builder.setCacheCapacity(cacheCapacity); } if (cacheUsed != 0) { builder.setCacheUsed(cacheUsed); } if (volumeFailureSummary != null) { builder.setVolumeFailureSummary(PBHelper.convertVolumeFailureSummary( volumeFailureSummary)); } HeartbeatResponseProto resp; try { resp = rpcProxy.sendHeartbeat(NULL_CONTROLLER, builder.build()); } catch (ServiceException se) { throw ProtobufHelper.getRemoteException(se); } DatanodeCommand[] cmds = new DatanodeCommand[resp.getCmdsList().size()]; int index = 0; for (DatanodeCommandProto p : resp.getCmdsList()) { cmds[index] = PBHelper.convert(p); index++; } RollingUpgradeStatus rollingUpdateStatus = null; if (resp.hasRollingUpgradeStatus()) { rollingUpdateStatus = PBHelper.convert(resp.getRollingUpgradeStatus()); } return new HeartbeatResponse(cmds, PBHelper.convert(resp.getHaStatus()), rollingUpdateStatus); }
public static VolumeFailureSummary convertVolumeFailureSummary( VolumeFailureSummaryProto proto) { List<String> failedStorageLocations = proto.getFailedStorageLocationsList(); return new VolumeFailureSummary( failedStorageLocations.toArray(new String[failedStorageLocations.size()]), proto.getLastVolumeFailureDate(), proto.getEstimatedCapacityLostTotal()); }
public static VolumeFailureSummaryProto convertVolumeFailureSummary( VolumeFailureSummary volumeFailureSummary) { VolumeFailureSummaryProto.Builder builder = VolumeFailureSummaryProto.newBuilder(); for (String failedStorageLocation: volumeFailureSummary.getFailedStorageLocations()) { builder.addFailedStorageLocations(failedStorageLocation); } builder.setLastVolumeFailureDate( volumeFailureSummary.getLastVolumeFailureDate()); builder.setEstimatedCapacityLostTotal( volumeFailureSummary.getEstimatedCapacityLostTotal()); return builder.build(); }
@Override public HeartbeatResponseProto sendHeartbeat(RpcController controller, HeartbeatRequestProto request) throws ServiceException { HeartbeatResponse response; try { final StorageReport[] report = PBHelper.convertStorageReports( request.getReportsList()); VolumeFailureSummary volumeFailureSummary = request.hasVolumeFailureSummary() ? PBHelper.convertVolumeFailureSummary( request.getVolumeFailureSummary()) : null; response = impl.sendHeartbeat(PBHelper.convert(request.getRegistration()), report, request.getCacheCapacity(), request.getCacheUsed(), request.getXmitsInProgress(), request.getXceiverCount(), request.getFailedVolumes(), volumeFailureSummary); } catch (IOException e) { throw new ServiceException(e); } HeartbeatResponseProto.Builder builder = HeartbeatResponseProto .newBuilder(); DatanodeCommand[] cmds = response.getCommands(); if (cmds != null) { for (int i = 0; i < cmds.length; i++) { if (cmds[i] != null) { builder.addCmds(PBHelper.convert(cmds[i])); } } } builder.setHaStatus(PBHelper.convert(response.getNameNodeHaState())); RollingUpgradeStatus rollingUpdateStatus = response .getRollingUpdateStatus(); if (rollingUpdateStatus != null) { builder.setRollingUpgradeStatus(PBHelper .convertRollingUpgradeStatus(rollingUpdateStatus)); } return builder.build(); }
/** * Ensure that storage type and storage state are propagated * in Storage Reports. */ @Test public void testStorageReportHasStorageTypeAndState() throws IOException { // Make sure we are not testing with the default type, that would not // be a very good test. assertNotSame(storageType, StorageType.DEFAULT); NameNode nn = cluster.getNameNode(); DataNode dn = cluster.getDataNodes().get(0); // Insert a spy object for the NN RPC. DatanodeProtocolClientSideTranslatorPB nnSpy = DataNodeTestUtils.spyOnBposToNN(dn, nn); // Trigger a heartbeat so there is an interaction with the spy // object. DataNodeTestUtils.triggerHeartbeat(dn); // Verify that the callback passed in the expected parameters. ArgumentCaptor<StorageReport[]> captor = ArgumentCaptor.forClass(StorageReport[].class); Mockito.verify(nnSpy).sendHeartbeat( any(DatanodeRegistration.class), captor.capture(), anyLong(), anyLong(), anyInt(), anyInt(), anyInt(), Mockito.any(VolumeFailureSummary.class)); StorageReport[] reports = captor.getValue(); for (StorageReport report: reports) { assertThat(report.getStorage().getStorageType(), is(storageType)); assertThat(report.getStorage().getState(), is(DatanodeStorage.State.NORMAL)); } }
private static void setHeartbeatResponse(DatanodeCommand[] cmds) throws IOException { NNHAStatusHeartbeat ha = new NNHAStatusHeartbeat(HAServiceState.ACTIVE, fsImage.getLastAppliedOrWrittenTxId()); HeartbeatResponse response = new HeartbeatResponse(cmds, ha, null); doReturn(response).when(spyNN).sendHeartbeat( (DatanodeRegistration) any(), (StorageReport[]) any(), anyLong(), anyLong(), anyInt(), anyInt(), anyInt(), (VolumeFailureSummary) any()); }
/** * The given node has reported in. This method should: * 1) Record the heartbeat, so the datanode isn't timed out * 2) Adjust usage stats for future block allocation * * If a substantial amount of time passed since the last datanode * heartbeat then request an immediate block report. * * @return an array of datanode commands * @throws IOException */ HeartbeatResponse handleHeartbeat(DatanodeRegistration nodeReg, StorageReport[] reports, long cacheCapacity, long cacheUsed, int xceiverCount, int xmitsInProgress, int failedVolumes, VolumeFailureSummary volumeFailureSummary, boolean requestFullBlockReportLease) throws IOException { readLock(); try { //get datanode commands final int maxTransfer = blockManager.getMaxReplicationStreams() - xmitsInProgress; DatanodeCommand[] cmds = blockManager.getDatanodeManager().handleHeartbeat( nodeReg, reports, getBlockPoolId(), cacheCapacity, cacheUsed, xceiverCount, maxTransfer, failedVolumes, volumeFailureSummary); long blockReportLeaseId = 0; if (requestFullBlockReportLease) { blockReportLeaseId = blockManager.requestBlockReportLeaseId(nodeReg); } //create ha status final NNHAStatusHeartbeat haState = new NNHAStatusHeartbeat( haContext.getState().getServiceState(), getFSImage().getLastAppliedOrWrittenTxId()); return new HeartbeatResponse(cmds, haState, rollingUpgradeInfo, blockReportLeaseId); } finally { readUnlock(); } }
@Override // FSNamesystemMBean public long getEstimatedCapacityLostTotal() { List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>(); getBlockManager().getDatanodeManager().fetchDatanodes(live, null, false); long estimatedCapacityLostTotal = 0; for (DatanodeDescriptor node: live) { VolumeFailureSummary volumeFailureSummary = node.getVolumeFailureSummary(); if (volumeFailureSummary != null) { estimatedCapacityLostTotal += volumeFailureSummary.getEstimatedCapacityLostTotal(); } } return estimatedCapacityLostTotal; }
@Override // DatanodeProtocol public HeartbeatResponse sendHeartbeat(DatanodeRegistration nodeReg, StorageReport[] report, long dnCacheCapacity, long dnCacheUsed, int xmitsInProgress, int xceiverCount, int failedVolumes, VolumeFailureSummary volumeFailureSummary, boolean requestFullBlockReportLease) throws IOException { checkNNStartup(); verifyRequest(nodeReg); return namesystem.handleHeartbeat(nodeReg, report, dnCacheCapacity, dnCacheUsed, xceiverCount, xmitsInProgress, failedVolumes, volumeFailureSummary, requestFullBlockReportLease); }
/** * Ensure that storage type and storage state are propagated * in Storage Reports. */ @Test public void testStorageReportHasStorageTypeAndState() throws IOException { // Make sure we are not testing with the default type, that would not // be a very good test. assertNotSame(storageType, StorageType.DEFAULT); NameNode nn = cluster.getNameNode(); DataNode dn = cluster.getDataNodes().get(0); // Insert a spy object for the NN RPC. DatanodeProtocolClientSideTranslatorPB nnSpy = DataNodeTestUtils.spyOnBposToNN(dn, nn); // Trigger a heartbeat so there is an interaction with the spy // object. DataNodeTestUtils.triggerHeartbeat(dn); // Verify that the callback passed in the expected parameters. ArgumentCaptor<StorageReport[]> captor = ArgumentCaptor.forClass(StorageReport[].class); Mockito.verify(nnSpy).sendHeartbeat( any(DatanodeRegistration.class), captor.capture(), anyLong(), anyLong(), anyInt(), anyInt(), anyInt(), Mockito.any(VolumeFailureSummary.class), Mockito.anyBoolean()); StorageReport[] reports = captor.getValue(); for (StorageReport report: reports) { assertThat(report.getStorage().getStorageType(), is(storageType)); assertThat(report.getStorage().getState(), is(DatanodeStorage.State.NORMAL)); } }
private static void setHeartbeatResponse(DatanodeCommand[] cmds) throws IOException { NNHAStatusHeartbeat ha = new NNHAStatusHeartbeat(HAServiceState.ACTIVE, fsImage.getLastAppliedOrWrittenTxId()); HeartbeatResponse response = new HeartbeatResponse(cmds, ha, null, ThreadLocalRandom.current().nextLong() | 1L); doReturn(response).when(spyNN).sendHeartbeat( (DatanodeRegistration) any(), (StorageReport[]) any(), anyLong(), anyLong(), anyInt(), anyInt(), anyInt(), (VolumeFailureSummary) any(), anyBoolean()); }