/**
 * Asserts that the NameNode has no cache work queued for any DataNode of the
 * given cluster.
 *
 * The namesystem read lock is taken before the DataNode descriptors are
 * inspected and is released in all cases once the check is done.
 *
 * @param cluster the mini cluster whose DataNodes are inspected
 * @throws Exception if the check cannot be carried out
 */
private void checkPendingCachedEmpty(MiniDFSCluster cluster)
    throws Exception {
  cluster.getNamesystem().readLock();
  try {
    final DatanodeManager dnManager =
        cluster.getNamesystem().getBlockManager().getDatanodeManager();
    for (DataNode dataNode : cluster.getDataNodes()) {
      final DatanodeDescriptor dnDescriptor =
          dnManager.getDatanode(dataNode.getDatanodeId());
      // Build the failure message first so it lists the offending entries.
      final String failureMessage = "Pending cached list of " + dnDescriptor
          + " is not empty, "
          + Arrays.toString(dnDescriptor.getPendingCached().toArray());
      final boolean nothingPending =
          dnDescriptor.getPendingCached().isEmpty();
      Assert.assertTrue(failureMessage, nothingPending);
    }
  } finally {
    cluster.getNamesystem().readUnlock();
  }
}
/**
 * Produces a report of the datanodes selected by the given report type.
 *
 * The caller must pass the superuser privilege check. The operation category
 * is checked once before and once after the read lock is acquired, and the
 * lock is released in all cases before returning.
 *
 * @param type which datanodes to include in the report
 * @return one {@link DatanodeInfo} copy per datanode returned by the
 *         datanode manager, in the same order
 * @throws AccessControlException declared for the privilege check
 * @throws StandbyException declared for the operation check
 */
DatanodeInfo[] datanodeReport(final DatanodeReportType type)
    throws AccessControlException, StandbyException {
  checkSuperuserPrivilege();
  checkOperation(OperationCategory.UNCHECKED);
  readLock();
  try {
    // Check again now that the read lock is held.
    checkOperation(OperationCategory.UNCHECKED);
    final List<DatanodeDescriptor> matching =
        getBlockManager().getDatanodeManager().getDatanodeListForReport(type);
    final DatanodeInfo[] report = new DatanodeInfo[matching.size()];
    int slot = 0;
    for (DatanodeDescriptor descriptor : matching) {
      report[slot++] = new DatanodeInfo(descriptor);
    }
    return report;
  } finally {
    readUnlock();
  }
}
/**
 * Produces per-datanode storage reports for the datanodes selected by the
 * given report type.
 *
 * The caller must pass the superuser privilege check. The operation category
 * is checked once before and once after the read lock is acquired, and the
 * lock is released in all cases before returning.
 *
 * @param type which datanodes to include in the report
 * @return one {@link DatanodeStorageReport} per datanode returned by the
 *         datanode manager, pairing a {@link DatanodeInfo} copy with that
 *         datanode's storage reports
 * @throws AccessControlException declared for the privilege check
 * @throws StandbyException declared for the operation check
 */
DatanodeStorageReport[] getDatanodeStorageReport(
    final DatanodeReportType type)
    throws AccessControlException, StandbyException {
  checkSuperuserPrivilege();
  checkOperation(OperationCategory.UNCHECKED);
  readLock();
  try {
    // Check again now that the read lock is held.
    checkOperation(OperationCategory.UNCHECKED);
    final List<DatanodeDescriptor> selected =
        getBlockManager().getDatanodeManager().getDatanodeListForReport(type);
    final DatanodeStorageReport[] storageReports =
        new DatanodeStorageReport[selected.size()];
    int slot = 0;
    for (DatanodeDescriptor descriptor : selected) {
      storageReports[slot++] = new DatanodeStorageReport(
          new DatanodeInfo(descriptor), descriptor.getStorageReports());
    }
    return storageReports;
  } finally {
    readUnlock();
  }
}
/**
 * Verifies what the NameNode has recorded about failed volumes for one
 * DataNode.
 *
 * When no failed volumes are expected, the volume failure summary must be
 * null. Otherwise the failed storage locations, the last failure date and
 * the estimated lost capacity are all checked.
 *
 * @param dm DatanodeManager supplied by the caller
 * @param dn DataNode to check
 * @param expectCapacityKnown if true, then expect that the capacities of the
 *     volumes were known before the failures, and therefore the lost capacity
 *     can be reported
 * @param expectedFailedVolumes expected locations of failed volumes
 * @throws Exception if there is any failure
 */
private void checkFailuresAtNameNode(DatanodeManager dm, DataNode dn,
    boolean expectCapacityKnown, String... expectedFailedVolumes)
    throws Exception {
  // NOTE(review): the descriptor is looked up through the cluster field's
  // namesystem; the dm argument is not read here.
  final DatanodeDescriptor descriptor = cluster.getNamesystem()
      .getBlockManager().getDatanodeManager()
      .getDatanode(dn.getDatanodeId());
  final int expectedFailureCount = expectedFailedVolumes.length;
  assertEquals(expectedFailureCount, descriptor.getVolumeFailures());

  final VolumeFailureSummary summary = descriptor.getVolumeFailureSummary();
  if (expectedFailureCount == 0) {
    // No failures expected, so no summary should exist.
    assertNull(summary);
    return;
  }

  assertArrayEquals(expectedFailedVolumes,
      summary.getFailedStorageLocations());
  assertTrue(summary.getLastVolumeFailureDate() > 0);
  final long expectedCapacityLost =
      getExpectedCapacityLost(expectCapacityKnown, expectedFailureCount);
  assertEquals(expectedCapacityLost,
      summary.getEstimatedCapacityLostTotal());
}
/** * Test that a volume that is considered failed on startup is seen as * a failed volume by the NN. */ @Test public void testFailedVolumeOnStartupIsCounted() throws Exception { assumeTrue(!System.getProperty("os.name").startsWith("Windows")); final DatanodeManager dm = cluster.getNamesystem().getBlockManager( ).getDatanodeManager(); long origCapacity = DFSTestUtil.getLiveDatanodeCapacity(dm); File dir = new File(cluster.getInstanceStorageDir(0, 0), "current"); try { prepareDirToFail(dir); restartDatanodes(1, false); // The cluster is up.. assertEquals(true, cluster.getDataNodes().get(0) .isBPServiceAlive(cluster.getNamesystem().getBlockPoolId())); // but there has been a single volume failure DFSTestUtil.waitForDatanodeStatus(dm, 1, 0, 1, origCapacity / 2, WAIT_FOR_HEARTBEATS); } finally { FileUtil.chmod(dir.toString(), "755"); } }
@Deprecated @Test(timeout=120000) public void testNodesPerInterval() throws Exception { Configuration newConf = new Configuration(conf); org.apache.log4j.Logger.getLogger(DecommissionManager.class) .setLevel(Level.TRACE); // Set the deprecated configuration key which limits the # of nodes per // interval newConf.setInt("dfs.namenode.decommission.nodes.per.interval", 1); // Disable the normal monitor runs newConf.setInt(DFSConfigKeys.DFS_NAMENODE_DECOMMISSION_INTERVAL_KEY, Integer.MAX_VALUE); startCluster(1, 3, newConf); final FileSystem fs = cluster.getFileSystem(); final DatanodeManager datanodeManager = cluster.getNamesystem().getBlockManager().getDatanodeManager(); final DecommissionManager decomManager = datanodeManager.getDecomManager(); // Write a 3 block file, so each node has one block. Should scan 1 node // each time. DFSTestUtil.createFile(fs, new Path("/file1"), 64, (short) 3, 0xBAD1DEA); for (int i=0; i<3; i++) { doDecomCheck(datanodeManager, decomManager, 1); } }
/**
 * Blocks until every live datanode known to the given namenode reports that
 * its block report has been received.
 *
 * Returns immediately when there are no namenodes, or when the entry at
 * {@code nnIndex} or its NameNode is null.
 *
 * @param nnIndex index of the namenode to wait on
 * @param timeout passed to {@code GenericTestUtils.waitFor} as its last
 *     argument (presumably the maximum wait in milliseconds; confirm against
 *     that helper)
 */
public void waitFirstBRCompleted(int nnIndex, int timeout)
    throws IOException, TimeoutException, InterruptedException {
  if (namenodes.size() == 0) {
    return;
  }
  if (getNN(nnIndex) == null) {
    return;
  }
  if (getNN(nnIndex).nameNode == null) {
    return;
  }

  final FSNamesystem namesystem = getNamesystem(nnIndex);
  final DatanodeManager dnManager =
      namesystem.getBlockManager().getDatanodeManager();
  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
      final List<DatanodeDescriptor> liveNodes =
          dnManager.getDatanodeListForReport(DatanodeReportType.LIVE);
      boolean allReported = true;
      for (DatanodeDescriptor liveNode : liveNodes) {
        if (!liveNode.checkBlockReportReceived()) {
          // One missing report is enough to keep waiting.
          allReported = false;
          break;
        }
      }
      return allReported;
    }
  }, 100, timeout);
}
@Override void invoke() throws Exception { DatanodeInfo[] newNodes = new DatanodeInfo[2]; newNodes[0] = nodes[0]; newNodes[1] = nodes[1]; final DatanodeManager dm = cluster.getNamesystem(0).getBlockManager() .getDatanodeManager(); final String storageID1 = dm.getDatanode(newNodes[0]).getStorageInfos()[0] .getStorageID(); final String storageID2 = dm.getDatanode(newNodes[1]).getStorageInfos()[0] .getStorageID(); String[] storageIDs = {storageID1, storageID2}; client.getNamenode().updatePipeline(client.getClientName(), oldBlock, newBlock, newNodes, storageIDs); // close can fail if the out.close() commit the block after block received // notifications from Datanode. // Since datanodes and output stream have still old genstamps, these // blocks will be marked as corrupt after HDFS-5723 if RECEIVED // notifications reaches namenode first and close() will fail. DFSTestUtil.abortStream((DFSOutputStream) out.getWrappedStream()); }
/** * Verify decommissioned nodes should not be selected. */ @Test public void testPlacementWithLocalRackNodesDecommissioned() throws Exception { String clientMachine = "client.foo.com"; // Map client to RACK3 String clientRack = "/RACK3"; StaticMapping.addNodeToRack(clientMachine, clientRack); final DatanodeManager dnm = namesystem.getBlockManager().getDatanodeManager(); DatanodeDescriptor dnd3 = dnm.getDatanode( cluster.getDataNodes().get(3).getDatanodeId()); assertEquals(dnd3.getNetworkLocation(), clientRack); dnm.getDecomManager().startDecommission(dnd3); try { testPlacement(clientMachine, clientRack, false); } finally { dnm.getDecomManager().stopDecommission(dnd3); } }
/**
 * Builds a status line describing a rolling upgrade when datanodes with more
 * than one software version are present.
 *
 * @param fsn the namesystem to inspect; may be null
 * @return an empty string when {@code fsn} is null or at most one software
 *         version is reported; otherwise a message listing every version
 *         with its count, formatted as {@code version(count)} and separated
 *         by {@code ", "}
 */
static String getRollingUpgradeText(FSNamesystem fsn) {
  if (fsn == null) {
    return "";
  }
  final DatanodeManager dm = fsn.getBlockManager().getDatanodeManager();
  final Map<String, Integer> versions = dm.getDatanodesSoftwareVersions();
  if (versions.size() <= 1) {
    return "";
  }

  // The buffer never leaves this method, so the unsynchronized
  // StringBuilder is sufficient.
  final StringBuilder status = new StringBuilder(
      "Rolling upgrades in progress. There are ")
      .append(versions.size())
      .append(" versions of datanodes currently live: ");
  // Emit the separator before every entry but the first instead of trimming
  // a trailing one afterwards.
  String separator = "";
  for (Map.Entry<String, Integer> version : versions.entrySet()) {
    status.append(separator)
        .append(version.getKey())
        .append("(")
        .append(version.getValue())
        .append(")");
    separator = ", ";
  }
  return status.toString();
}
/** * Verify the support for decommissioning a datanode that is already dead. * Under this scenario the datanode should immediately be marked as * DECOMMISSIONED */ @Test(timeout=120000) public void testDecommissionDeadDN() throws IOException, InterruptedException, TimeoutException { DatanodeID dnID = cluster.getDataNodes().get(0).getDatanodeId(); String dnName = dnID.getXferAddr(); DataNodeProperties stoppedDN = cluster.stopDataNode(0); DFSTestUtil.waitForDatanodeState(cluster, dnID.getDatanodeUuid(), false, 30000); FSNamesystem fsn = cluster.getNamesystem(); final DatanodeManager dm = fsn.getBlockManager().getDatanodeManager(); DatanodeDescriptor dnDescriptor = dm.getDatanode(dnID); decommissionNode(fsn, localFileSys, dnName); dm.refreshNodes(conf); BlockManagerTestUtil.checkDecommissionState(dm, dnDescriptor); assertTrue(dnDescriptor.isDecommissioned()); // Add the node back cluster.restartDataNode(stoppedDN, true); cluster.waitActive(); // Call refreshNodes on FSNamesystem with empty exclude file to remove the // datanode from decommissioning list and make it available again. writeConfigFile(localFileSys, excludeFile, null); dm.refreshNodes(conf); }
/** * Test that a volume that is considered failed on startup is seen as * a failed volume by the NN. */ @Test public void testFailedVolumeOnStartupIsCounted() throws Exception { assumeTrue(!System.getProperty("os.name").startsWith("Windows")); final DatanodeManager dm = cluster.getNamesystem().getBlockManager().getDatanodeManager(); long origCapacity = DFSTestUtil.getLiveDatanodeCapacity(dm); File dir = new File(cluster.getInstanceStorageDir(0, 0), "current"); try { prepareDirToFail(dir); restartDatanodes(1, false); // The cluster is up.. assertEquals(true, cluster.getDataNodes().get(0) .isBPServiceAlive(cluster.getNamesystem().getBlockPoolId())); // but there has been a single volume failure DFSTestUtil.waitForDatanodeStatus(dm, 1, 0, 1, origCapacity / 2, WAIT_FOR_HEARTBEATS); } finally { FileUtil.chmod(dir.toString(), "755"); } }