static void corruptBlock(Block block, MiniDFSCluster dfs) throws IOException {
  boolean corrupted = false;
  for (int i = 0; i < NUM_DATANODES; i++) {
    corrupted |= TestDatanodeBlockScanner.corruptReplica(block, i, dfs);
  }
  assertTrue("could not corrupt block", corrupted);
}

static void corruptBlock(Block block, MiniDFSCluster dfs) throws IOException {
  boolean corrupted = false;
  for (int i = 0; i < NUM_DATANODES; i++) {
    corrupted |= TestDatanodeBlockScanner.corruptReplica(block, i, dfs);
  }
  assertTrue("could not corrupt block: " + block, corrupted);
}

static void corruptBlock(String blockName, MiniDFSCluster dfs) throws IOException {
  boolean corrupted = false;
  for (int i = 0; i < NUM_DATANODES; i++) {
    corrupted |= TestDatanodeBlockScanner.corruptReplica(blockName, i, dfs);
  }
  assertTrue("could not corrupt block", corrupted);
}

static void corruptBlock(String blockName, MiniDFSCluster dfs) throws IOException {
  boolean corrupted = false;
  for (int i = 0; i < NUM_DATANODES; i++) {
    corrupted |= TestDatanodeBlockScanner.corruptReplica(blockName, i, dfs);
  }
  assertTrue("could not corrupt block: " + blockName, corrupted);
}

/**
 * Test that processOverReplicatedBlock can handle corrupt replicas.
 * It makes sure that corrupt replicas won't be treated as valid ones,
 * thus preventing the NN from deleting valid replicas while keeping
 * corrupt ones.
 */
public void testProcesOverReplicateBlock() throws IOException {
  Configuration conf = new Configuration();
  conf.setLong("dfs.blockreport.intervalMsec", 1000L);
  conf.set("dfs.replication.pending.timeout.sec", Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster(conf, 3, true, null);
  FileSystem fs = cluster.getFileSystem();
  try {
    int namespaceId = cluster.getNameNode().getNamespaceID();
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short)3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short)3);

    // corrupt the block on datanode 0
    Block block = DFSTestUtil.getFirstBlock(fs, fileName);
    TestDatanodeBlockScanner.corruptReplica(block, 0, cluster);
    DataNodeProperties dnProps = cluster.stopDataNode(0);

    // remove the block scanner logs (both curr and prev) to trigger
    // block scanning at startup
    File scanLogCurr = new File(cluster.getBlockDirectory("data1")
        .getParent(), "dncp_block_verification.log.curr");
    scanLogCurr.delete();
    File scanLogPrev = new File(cluster.getBlockDirectory("data1")
        .getParent(), "dncp_block_verification.log.prev");
    scanLogPrev.delete();

    // restart the datanode so the corrupt replica will be detected
    cluster.restartDataNode(dnProps);
    DFSTestUtil.waitReplication(fs, fileName, (short)2);

    final DatanodeID corruptDataNode =
        cluster.getDataNodes().get(2).getDNRegistrationForNS(namespaceId);
    final FSNamesystem namesystem = cluster.getNameNode().getNamesystem();
    synchronized (namesystem.heartbeats) {
      // set the live datanodes' remaining space to 0 so they will be
      // chosen for deletion when over-replication occurs
      for (DatanodeDescriptor datanode : namesystem.heartbeats) {
        if (!corruptDataNode.equals(datanode)) {
          datanode.updateHeartbeat(100L, 100L, 0L, 100L, 0);
        }
      }
    }

    // decrease the replication factor to 1
    namesystem.setReplication(fileName.toString(), (short)1);
    waitReplication(namesystem, block, (short)1);

    // the corrupt replica won't be chosen as the excess one;
    // without 4910 the number of live replicas would be 0: block gets lost
    assertEquals(1, namesystem.countNodes(block).liveReplicas());

    // Test the case where multiple calls to setReplication still succeed.
    System.out.println("Starting next test with file foo2.");
    final Path fileName2 = new Path("/foo2");
    DFSTestUtil.createFile(fs, fileName2, 2, (short)3, 0L);
    DFSTestUtil.waitReplication(fs, fileName2, (short)3);
    LocatedBlocks lbs = namesystem.getBlockLocations(
        fileName2.toString(), 0, 10);
    Block firstBlock = lbs.get(0).getBlock();
    namesystem.setReplication(fileName2.toString(), (short)2);
    namesystem.setReplication(fileName2.toString(), (short)1);

    // wait up to one minute for excess replicas to get deleted. It is not
    // immediate because excess replicas are handled asynchronously.
    waitReplication(namesystem, firstBlock, (short)1);
    assertEquals(1, namesystem.countNodes(firstBlock).liveReplicas());
  } finally {
    cluster.shutdown();
  }
}

/**
 * Test that processOverReplicatedBlock can handle corrupt replicas.
 * It makes sure that corrupt replicas won't be treated as valid ones,
 * thus preventing the NN from deleting valid replicas while keeping
 * corrupt ones.
 */
@Test
public void testProcesOverReplicateBlock() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
  conf.set(
      DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY,
      Integer.toString(2));
  MiniDFSCluster cluster =
      new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
  FileSystem fs = cluster.getFileSystem();
  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short)3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short)3);

    // corrupt the block on datanode 0
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
    assertTrue(TestDatanodeBlockScanner.corruptReplica(block, 0));
    DataNodeProperties dnProps = cluster.stopDataNode(0);

    // remove the block scanner log to trigger block scanning
    File scanLog = new File(MiniDFSCluster.getFinalizedDir(
        cluster.getInstanceStorageDir(0, 0),
        cluster.getNamesystem().getBlockPoolId()).getParent().toString()
        + "/../dncp_block_verification.log.prev");
    // wait up to one minute for the deletion to succeed
    for (int i = 0; !scanLog.delete(); i++) {
      assertTrue("Could not delete log file in one minute", i < 60);
      try {
        Thread.sleep(1000);
      } catch (InterruptedException ignored) {
      }
    }

    // restart the datanode so the corrupt replica will be detected
    cluster.restartDataNode(dnProps);
    DFSTestUtil.waitReplication(fs, fileName, (short)2);

    String blockPoolId = cluster.getNamesystem().getBlockPoolId();
    final DatanodeID corruptDataNode =
        DataNodeTestUtils.getDNRegistrationForBP(
            cluster.getDataNodes().get(2), blockPoolId);
    final FSNamesystem namesystem = cluster.getNamesystem();
    final BlockManager bm = namesystem.getBlockManager();
    final HeartbeatManager hm = bm.getDatanodeManager().getHeartbeatManager();
    try {
      namesystem.writeLock();
      synchronized (hm) {
        // set the live datanodes' remaining space to 0 so they will be
        // chosen for deletion when over-replication occurs
        String corruptMachineName = corruptDataNode.getXferAddr();
        for (DatanodeDescriptor datanode : hm.getDatanodes()) {
          if (!corruptMachineName.equals(datanode.getXferAddr())) {
            datanode.updateHeartbeat(100L, 100L, 0L, 100L, 0, 0);
          }
        }

        // decrease the replication factor to 1
        NameNodeAdapter.setReplication(namesystem, fileName.toString(),
            (short)1);

        // the corrupt replica won't be chosen as the excess one;
        // without 4910 the number of live replicas would be 0: block gets lost
        assertEquals(1, bm.countNodes(block.getLocalBlock()).liveReplicas());
      }
    } finally {
      namesystem.writeUnlock();
    }
  } finally {
    cluster.shutdown();
  }
}

/**
 * Test that processOverReplicatedBlock can handle corrupt replicas.
 * It makes sure that corrupt replicas won't be treated as valid ones,
 * thus preventing the NN from deleting valid replicas while keeping
 * corrupt ones.
 */
@Test
public void testProcesOverReplicateBlock() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
  conf.set(
      DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY,
      Integer.toString(2));
  MiniDFSCluster cluster =
      new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
  FileSystem fs = cluster.getFileSystem();
  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short)3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short)3);

    // corrupt the block on datanode 0
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
    assertTrue(TestDatanodeBlockScanner.corruptReplica(block, 0));
    DataNodeProperties dnProps = cluster.stopDataNode(0);

    // remove the block scanner log to trigger block scanning
    File scanLog = new File(MiniDFSCluster.getFinalizedDir(
        cluster.getInstanceStorageDir(0, 0),
        cluster.getNamesystem().getBlockPoolId()).getParent().toString()
        + "/../dncp_block_verification.log.prev");
    // wait up to one minute for the deletion to succeed
    for (int i = 0; !scanLog.delete(); i++) {
      assertTrue("Could not delete log file in one minute", i < 60);
      try {
        Thread.sleep(1000);
      } catch (InterruptedException ignored) {
      }
    }

    // restart the datanode so the corrupt replica will be detected
    cluster.restartDataNode(dnProps);
    DFSTestUtil.waitReplication(fs, fileName, (short)2);

    String blockPoolId = cluster.getNamesystem().getBlockPoolId();
    final DatanodeID corruptDataNode =
        DataNodeTestUtils.getDNRegistrationForBP(
            cluster.getDataNodes().get(2), blockPoolId);
    final FSNamesystem namesystem = cluster.getNamesystem();
    final BlockManager bm = namesystem.getBlockManager();
    final HeartbeatManager hm = bm.getDatanodeManager().getHeartbeatManager();
    try {
      namesystem.writeLock();
      synchronized (hm) {
        // set the live datanodes' remaining space to 0 so they will be
        // chosen for deletion when over-replication occurs
        String corruptMachineName = corruptDataNode.getXferAddr();
        for (DatanodeDescriptor datanode : hm.getDatanodes()) {
          if (!corruptMachineName.equals(datanode.getXferAddr())) {
            datanode.getStorageInfos()[0].setUtilizationForTesting(
                100L, 100L, 0, 100L);
            datanode.updateHeartbeat(
                BlockManagerTestUtil.getStorageReportsForDatanode(datanode),
                0L, 0L, 0, 0);
          }
        }

        // decrease the replication factor to 1
        NameNodeAdapter.setReplication(namesystem, fileName.toString(),
            (short)1);

        // the corrupt replica won't be chosen as the excess one;
        // without 4910 the number of live replicas would be 0: block gets lost
        assertEquals(1, bm.countNodes(block.getLocalBlock()).liveReplicas());
      }
    } finally {
      namesystem.writeUnlock();
    }
  } finally {
    cluster.shutdown();
  }
}

/**
 * Test that processOverReplicatedBlock can handle corrupt replicas.
 * It makes sure that corrupt replicas won't be treated as valid ones,
 * thus preventing the NN from deleting valid replicas while keeping
 * corrupt ones.
 */
public void testProcesOverReplicateBlock() throws IOException {
  Configuration conf = new Configuration();
  conf.setLong("dfs.blockreport.intervalMsec", 1000L);
  conf.set("dfs.replication.pending.timeout.sec", Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster(conf, 3, true, null);
  FileSystem fs = cluster.getFileSystem();
  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short)3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short)3);

    // corrupt the block on datanode 0
    Block block = DFSTestUtil.getFirstBlock(fs, fileName);
    TestDatanodeBlockScanner.corruptReplica(block.getBlockName(), 0);
    DataNodeProperties dnProps = cluster.stopDataNode(0);

    // remove the block scanner log to trigger block scanning
    File scanLog = new File(System.getProperty("test.build.data"),
        "dfs/data/data1/current/dncp_block_verification.log.curr");
    // wait up to one minute for the deletion to succeed
    for (int i = 0; !scanLog.delete(); i++) {
      assertTrue("Could not delete log file in one minute", i < 60);
      try {
        Thread.sleep(1000);
      } catch (InterruptedException ignored) {
      }
    }

    // restart the datanode so the corrupt replica will be detected
    cluster.restartDataNode(dnProps);
    DFSTestUtil.waitReplication(fs, fileName, (short)2);

    final DatanodeID corruptDataNode =
        cluster.getDataNodes().get(2).dnRegistration;
    final FSNamesystem namesystem = FSNamesystem.getFSNamesystem();
    synchronized (namesystem.heartbeats) {
      // set the live datanodes' remaining space to 0 so they will be
      // chosen for deletion when over-replication occurs
      for (DatanodeDescriptor datanode : namesystem.heartbeats) {
        if (!corruptDataNode.equals(datanode)) {
          datanode.updateHeartbeat(100L, 100L, 0L, 0);
        }
      }

      // decrease the replication factor to 1
      namesystem.setReplication(fileName.toString(), (short)1);

      // the corrupt replica won't be chosen as the excess one;
      // without 4910 the number of live replicas would be 0: block gets lost
      assertEquals(1, namesystem.countNodes(block).liveReplicas());
    }
  } finally {
    cluster.shutdown();
  }
}

/**
 * Test that processOverReplicatedBlock can handle corrupt replicas.
 * It makes sure that corrupt replicas won't be treated as valid ones,
 * thus preventing the NN from deleting valid replicas while keeping
 * corrupt ones.
 */
public void testProcesOverReplicateBlock() throws IOException {
  Configuration conf = new Configuration();
  conf.setLong("dfs.blockreport.intervalMsec", 1000L);
  conf.set("dfs.replication.pending.timeout.sec", Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster(conf, 3, true, null);
  FileSystem fs = cluster.getFileSystem();
  try {
    int namespaceId = cluster.getNameNode().getNamespaceID();
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short)3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short)3);

    // corrupt the block on datanode 0
    Block block = DFSTestUtil.getFirstBlock(fs, fileName);
    TestDatanodeBlockScanner.corruptReplica(block.getBlockName(), 0, cluster);
    DataNodeProperties dnProps = cluster.stopDataNode(0);

    // remove the block scanner log to trigger block scanning
    File scanLog = new File(cluster.getBlockDirectory("data1").getParent(),
        "dncp_block_verification.log.curr");
    scanLog.delete();

    // restart the datanode so the corrupt replica will be detected
    cluster.restartDataNode(dnProps);
    DFSTestUtil.waitReplication(fs, fileName, (short)2);

    final DatanodeID corruptDataNode =
        cluster.getDataNodes().get(2).getDNRegistrationForNS(namespaceId);
    final FSNamesystem namesystem = cluster.getNameNode().getNamesystem();
    synchronized (namesystem.heartbeats) {
      // set the live datanodes' remaining space to 0 so they will be
      // chosen for deletion when over-replication occurs
      for (DatanodeDescriptor datanode : namesystem.heartbeats) {
        if (!corruptDataNode.equals(datanode)) {
          datanode.updateHeartbeat(100L, 100L, 0L, 100L, 0);
        }
      }
    }

    // decrease the replication factor to 1
    namesystem.setReplication(fileName.toString(), (short)1);
    waitReplication(namesystem, block, (short)1);

    // the corrupt replica won't be chosen as the excess one;
    // without 4910 the number of live replicas would be 0: block gets lost
    assertEquals(1, namesystem.countNodes(block).liveReplicas());

    // Test the case where multiple calls to setReplication still succeed.
    System.out.println("Starting next test with file foo2.");
    final Path fileName2 = new Path("/foo2");
    DFSTestUtil.createFile(fs, fileName2, 2, (short)3, 0L);
    DFSTestUtil.waitReplication(fs, fileName2, (short)3);
    LocatedBlocks lbs = namesystem.getBlockLocations(
        fileName2.toString(), 0, 10);
    Block firstBlock = lbs.get(0).getBlock();
    namesystem.setReplication(fileName2.toString(), (short)2);
    namesystem.setReplication(fileName2.toString(), (short)1);

    // wait up to one minute for excess replicas to get deleted. It is not
    // immediate because excess replicas are handled asynchronously.
    waitReplication(namesystem, firstBlock, (short)1);
    assertEquals(1, namesystem.countNodes(firstBlock).liveReplicas());
  } finally {
    cluster.shutdown();
  }
}