/**
 * Creates two files whose output streams are not closed here, repeatedly
 * allocates and abandons a block on the first file, allocates one block on
 * the second, then restarts the NameNode and asserts that it reports being
 * in safe mode before leaving safe mode explicitly.
 */
@Test
public void testEditLog() throws Exception {
  final String firstFile = "/testEditLog";
  final String secondFile = "/testEditLog1";
  final int abandonRounds = 10;

  NameNode nameNode = cluster.getNameNode();
  final DistributedFileSystem dfs = (DistributedFileSystem) fs;
  final String holder = dfs.getClient().clientName;

  // First file: allocate a block and abandon it again, ten times over.
  fs.create(new Path(firstFile));
  int round = 0;
  while (round < abandonRounds) {
    Block allocated = nameNode.addBlock(firstFile, holder).getBlock();
    nameNode.abandonBlock(allocated, firstFile, holder);
    round++;
  }

  // Second file: allocate a single block and keep it.
  fs.create(new Path(secondFile));
  nameNode.addBlock(secondFile, holder);

  // Restart the NameNode and re-fetch the handle from the cluster.
  cluster.restartNameNode(0, new String[] {}, false);
  nameNode = cluster.getNameNode();
  assertTrue(nameNode.isInSafeMode());
  nameNode.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
}
/** * Test for save namespace should succeed when parent directory renamed with * open lease and destination directory exist. * This test is a regression for HDFS-2827 */ @Test public void testSaveNamespaceWithRenamedLease() throws Exception { MiniDFSCluster cluster = new MiniDFSCluster(new Configuration(), 1, true, (String[]) null); cluster.waitActive(); DistributedFileSystem fs = (DistributedFileSystem) cluster.getFileSystem(); OutputStream out = null; try { fs.mkdirs(new Path("/test-target")); out = fs.create(new Path("/test-source/foo")); // don't close fs.rename(new Path("/test-source/"), new Path("/test-target/")); fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER); cluster.getNameNode().saveNamespace(); fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE); } finally { IOUtils.cleanup(LOG, out, fs); if (cluster != null) { cluster.shutdown(); } } }
/**
 * Puts the primary avatar into safe mode, runs the AvatarShell command
 * {@code -zero -safemode leave}, and verifies that the command returns 0,
 * that the primary avatar is no longer in safe mode, and that
 * {@code blocksInFile()} reports the same count before and after.
 */
@Test
public void testAvatarShellLeaveSafeMode() throws Exception {
  setUp(false, "testAvatarShellLeaveSafeMode");
  int blocksBefore = blocksInFile();

  AvatarShell shell = new AvatarShell(conf);
  AvatarNode primaryAvatar = cluster.getPrimaryAvatar(0).avatar;
  primaryAvatar.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
  assertTrue(primaryAvatar.isInSafeMode());

  assertEquals(0, shell.run(new String[] { "-zero", "-safemode", "leave" }));
  assertFalse(primaryAvatar.isInSafeMode());
  // Check again through a freshly fetched primary-avatar handle.
  assertFalse(cluster.getPrimaryAvatar(0).avatar.isInSafeMode());

  int blocksAfter = blocksInFile();
  // assertEquals reports both counts on failure, unlike assertTrue(a == b).
  assertEquals(blocksBefore, blocksAfter);
}
/**
 * Same scenario as the {@code -zero} variant, but after a failover and a
 * standby restart: puts the primary avatar into safe mode, runs the
 * AvatarShell command {@code -one -safemode leave}, and verifies that the
 * command returns 0, that the primary avatar is no longer in safe mode, and
 * that {@code blocksInFile()} reports the same count before and after.
 */
@Test
public void testAvatarShellLeaveSafeMode1() throws Exception {
  setUp(false, "testAvatarShellLeaveSafeMode1");
  int blocksBefore = blocksInFile();

  cluster.failOver();
  cluster.restartStandby();

  AvatarShell shell = new AvatarShell(conf);
  AvatarNode primaryAvatar = cluster.getPrimaryAvatar(0).avatar;
  primaryAvatar.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
  assertTrue(primaryAvatar.isInSafeMode());

  assertEquals(0, shell.run(new String[] { "-one", "-safemode", "leave" }));
  assertFalse(primaryAvatar.isInSafeMode());
  // Check again through a freshly fetched primary-avatar handle.
  assertFalse(cluster.getPrimaryAvatar(0).avatar.isInSafeMode());

  int blocksAfter = blocksInFile();
  // assertEquals reports both counts on failure, unlike assertTrue(a == b).
  assertEquals(blocksBefore, blocksAfter);
}
/**
 * Puts the primary avatar into safe mode, runs the AvatarShell command
 * {@code -zero -leaveSafeMode}, and verifies that the command returns 0,
 * that the primary avatar is no longer in safe mode, and that
 * {@code blocksInFile()} reports the same count before and after.
 */
@Test
public void testAvatarShellLeaveSafeMode() throws Exception {
  setUp(false);
  int blocksBefore = blocksInFile();

  AvatarShell shell = new AvatarShell(conf);
  AvatarNode primaryAvatar = cluster.getPrimaryAvatar(0).avatar;
  primaryAvatar.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
  assertTrue(primaryAvatar.isInSafeMode());

  assertEquals(0, shell.run(new String[] { "-zero", "-leaveSafeMode" }));
  assertFalse(primaryAvatar.isInSafeMode());
  // Check again through a freshly fetched primary-avatar handle.
  assertFalse(cluster.getPrimaryAvatar(0).avatar.isInSafeMode());

  int blocksAfter = blocksInFile();
  // assertEquals reports both counts on failure, unlike assertTrue(a == b).
  assertEquals(blocksBefore, blocksAfter);
}
/**
 * Same scenario as the {@code -zero} variant, but after a failover and a
 * standby restart: puts the primary avatar into safe mode, runs the
 * AvatarShell command {@code -one -leaveSafeMode}, and verifies that the
 * command returns 0, that the primary avatar is no longer in safe mode, and
 * that {@code blocksInFile()} reports the same count before and after.
 */
@Test
public void testAvatarShellLeaveSafeMode1() throws Exception {
  setUp(false);
  int blocksBefore = blocksInFile();

  cluster.failOver();
  cluster.restartStandby();

  AvatarShell shell = new AvatarShell(conf);
  AvatarNode primaryAvatar = cluster.getPrimaryAvatar(0).avatar;
  primaryAvatar.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
  assertTrue(primaryAvatar.isInSafeMode());

  assertEquals(0, shell.run(new String[] { "-one", "-leaveSafeMode" }));
  assertFalse(primaryAvatar.isInSafeMode());
  // Check again through a freshly fetched primary-avatar handle.
  assertFalse(cluster.getPrimaryAvatar(0).avatar.isInSafeMode());

  int blocksAfter = blocksInFile();
  // assertEquals reports both counts on failure, unlike assertTrue(a == b).
  assertEquals(blocksBefore, blocksAfter);
}
public void testMismatchedBlockGS() throws IOException { Configuration conf = new Configuration(); final short REPLICATION_FACTOR = 1; MiniDFSCluster cluster = new MiniDFSCluster( conf, REPLICATION_FACTOR, true, null); try { cluster.waitActive(); FileSystem fs = cluster.getFileSystem(); Path file1 = new Path("/tmp/file1"); // create a file DFSTestUtil.createFile(fs, file1, 10, REPLICATION_FACTOR, 0); // corrupt its generation stamp Block block = DFSTestUtil.getFirstBlock(fs, file1); corruptReplicaGS(block, 0, cluster); // stop and start the cluster cluster.shutdown(); cluster = new MiniDFSCluster( conf, REPLICATION_FACTOR, false, null, false); cluster.waitActive(); assertTrue(cluster.getNameNode().setSafeMode(SafeModeAction.SAFEMODE_GET)); cluster.getNameNode().setSafeMode(SafeModeAction.SAFEMODE_LEAVE); // Verify that there is a missing block assertEquals(1, cluster.getNameNode().getNamesystem().getMissingBlocksCount()); } finally { cluster.shutdown(); } }
public void testRestartNameNode(boolean waitSafeMode) throws Exception { String file = "/testRestartNameNode" + waitSafeMode; // Create a file and write data. FSDataOutputStream out = fs.create(new Path(file)); String clientName = ((DistributedFileSystem) fs).getClient().getClientName(); byte[] buffer = new byte[FILE_LEN]; random.nextBytes(buffer); out.write(buffer); ((DFSOutputStream) out.getWrappedStream()).sync(); // Now shutdown the namenode and try to close the file. cluster.shutdownNameNode(0); Thread closeThread = new CloseThread(out, file, clientName); closeThread.start(); Thread.sleep(CLOSE_FILE_TIMEOUT / 4); // Restart the namenode and verify the close file worked. if (!waitSafeMode) { cluster.restartNameNode(0, new String[]{}, false); cluster.getNameNode().setSafeMode(SafeModeAction.SAFEMODE_LEAVE); } else { cluster.restartNameNode(0); } closeThread.join(5000); assertTrue(pass); }
/**
 * One-time fixture: builds a configuration with a 1024-byte block size and
 * a safe-mode threshold of 1.5, starts a single-datanode cluster, forces
 * the NameNode out of safe mode and caches the cluster's file system.
 */
@BeforeClass
public static void setUpBeforeClass() throws Exception {
  final int blockSizeBytes = 1024;
  final float safeModeThreshold = 1.5f;

  conf = new Configuration();
  conf.setInt("dfs.block.size", blockSizeBytes);
  conf.setFloat("dfs.safemode.threshold.pct", safeModeThreshold);

  cluster = new MiniDFSCluster(conf, 1, true, null, false);
  // NOTE(review): a threshold above 1.0 presumably keeps the NameNode in
  // safe mode until told otherwise, hence the explicit leave - confirm.
  cluster.getNameNode().setSafeMode(SafeModeAction.SAFEMODE_LEAVE);

  fs = cluster.getFileSystem();
}
@Test public void testTxIdPersistence() throws Exception { Configuration conf = getConf(); MiniDFSCluster cluster = new MiniDFSCluster(conf, 0, true, null); cluster.waitActive(); FSNamesystem fsn = cluster.getNameNode().getNamesystem(); try { // We have a BEGIN_LOG_SEGMENT txn to start assertEquals(0, fsn.getEditLog().getLastWrittenTxId()); doAnEdit(fsn, 1); assertEquals(1, fsn.getEditLog().getLastWrittenTxId()); fsn.setSafeMode(SafeModeAction.SAFEMODE_ENTER); fsn.saveNamespace(false, false); // 2 more txns: END the first segment, BEGIN a new one assertEquals(3, fsn.getEditLog().getLastWrittenTxId()); // Shut down and restart fsn.getFSImage().close(); fsn.close(); // 1 more txn to END that segment assertEquals(4, fsn.getEditLog().getLastWrittenTxId()); fsn = null; cluster = new MiniDFSCluster(conf, 0, false, null); cluster.waitActive(); fsn = cluster.getNameNode().getNamesystem(); // 1 more txn to start new segment on restart assertEquals(5, fsn.getEditLog().getLastWrittenTxId()); } finally { if (fsn != null) { fsn.close(); } } }
/**
 * Starts a NameNode from {@code conf}, asserts that {@code /test} is a
 * directory, saves the namespace while in safe mode and then stops the
 * NameNode and waits for it via {@code join()}.
 *
 * @param conf configuration used to construct the NameNode
 * @throws IOException if any NameNode call fails
 */
private void checkNameSpace(Configuration conf) throws IOException {
  final NameNode nn = new NameNode(conf);

  final boolean testIsDirectory = nn.getFileInfo("/test").isDir();
  assertTrue(testIsDirectory);

  nn.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
  nn.saveNamespace(false, false);

  nn.stop();
  nn.join();
}
/**
 * Forwards the safe-mode request to {@code namenode} through a
 * {@code MutableFSCaller} created on {@code failoverHandler} and returns
 * the value produced by {@code callFS()}.
 *
 * @param action the safe-mode action to pass to the namenode
 * @return the result of the namenode's {@code setSafeMode} call
 * @throws IOException if the call fails
 */
@Override
public boolean setSafeMode(final SafeModeAction action) throws IOException {
  final Boolean outcome =
      (failoverHandler.new MutableFSCaller<Boolean>() {
        public Boolean call(int r) throws IOException {
          return namenode.setSafeMode(action);
        }
      }).callFS();
  return outcome;
}
/** * Verify that the NameNode stays in safemode when dfs.safemode.datanode.min * is set to a number greater than the number of live datanodes. */ @Test public void testDatanodeThreshold() throws IOException { MiniDFSCluster cluster = null; DistributedFileSystem fs = null; try { Configuration conf = new Configuration(); conf.setInt(DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 0); conf.setInt(DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY, 1); // bring up a cluster with no datanodes cluster = new MiniDFSCluster(conf, 0, true, null); cluster.waitActive(); fs = (DistributedFileSystem)cluster.getFileSystem(); assertTrue("No datanode started, but we require one - safemode expected", fs.setSafeMode(SafeModeAction.SAFEMODE_GET)); String tipMsg = cluster.getNameNode().getNamesystem().getSafeModeTip(); assertTrue("Safemode tip message looks right", tipMsg.contains("The number of live datanodes 0 needs an " + "additional 1 live")); // Start a datanode cluster.startDataNodes(conf, 1, true, null, null); // Wait long enough for safemode check to refire try { Thread.sleep(1000); } catch (InterruptedException ignored) {} // We now should be out of safe mode. assertFalse( "Out of safe mode after starting datanode.", fs.setSafeMode(SafeModeAction.SAFEMODE_GET)); } finally { if (fs != null) fs.close(); if (cluster != null) cluster.shutdown(); } }
@Test public void testDNShouldNotSendBBWReportIfAppendOff() throws Exception { FileSystem fileSystem = null; FSDataOutputStream outStream = null; // disable the append support conf.setBoolean("dfs.support.append", false); MiniDFSCluster cluster = new MiniDFSCluster(conf, 1, true, null); cluster.waitActive(); try { fileSystem = cluster.getFileSystem(); // Keep open stream outStream = writeFileAndSync(fileSystem, src, fileContent); cluster.restartNameNode(false); Thread.sleep(2000); assertEquals( "Able to read the synced block content after NameNode restart (without append support", 0, getFileContentFromDFS(fileSystem).length()); } finally { // NN will not come out of safe mode. So exited the safemode forcibly to // clean the resources. cluster.getNameNode().setSafeMode(SafeModeAction.SAFEMODE_LEAVE); if (null != fileSystem) fileSystem.close(); if (null != outStream) outStream.close(); cluster.shutdown(); } }
public void testSaveWhileEditsRolled() throws Exception { Configuration conf = getConf(); NameNode.format(conf); NameNode nn = new NameNode(conf); FSNamesystem fsn = nn.getNamesystem(); // Replace the FSImage with a spy final FSImage originalImage = fsn.dir.fsImage; FSImage spyImage = spy(originalImage); fsn.dir.fsImage = spyImage; try { doAnEdit(fsn, 1); CheckpointSignature sig = fsn.rollEditLog(); LOG.warn("Checkpoint signature: " + sig); // Do another edit doAnEdit(fsn, 2); // Save namespace fsn.setSafeMode(SafeModeAction.SAFEMODE_ENTER); fsn.saveNamespace(); // Now shut down and restart the NN nn.stop(); nn = null; // Start a new namesystem, which should be able to recover // the namespace from the previous incarnation. nn = new NameNode(conf); fsn = nn.getNamesystem(); // Make sure the image loaded including our edits. checkEditExists(fsn, 1); checkEditExists(fsn, 2); } finally { if (nn != null) { nn.stop(); } } }
/** * Is the HDFS healthy? * HDFS is considered as healthy if it is up and not in safemode. * * @param uri the HDFS URI. Note that the URI path is ignored. * @return true if HDFS is healthy; false, otherwise. */ public static boolean isHealthy(URI uri) { //check scheme final String scheme = uri.getScheme(); if (!"hdfs".equalsIgnoreCase(scheme)) { throw new IllegalArgumentException("This scheme is not hdfs, uri=" + uri); } final Configuration conf = new Configuration(); //disable FileSystem cache conf.setBoolean(String.format("fs.%s.impl.disable.cache", scheme), true); //disable client retry for rpc connection and rpc calls conf.setBoolean(DFSConfigKeys.DFS_CLIENT_RETRY_POLICY_ENABLED_KEY, false); conf.setInt(Client.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, 0); DistributedFileSystem fs = null; try { fs = (DistributedFileSystem)FileSystem.get(uri, conf); final boolean safemode = fs.setSafeMode(SafeModeAction.SAFEMODE_GET); if (LOG.isDebugEnabled()) { LOG.debug("Is namenode in safemode? " + safemode + "; uri=" + uri); } fs.close(); fs = null; return !safemode; } catch(IOException e) { if (LOG.isDebugEnabled()) { LOG.debug("Got an exception for uri=" + uri, e); } return false; } finally { IOUtils.cleanup(LOG, fs); } }
/** * This test verifies that if SafeMode is manually entered, name-node does not * come out of safe mode even after the startup safe mode conditions are met. * <ol> * <li>Start cluster with 1 data-node.</li> * <li>Create 2 files with replication 1.</li> * <li>Re-start cluster with 0 data-nodes. * Name-node should stay in automatic safe-mode.</li> * <li>Enter safe mode manually.</li> * <li>Start the data-node.</li> * <li>Wait longer than <tt>dfs.safemode.extension</tt> and * verify that the name-node is still in safe mode.</li> * </ol> * * @throws IOException */ @Test public void testManualSafeMode() throws IOException { fs = (DistributedFileSystem)cluster.getFileSystem(); Path file1 = new Path("/tmp/testManualSafeMode/file1"); Path file2 = new Path("/tmp/testManualSafeMode/file2"); // create two files with one block each. DFSTestUtil.createFile(fs, file1, 1000, (short)1, 0); DFSTestUtil.createFile(fs, file2, 2000, (short)1, 0); fs.close(); cluster.shutdown(); // now bring up just the NameNode. cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).format(false).build(); cluster.waitActive(); dfs = (DistributedFileSystem)cluster.getFileSystem(); assertTrue("No datanode is started. Should be in SafeMode", dfs.setSafeMode(SafeModeAction.SAFEMODE_GET)); // manually set safemode. dfs.setSafeMode(SafeModeAction.SAFEMODE_ENTER); // now bring up the datanode and wait for it to be active. cluster.startDataNodes(conf, 1, true, null, null); cluster.waitActive(); // wait longer than dfs.namenode.safemode.extension try { Thread.sleep(2000); } catch (InterruptedException ignored) {} assertTrue("should still be in SafeMode", dfs.setSafeMode(SafeModeAction.SAFEMODE_GET)); assertFalse("should not be in SafeMode", dfs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE)); }
/**
 * Starts a NameNode from {@code conf}, asserts that {@code /test} is a
 * directory, saves the namespace while in safe mode and then stops the
 * NameNode and waits for it via {@code join()}.
 *
 * @param conf configuration used to construct the NameNode
 * @throws IOException if any NameNode call fails
 */
private void checkNameSpace(Configuration conf) throws IOException {
  final NameNode nn = new NameNode(conf);

  final boolean testIsDirectory = nn.getFileInfo("/test").isDir();
  assertTrue(testIsDirectory);

  nn.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
  nn.saveNamespace();

  nn.stop();
  nn.join();
}
/**
 * Forwards the safe-mode request to {@code namenode} through a
 * {@code MutableFSCaller} and returns the value produced by
 * {@code callFS()}.
 *
 * @param action the safe-mode action to pass to the namenode
 * @return the result of the namenode's {@code setSafeMode} call
 * @throws IOException if the call fails
 */
@Override
public boolean setSafeMode(final SafeModeAction action) throws IOException {
  final MutableFSCaller<Boolean> caller = new MutableFSCaller<Boolean>() {
    Boolean call(int r) throws IOException {
      return namenode.setSafeMode(action);
    }
  };
  return caller.callFS();
}
/**
 * Writes half a block to a 3-replica file, stops one datanode, writes a
 * further quarter block, restarts the whole cluster, recovers the file and
 * verifies its block count, size, contents and eventual replication to
 * three datanodes.
 *
 * <p>The cleanup is nested so that the cluster is shut down even if
 * closing the file system throws.
 *
 * @param badDN index of the datanode that is stopped after the first sync
 */
void replicationTest(int badDN) throws Exception {
  LOG.info("START");
  cluster = new MiniDFSCluster(conf, 3, true, null);
  FileSystem fs1 = cluster.getFileSystem();
  try {
    int halfBlock = (int)BLOCK_SIZE/2;
    short rep = 3; // replication
    assertTrue(BLOCK_SIZE%4 == 0);

    file1 = new Path("/appendWithReplication.dat");

    // write 1/2 block & sync
    stm = fs1.create(file1, true, (int)BLOCK_SIZE*2, rep, BLOCK_SIZE);
    AppendTestUtil.write(stm, 0, halfBlock);
    stm.sync();
    assertNumCurrentReplicas(rep);

    // close one of the datanodes
    cluster.stopDataNode(badDN);

    // write 1/4 block & sync
    AppendTestUtil.write(stm, halfBlock, (int)BLOCK_SIZE/4);
    stm.sync();
    assertNumCurrentReplicas((short)(rep - 1));

    // restart the cluster
    /*
     * we put the namenode in safe mode first so it doesn't process
     * recoverBlock() commands from the remaining DFSClient as datanodes
     * are serially shutdown
     */
    cluster.getNameNode().setSafeMode(SafeModeAction.SAFEMODE_ENTER);
    fs1.close();
    cluster.shutdown();
    LOG.info("STOPPED first instance of the cluster");

    cluster = new MiniDFSCluster(conf, 3, false, null);
    cluster.getNameNode().getNamesystem().stallReplicationWork();
    cluster.waitActive();
    fs1 = cluster.getFileSystem();
    LOG.info("START second instance.");

    recoverFile(fs1);
    LOG.info("Recovered file");

    // the 2 DNs with the larger sequence number should win
    BlockLocation[] bl = fs1.getFileBlockLocations(
        fs1.getFileStatus(file1), 0, BLOCK_SIZE);
    LOG.info("Checking blocks");
    assertTrue("Should have one block", bl.length == 1);

    // Wait up to 1 second for block replication - we may have
    // only replication 1 for a brief moment after close, since
    // closing only waits for the configured minimum number of replicas,
    // and it may take some millis before the other DN reports block
    waitForBlockReplication(fs1, file1.toString(), 2, 1);

    assertFileSize(fs1, BLOCK_SIZE*3/4);
    checkFile(fs1, BLOCK_SIZE*3/4);

    LOG.info("Checking replication");
    // verify that, over time, the block has been replicated to 3 DN
    cluster.getNameNode().getNamesystem().restartReplicationWork();
    waitForBlockReplication(fs1, file1.toString(), 3, 20);
  } finally {
    try {
      fs1.close();
    } finally {
      // Must run even when closing the file system fails.
      cluster.shutdown();
    }
  }
}
void checksumTest(int goodDN) throws Exception { int deadDN = (goodDN + 1) % 3; int corruptDN = (goodDN + 2) % 3; LOG.info("START"); cluster = new MiniDFSCluster(conf, 3, true, null); FileSystem fs1 = cluster.getFileSystem(); try { int halfBlock = (int)BLOCK_SIZE/2; short rep = 3; // replication assertTrue(BLOCK_SIZE%8 == 0); file1 = new Path("/appendBadChecksum.dat"); // write 1/2 block & sync stm = fs1.create(file1, true, (int)BLOCK_SIZE*2, rep, BLOCK_SIZE); AppendTestUtil.write(stm, 0, halfBlock); stm.sync(); assertNumCurrentReplicas(rep); // close one of the datanodes cluster.stopDataNode(deadDN); // write 1/4 block & sync AppendTestUtil.write(stm, halfBlock, (int)BLOCK_SIZE/4); stm.sync(); assertNumCurrentReplicas((short)(rep - 1)); // stop the cluster cluster.getNameNode().setSafeMode(SafeModeAction.SAFEMODE_ENTER); fs1.close(); cluster.shutdown(); LOG.info("STOPPED first instance of the cluster"); // give the second datanode a bad CRC corruptDataNode(corruptDN, CorruptionType.CORRUPT_LAST_CHUNK); // restart the cluster cluster = new MiniDFSCluster(conf, 3, false, null); cluster.getNameNode().getNamesystem().stallReplicationWork(); cluster.waitActive(); fs1 = cluster.getFileSystem(); LOG.info("START second instance."); // verify that only the good datanode's file is used recoverFile(fs1); BlockLocation[] bl = fs1.getFileBlockLocations( fs1.getFileStatus(file1), 0, BLOCK_SIZE); assertTrue("Should have one block", bl.length == 1); assertTrue("Should have 1 replica for that block, not " + bl[0].getNames().length, bl[0].getNames().length == 1); assertTrue("The replica should be the datanode with the correct CRC", cluster.getDataNodes().get(goodDN).getSelfAddr().toString() .endsWith(bl[0].getNames()[0]) ); assertFileSize(fs1, BLOCK_SIZE*3/4); // should fail checkFile() if data with the bad CRC was used checkFile(fs1, BLOCK_SIZE*3/4); // ensure proper re-replication cluster.getNameNode().getNamesystem().restartReplicationWork(); waitForBlockReplication(fs1, 
file1.toString(), 3, 20); } finally { fs1.close(); cluster.shutdown(); } }
public void testManualSafeMode() throws IOException { MiniDFSCluster cluster = null; FileSystem fs = null; try { Configuration conf = new Configuration(); // disable safemode extension to make the test run faster. conf.set("dfs.safemode.extension", "1"); cluster = new MiniDFSCluster(conf, 1, true, null); cluster.waitActive(); fs = cluster.getFileSystem(); Path file1 = new Path("/tmp/testManualSafeMode/file1"); Path file2 = new Path("/tmp/testManualSafeMode/file2"); LOG.info("Created file1 and file2."); // create two files with one block each. DFSTestUtil.createFile(fs, file1, 1000, (short)1, 0); DFSTestUtil.createFile(fs, file2, 2000, (short)1, 0); cluster.shutdown(); // now bring up just the NameNode. cluster = new MiniDFSCluster(conf, 0, false, null); cluster.waitActive(); LOG.info("Restarted cluster with just the NameNode"); NameNode namenode = cluster.getNameNode(); assertTrue("No datanode is started. Should be in SafeMode", namenode.isInSafeMode()); // manually set safemode. namenode.setSafeMode(SafeModeAction.SAFEMODE_ENTER); // now bring up the datanode and wait for it to be active. cluster.startDataNodes(conf, 1, true, null, null); cluster.waitActive(); LOG.info("Datanode is started."); try { Thread.sleep(2000); } catch (InterruptedException ignored) {} assertTrue("should still be in SafeMode", namenode.isInSafeMode()); namenode.setSafeMode(SafeModeAction.SAFEMODE_LEAVE); assertFalse("should not be in SafeMode", namenode.isInSafeMode()); } finally { if(fs != null) fs.close(); if(cluster!= null) cluster.shutdown(); } }
/** * Verify that a saveNamespace command brings faulty directories * in fs.name.dir and fs.edit.dir back online. */ @Test public void testReinsertnamedirsInSavenamespace() throws Exception { // create a configuration with the key to restore error // directories in fs.name.dir Configuration conf = getConf(); MiniDFSCluster cluster = new MiniDFSCluster(conf, 1, true, null); cluster.waitActive(); FSNamesystem fsn = cluster.getNameNode().getNamesystem(); // Replace the FSImage with a spy FSImage originalImage = fsn.dir.fsImage; FSImage spyImage = spy(originalImage); long mrctxid = originalImage.storage.getMostRecentCheckpointTxId(); spyImage.setImageDigest(mrctxid ,originalImage.getImageDigest(mrctxid)); fsn.dir.fsImage = spyImage; File rootDir = spyImage.storage.getStorageDir(0).getRoot(); rootDir.setExecutable(false); rootDir.setWritable(false); rootDir.setReadable(false); try { doAnEdit(fsn, 1); fsn.setSafeMode(SafeModeAction.SAFEMODE_ENTER); // Save namespace - this injects a fault and marks one // directory as faulty. LOG.info("Doing the first savenamespace."); fsn.saveNamespace(false, false); LOG.warn("First savenamespace sucessful."); assertTrue("Savenamespace should have marked one directory as bad." + " But found " + spyImage.getRemovedStorageDirs().size() + " bad directories.", spyImage.getRemovedStorageDirs().size() == 1); rootDir.setExecutable(true); rootDir.setWritable(true); rootDir.setReadable(true); // The next call to savenamespace should try inserting the // erroneous directory back to fs.name.dir. This command should // be successful. LOG.info("Doing the second savenamespace."); fsn.saveNamespace(false, false); LOG.warn("Second savenamespace sucessful."); assertTrue("Savenamespace should have been successful in removing " + " bad directories from Image." 
+ " But found " + originalImage.getRemovedStorageDirs().size() + " bad directories.", originalImage.getRemovedStorageDirs().size() == 0); // Now shut down and restart the namesystem LOG.info("Shutting down fsimage."); originalImage.close(); fsn.close(); fsn = null; cluster.shutdown(); // Start a new namesystem, which should be able to recover // the namespace from the previous incarnation. LOG.info("Loading new FSmage from disk."); cluster = new MiniDFSCluster(conf, 1, false, null); cluster.waitActive(); fsn = cluster.getNameNode().getNamesystem(); // Make sure the image loaded including our edit. LOG.info("Checking reloaded image."); checkEditExists(cluster.getNameNode().namesystem, 1); LOG.info("Reloaded image is good."); } finally { fsn.close(); cluster.shutdown(); } }
/** * Injects a failure on all storage directories while saving namespace. * * @param restoreStorageAfterFailure if true, will try to save again after * clearing the failure injection */ private void doTestFailedSaveNamespace(boolean restoreStorageAfterFailure) throws Exception { Configuration conf = getConf(); MiniDFSCluster cluster = new MiniDFSCluster(conf, 0, true, null); cluster.waitActive(); FSNamesystem fsn = cluster.getNameNode().getNamesystem(); // Replace the FSImage with a spy final FSImage originalImage = fsn.dir.fsImage; NNStorage storage = originalImage.storage; storage.close(); // unlock any directories that FSNamesystem's initialization may have locked NNStorage spyStorage = spy(storage); originalImage.storage = spyStorage; FSImage spyImage = spy(originalImage); fsn.dir.fsImage = spyImage; spyImage.storage.setStorageDirectories( NNStorageConfiguration.getNamespaceDirs(conf), NNStorageConfiguration.getNamespaceEditsDirs(conf)); doThrow(new IOException("Injected fault: saveFSImage")). when(spyImage).saveFSImage( (SaveNamespaceContext)anyObject(), (ImageManager)anyObject(), anyBoolean()); try { doAnEdit(fsn, 1); // Save namespace fsn.setSafeMode(SafeModeAction.SAFEMODE_ENTER); try { fsn.saveNamespace(false, false); fail("saveNamespace did not fail even when all directories failed!"); } catch (IOException ioe) { LOG.info("Got expected exception", ioe); } // Ensure that, if storage dirs come back online, things work again. if (restoreStorageAfterFailure) { Mockito.reset(spyImage); fsn.saveNamespace(false, false); checkEditExists(fsn, 1); } // Now shut down and restart the NN originalImage.close(); fsn.close(); fsn = null; // Start a new namesystem, which should be able to recover // the namespace from the previous incarnation. cluster = new MiniDFSCluster(conf, 0, false, null); cluster.waitActive(); fsn = cluster.getNameNode().getNamesystem(); // Make sure the image loaded including our edits. 
checkEditExists(fsn, 1); } finally { if (fsn != null) { fsn.close(); } } }
@Test public void testSaveWhileEditsRolled() throws Exception { Configuration conf = getConf(); NameNode.myMetrics = new NameNodeMetrics(conf, null); NameNode.format(conf); NameNode nn = new NameNode(conf); FSNamesystem fsn = nn.getNamesystem(); // Replace the FSImage with a spy final FSImage originalImage = fsn.dir.fsImage; FSImage spyImage = spy(originalImage); fsn.dir.fsImage = spyImage; try { doAnEdit(fsn, 1); CheckpointSignature sig = fsn.rollEditLog(); LOG.warn("Checkpoint signature: " + sig); // Do another edit doAnEdit(fsn, 2); // Save namespace fsn.setSafeMode(SafeModeAction.SAFEMODE_ENTER); fsn.saveNamespace(false, false); // Now shut down and restart the NN nn.stop(); nn = null; // Start a new namesystem, which should be able to recover // the namespace from the previous incarnation. nn = new NameNode(conf); fsn = nn.getNamesystem(); // Make sure the image loaded including our edits. checkEditExists(fsn, 1); checkEditExists(fsn, 2); } finally { if (nn != null) { nn.stop(); } } }
@Test public void testSafeModeDuplicateBlocks() throws Exception { conf = new Configuration(); conf.setInt("dfs.block.size", BLOCK_SIZE); conf.setLong("dfs.heartbeat.interval", 1); cluster = new MiniDFSCluster(conf, 1, true, null); fs = cluster.getFileSystem(); // Create data. String test = "/testSafeModeDuplicateBlocks"; DFSTestUtil util = new DFSTestUtil(test, 10, 1, MAX_FILE_SIZE); util.createFiles(fs, test); fs.close(); cluster.shutdown(); // Restart the cluster with NN in manual safemode. conf.setLong("dfs.blockreport.intervalMsec", 200); cluster = new MiniDFSCluster(conf, 0, false, null); NameNode nn = cluster.getNameNode(); FSNamesystem ns = nn.namesystem; nn.setSafeMode(SafeModeAction.SAFEMODE_ENTER); cluster.startDataNodes(conf, 1, true, null, null); cluster.waitActive(); long start = System.currentTimeMillis(); // Wait for atleast 3 full block reports from the datanode. while (System.currentTimeMillis() - start <= MAX_WAIT_TIME) { // This makes sure we trigger, redudant addStoredBlocks() on the NameNode. for (DatanodeInfo dn : ns.datanodeReport(DatanodeReportType.ALL)) { ns.unprotectedRemoveDatanode(ns.getDatanode(dn)); } Thread.sleep(200); } // Verify atleast 3 full block reports took place. assertTrue(3 <= NameNode.getNameNodeMetrics().numBlockReport .getCurrentIntervalValue()); // Verify the total number of safe blocks. long totalBlocks = ns.getBlocksTotal(); long safeBlocks = ns.getSafeBlocks(); assertEquals(totalBlocks, safeBlocks); }
@Test /** * Verifies that removing a decommissioning datanode decrements the safe block * count. */ public void testDecommissionDecrementSafeBlock() throws Exception { Configuration conf = new Configuration(); MiniDFSCluster cluster = new MiniDFSCluster(conf, 1, true, null); try { // Create a single block on a datanode. FSNamesystem ns = cluster.getNameNode().namesystem; ArrayList<DatanodeDescriptor> dns = ns .getDatanodeListForReport(DatanodeReportType.ALL); assertNotNull(dns); assertEquals(1, dns.size()); DFSTestUtil.createFile(cluster.getFileSystem(), new Path("/abc"), 1024L, (short) 3, 0); // Remove the datanode, enter safemode and restart it. ns.removeDatanode(dns.get(0)); ns.setSafeMode(SafeModeAction.SAFEMODE_ENTER); cluster.restartDataNodes(); // Wait for a block report. long start = System.currentTimeMillis(); while (System.currentTimeMillis() - start < 10000 && ns.getSafeBlocks() != 1) { Thread.sleep(1000); } // Verify we received the block report. assertEquals(1, ns.getSafeBlocks()); // Remove a decommissioning node and verify the safe block count is // decremented. ns.startDecommission(dns.get(0)); assertEquals(1, ns.getSafeBlocks()); ns.removeDatanode(dns.get(0)); assertEquals(0, ns.getSafeBlocks()); } finally { cluster.shutdown(); } }
/**
 * Saves the namespace of the given NameNode, entering safe mode before the
 * save and leaving it afterwards.
 *
 * <p>If {@code saveNamespace()} throws, the call that leaves safe mode is
 * not reached.
 *
 * @param nn the NameNode whose namespace is saved
 * @throws IOException if any of the NameNode calls fails
 */
private static void doSaveNamespace(NameNode nn) throws IOException {
  LOG.info("Saving namespace...");
  // The save is performed between an explicit enter and leave of safe mode.
  nn.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
  nn.saveNamespace();
  nn.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
}