boolean setSafeMode(SafeModeAction action) throws IOException {
  if (action != SafeModeAction.SAFEMODE_GET) {
    checkSuperuserPrivilege();
    switch(action) {
    case SAFEMODE_LEAVE: // leave safe mode
      leaveSafeMode();
      break;
    case SAFEMODE_ENTER: // enter safe mode
      enterSafeMode(false);
      break;
    default:
      LOG.error("Unexpected safe mode action");
    }
  }
  return isInSafeMode();
}
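// A minimal client-side sketch (not taken from the Hadoop sources) of the
// admin path that ends up in the method above: a DistributedFileSystem handle
// enters safe mode, queries the state, and leaves again. The Configuration
// `conf`, a running HDFS cluster, and a `LOG` field are assumed; as the method
// above shows, ENTER and LEAVE require superuser privilege while GET does not.
private static void toggleSafeMode(Configuration conf) throws IOException {
  DistributedFileSystem dfs = (DistributedFileSystem) FileSystem.get(conf);
  dfs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
  boolean inSafeMode = dfs.setSafeMode(SafeModeAction.SAFEMODE_GET);
  LOG.info("NameNode in safe mode: " + inSafeMode);
  dfs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
}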
/**
 * Update internal state to indicate that a rolling upgrade is in progress for
 * a non-HA setup. This requires that the namesystem is in safe mode; after the
 * rollback checkpoint has been saved, the namesystem leaves safe mode
 * automatically.
 */
private void startRollingUpgradeInternalForNonHA(long startTime)
    throws IOException {
  Preconditions.checkState(!haEnabled);
  if (!isInSafeMode()) {
    throw new IOException("Safe mode should be turned ON "
        + "in order to create namespace image.");
  }
  checkRollingUpgrade("start rolling upgrade");
  getFSImage().checkUpgrade();
  // in non-HA setup, we do an extra checkpoint to generate a rollback image
  getFSImage().saveNamespace(this, NameNodeFile.IMAGE_ROLLBACK, null);
  LOG.info("Successfully saved namespace for preparing rolling upgrade.");

  // leave SafeMode automatically
  setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
  setRollingUpgradeInfo(true, startTime);
}
/**
 * Ensure that during downgrade the NN fails to load an fsimage with a newer
 * format.
 */
@Test(expected = IncorrectVersionException.class)
public void testRejectNewFsImage() throws IOException {
  final Configuration conf = new Configuration();
  MiniDFSCluster cluster = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build();
    cluster.waitActive();
    DistributedFileSystem fs = cluster.getFileSystem();
    fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
    fs.saveNamespace();
    fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
    NNStorage storage = spy(cluster.getNameNode().getFSImage().getStorage());
    int futureVersion = NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION - 1;
    doReturn(futureVersion).when(storage).getServiceLayoutVersion();
    storage.writeAll();
    cluster.restartNameNode(0, true, "-rollingUpgrade", "downgrade");
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
private static void startRollingUpgrade(Path foo, Path bar,
    Path file, byte[] data,
    MiniDFSCluster cluster) throws IOException {
  final DistributedFileSystem dfs = cluster.getFileSystem();

  // start rolling upgrade
  dfs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
  dfs.rollingUpgrade(RollingUpgradeAction.PREPARE);
  dfs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);

  dfs.mkdirs(bar);
  Assert.assertTrue(dfs.exists(foo));
  Assert.assertTrue(dfs.exists(bar));

  // truncate a file
  final int newLength = DFSUtil.getRandom().nextInt(data.length - 1) + 1;
  dfs.truncate(file, newLength);
  TestFileTruncate.checkBlockRecovery(file, dfs);
  AppendTestUtil.checkFullFile(dfs, file, newLength, data);
}
@Test (timeout = 300000)
public void testQueryAfterRestart() throws IOException, InterruptedException {
  final Configuration conf = new Configuration();
  MiniDFSCluster cluster = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build();
    cluster.waitActive();
    DistributedFileSystem dfs = cluster.getFileSystem();

    dfs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
    // start rolling upgrade
    dfs.rollingUpgrade(RollingUpgradeAction.PREPARE);
    queryForPreparation(dfs);
    dfs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
    dfs.saveNamespace();
    dfs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);

    cluster.restartNameNodes();
    dfs.rollingUpgrade(RollingUpgradeAction.QUERY);
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
/**
 * Test that saveNamespace succeeds when a parent directory with an open lease
 * is renamed and the destination directory already exists.
 * This is a regression test for HDFS-2827.
 */
@Test
public void testSaveNamespaceWithRenamedLease() throws Exception {
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(new Configuration())
      .numDataNodes(1).build();
  cluster.waitActive();
  DistributedFileSystem fs = (DistributedFileSystem) cluster.getFileSystem();
  OutputStream out = null;
  try {
    fs.mkdirs(new Path("/test-target"));
    out = fs.create(new Path("/test-source/foo")); // don't close
    fs.rename(new Path("/test-source/"), new Path("/test-target/"));

    fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
    cluster.getNameNodeRpc().saveNamespace();
    fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
  } finally {
    IOUtils.cleanup(LOG, out, fs);
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
@Test (timeout=30000)
public void testSaveNamespaceWithDanglingLease() throws Exception {
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(new Configuration())
      .numDataNodes(1).build();
  cluster.waitActive();
  DistributedFileSystem fs = cluster.getFileSystem();
  try {
    cluster.getNamesystem().leaseManager.addLease("me", "/non-existent");
    fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
    cluster.getNameNodeRpc().saveNamespace();
    fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
@Test
public void testCorrectNumberOfBlocksAfterRestart() throws IOException {
  final Path foo = new Path("/foo");
  final Path bar = new Path(foo, "bar");
  final Path file = new Path(foo, "file");
  final String snapshotName = "ss0";

  DFSTestUtil.createFile(hdfs, file, BLOCKSIZE, REPLICATION, seed);
  hdfs.mkdirs(bar);
  hdfs.setQuota(foo, Long.MAX_VALUE - 1, Long.MAX_VALUE - 1);
  hdfs.setQuota(bar, Long.MAX_VALUE - 1, Long.MAX_VALUE - 1);
  hdfs.allowSnapshot(foo);
  hdfs.createSnapshot(foo, snapshotName);

  hdfs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
  hdfs.saveNamespace();
  hdfs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);

  hdfs.deleteSnapshot(foo, snapshotName);
  hdfs.delete(bar, true);
  hdfs.delete(foo, true);

  long numberOfBlocks = cluster.getNamesystem().getBlocksTotal();
  cluster.restartNameNode(0);
  assertEquals(numberOfBlocks, cluster.getNamesystem().getBlocksTotal());
}
/**
 * Ensure that the digest written by the saver equals the digest of the file.
 */
@Test
public void testDigest() throws IOException {
  Configuration conf = new Configuration();
  MiniDFSCluster cluster = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build();
    DistributedFileSystem fs = cluster.getFileSystem();
    fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
    fs.saveNamespace();
    fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
    File currentDir = FSImageTestUtil.getNameNodeCurrentDirs(cluster, 0).get(0);
    File fsimage = FSImageTestUtil.findNewestImageFile(
        currentDir.getAbsolutePath());
    assertEquals(MD5FileUtils.readStoredMd5ForFile(fsimage),
        MD5FileUtils.computeMd5ForFile(fsimage));
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
/**
 * Ensure that restarting the namenode with the downgrade option throws an
 * exception, because the option is now obsolete.
 */
@Test(expected = IllegalArgumentException.class)
public void testRejectNewFsImage() throws IOException {
  final Configuration conf = new Configuration();
  MiniDFSCluster cluster = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build();
    cluster.waitActive();
    DistributedFileSystem fs = cluster.getFileSystem();
    fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
    fs.saveNamespace();
    fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
    NNStorage storage = spy(cluster.getNameNode().getFSImage().getStorage());
    int futureVersion = NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION - 1;
    doReturn(futureVersion).when(storage).getServiceLayoutVersion();
    storage.writeAll();
    cluster.restartNameNode(0, true, "-rollingUpgrade", "downgrade");
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
private static void startRollingUpgrade(Path foo, Path bar,
    Path file, byte[] data,
    MiniDFSCluster cluster) throws IOException {
  final DistributedFileSystem dfs = cluster.getFileSystem();

  // start rolling upgrade
  dfs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
  dfs.rollingUpgrade(RollingUpgradeAction.PREPARE);
  dfs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);

  dfs.mkdirs(bar);
  Assert.assertTrue(dfs.exists(foo));
  Assert.assertTrue(dfs.exists(bar));

  // truncate a file
  final int newLength =
      ThreadLocalRandom.current().nextInt(data.length - 1) + 1;
  dfs.truncate(file, newLength);
  TestFileTruncate.checkBlockRecovery(file, dfs);
  AppendTestUtil.checkFullFile(dfs, file, newLength, data);
}
/**
 * Test that saveNamespace succeeds when a parent directory with an open lease
 * is renamed and the destination directory already exists.
 * This is a regression test for HDFS-2827.
 */
@Test (timeout=30000)
public void testSaveNamespaceWithRenamedLease() throws Exception {
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(new Configuration())
      .numDataNodes(1).build();
  cluster.waitActive();
  DistributedFileSystem fs = cluster.getFileSystem();
  OutputStream out = null;
  try {
    fs.mkdirs(new Path("/test-target"));
    out = fs.create(new Path("/test-source/foo")); // don't close
    fs.rename(new Path("/test-source/"), new Path("/test-target/"));

    fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
    cluster.getNameNodeRpc().saveNamespace(0, 0);
    fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
  } finally {
    IOUtils.cleanup(LOG, out, fs);
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
@Test (timeout=30000)
public void testSaveNamespaceWithDanglingLease() throws Exception {
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(new Configuration())
      .numDataNodes(1).build();
  cluster.waitActive();
  DistributedFileSystem fs = cluster.getFileSystem();
  try {
    cluster.getNamesystem().leaseManager.addLease("me",
        INodeId.ROOT_INODE_ID + 1);
    fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
    cluster.getNameNodeRpc().saveNamespace(0, 0);
    fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
/**
 * Verify that the erasure coding policy on a snapshotted directory is still
 * present after restarting the NameNode.
 */
@Test(timeout = 120000)
public void testSnapshotsOnErasureCodingDirAfterNNRestart() throws Exception {
  final Path ecDir = new Path("/ecdir");
  fs.mkdirs(ecDir);
  fs.allowSnapshot(ecDir);

  // set erasure coding policy
  fs.setErasureCodingPolicy(ecDir, sysDefaultPolicy);
  final Path snap1 = fs.createSnapshot(ecDir, "snap1");
  ErasureCodingPolicy ecSnap = fs.getErasureCodingPolicy(snap1);
  assertEquals("Got unexpected erasure coding policy", sysDefaultPolicy,
      ecSnap);

  // save namespace, restart namenode, and check ec policy correctness.
  fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
  fs.saveNamespace();
  fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
  cluster.restartNameNode(true);

  ErasureCodingPolicy ecSnap1 = fs.getErasureCodingPolicy(snap1);
  assertEquals("Got unexpected erasure coding policy", sysDefaultPolicy,
      ecSnap1);
  assertEquals("Got unexpected ecSchema", ecSnap.getSchema(),
      ecSnap1.getSchema());
}
@Override // NameNode
protected void initialize(Configuration conf) throws IOException {
  // Trash is disabled in BackupNameNode,
  // but should be turned back on if it ever becomes active.
  conf.setLong(CommonConfigurationKeys.FS_TRASH_INTERVAL_KEY,
      CommonConfigurationKeys.FS_TRASH_INTERVAL_DEFAULT);
  NamespaceInfo nsInfo = handshake(conf);
  super.initialize(conf);
  namesystem.setBlockPoolId(nsInfo.getBlockPoolID());

  if (!namesystem.isInSafeMode()) {
    namesystem.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
  }

  // Backup node should never do lease recovery,
  // therefore lease hard limit should never expire.
  namesystem.leaseManager.setLeasePeriod(
      HdfsConstants.LEASE_SOFTLIMIT_PERIOD, Long.MAX_VALUE);

  // register with the active name-node
  registerWith(nsInfo);
  // Checkpoint daemon should start after the rpc server started
  runCheckpointDaemon(conf);
  InetSocketAddress addr = getHttpAddress();
  if (addr != null) {
    conf.set(BN_HTTP_ADDRESS_NAME_KEY,
        NetUtils.getHostPortString(getHttpAddress()));
  }
}
/**
 * Instantiates an FSNamesystem loaded from the image and edits
 * directories specified in the passed Configuration.
 *
 * @param conf the Configuration which specifies the storage directories
 *             from which to load
 * @return an FSNamesystem which contains the loaded namespace
 * @throws IOException if loading fails
 */
static FSNamesystem loadFromDisk(Configuration conf) throws IOException {
  checkConfiguration(conf);
  FSImage fsImage = new FSImage(conf,
      FSNamesystem.getNamespaceDirs(conf),
      FSNamesystem.getNamespaceEditsDirs(conf));
  FSNamesystem namesystem = new FSNamesystem(conf, fsImage, false);
  StartupOption startOpt = NameNode.getStartupOption(conf);
  if (startOpt == StartupOption.RECOVER) {
    namesystem.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
  }

  long loadStart = monotonicNow();
  try {
    namesystem.loadFSImage(startOpt);
  } catch (IOException ioe) {
    LOG.warn("Encountered exception loading fsimage", ioe);
    fsImage.close();
    throw ioe;
  }
  long timeTakenToLoadFSImage = monotonicNow() - loadStart;
  LOG.info("Finished loading FSImage in " + timeTakenToLoadFSImage + " msecs");
  NameNodeMetrics nnMetrics = NameNode.getNameNodeMetrics();
  if (nnMetrics != null) {
    nnMetrics.setFsImageLoadTime((int) timeTakenToLoadFSImage);
  }
  return namesystem;
}
@Override // ClientProtocol
public boolean setSafeMode(SafeModeAction action, boolean isChecked)
    throws IOException {
  checkNNStartup();
  OperationCategory opCategory = OperationCategory.UNCHECKED;
  if (isChecked) {
    if (action == SafeModeAction.SAFEMODE_GET) {
      opCategory = OperationCategory.READ;
    } else {
      opCategory = OperationCategory.WRITE;
    }
  }
  namesystem.checkOperation(opCategory);
  return namesystem.setSafeMode(action);
}
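// A small client-side sketch, assuming a DistributedFileSystem handle `dfs`:
// when the boolean flag is true, the call is subject to the operation-category
// check above, so SAFEMODE_GET is treated as a read and ENTER/LEAVE as writes;
// when it is false, the category stays OperationCategory.UNCHECKED.
boolean inSafeMode = dfs.setSafeMode(SafeModeAction.SAFEMODE_GET, true);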
public static SafeModeActionProto convert(SafeModeAction a) {
  switch (a) {
  case SAFEMODE_LEAVE:
    return SafeModeActionProto.SAFEMODE_LEAVE;
  case SAFEMODE_ENTER:
    return SafeModeActionProto.SAFEMODE_ENTER;
  case SAFEMODE_GET:
    return SafeModeActionProto.SAFEMODE_GET;
  default:
    throw new IllegalArgumentException("Unexpected SafeModeAction :" + a);
  }
}
public static SafeModeAction convert(
    ClientNamenodeProtocolProtos.SafeModeActionProto a) {
  switch (a) {
  case SAFEMODE_LEAVE:
    return SafeModeAction.SAFEMODE_LEAVE;
  case SAFEMODE_ENTER:
    return SafeModeAction.SAFEMODE_ENTER;
  case SAFEMODE_GET:
    return SafeModeAction.SAFEMODE_GET;
  default:
    throw new IllegalArgumentException("Unexpected SafeModeAction :" + a);
  }
}
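// A hypothetical round-trip check (illustrative only, assuming both convert()
// overloads live on the PBHelper class referenced by the translator below):
// the two methods are inverses of each other for every SafeModeAction value.
for (SafeModeAction action : SafeModeAction.values()) {
  SafeModeActionProto proto = PBHelper.convert(action);
  if (PBHelper.convert(proto) != action) {
    throw new AssertionError("Round-trip conversion changed " + action);
  }
}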
@Override
public boolean setSafeMode(SafeModeAction action, boolean isChecked)
    throws IOException {
  SetSafeModeRequestProto req = SetSafeModeRequestProto.newBuilder()
      .setAction(PBHelper.convert(action)).setChecked(isChecked).build();
  try {
    return rpcProxy.setSafeMode(null, req).getResult();
  } catch (ServiceException e) {
    throw ProtobufHelper.getRemoteException(e);
  }
}
private boolean waitExitSafeMode(DistributedFileSystem dfs, boolean inSafeMode)
    throws IOException {
  while (inSafeMode) {
    try {
      Thread.sleep(5000);
    } catch (InterruptedException e) {
      throw new IOException("Wait Interrupted");
    }
    inSafeMode = dfs.setSafeMode(SafeModeAction.SAFEMODE_GET, false);
  }
  return inSafeMode;
}
private boolean waitExitSafeMode(ClientProtocol nn, boolean inSafeMode)
    throws IOException {
  while (inSafeMode) {
    try {
      Thread.sleep(5000);
    } catch (InterruptedException e) {
      throw new IOException("Wait Interrupted");
    }
    inSafeMode = nn.setSafeMode(SafeModeAction.SAFEMODE_GET, false);
  }
  return inSafeMode;
}
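// Hypothetical usage of the polling helper above: query the current state once
// through the ClientProtocol proxy `nn`, then block until the NameNode reports
// that it has left safe mode.
boolean inSafeMode = nn.setSafeMode(SafeModeAction.SAFEMODE_GET, false);
inSafeMode = waitExitSafeMode(nn, inSafeMode);
assert !inSafeMode;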
/**
 * This test verifies that if safe mode is entered manually, the name-node does
 * not leave safe mode even after the startup safe mode conditions are met.
 * <ol>
 * <li>Start cluster with 1 data-node.</li>
 * <li>Create 2 files with replication 1.</li>
 * <li>Re-start cluster with 0 data-nodes.
 * Name-node should stay in automatic safe-mode.</li>
 * <li>Enter safe mode manually.</li>
 * <li>Start the data-node.</li>
 * <li>Wait longer than <tt>dfs.namenode.safemode.extension</tt> and
 * verify that the name-node is still in safe mode.</li>
 * </ol>
 *
 * @throws IOException
 */
@Test
public void testManualSafeMode() throws IOException {
  fs = cluster.getFileSystem();
  Path file1 = new Path("/tmp/testManualSafeMode/file1");
  Path file2 = new Path("/tmp/testManualSafeMode/file2");

  // create two files with one block each.
  DFSTestUtil.createFile(fs, file1, 1000, (short)1, 0);
  DFSTestUtil.createFile(fs, file2, 1000, (short)1, 0);
  fs.close();
  cluster.shutdown();

  // now bring up just the NameNode.
  cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(0).format(false).build();
  cluster.waitActive();
  dfs = cluster.getFileSystem();

  assertTrue("No datanode is started. Should be in SafeMode",
      dfs.setSafeMode(SafeModeAction.SAFEMODE_GET));

  // manually set safemode.
  dfs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);

  // now bring up the datanode and wait for it to be active.
  cluster.startDataNodes(conf, 1, true, null, null);
  cluster.waitActive();

  // wait longer than dfs.namenode.safemode.extension
  try {
    Thread.sleep(2000);
  } catch (InterruptedException ignored) {}

  assertTrue("should still be in SafeMode",
      dfs.setSafeMode(SafeModeAction.SAFEMODE_GET));
  assertFalse("should not be in SafeMode",
      dfs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE));
}
public void testSafeModeUtils() throws IOException {
  dfs = cluster.getFileSystem();

  // Enter safemode.
  dfs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
  assertTrue("State was expected to be in safemode.", dfs.isInSafeMode());

  // Exit safemode.
  dfs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
  assertFalse("State was expected to be out of safemode.", dfs.isInSafeMode());
}
/**
 * Download a few fsimages using `hdfs dfsadmin -fetchImage ...' and verify
 * the results.
 */
@Test
public void testFetchImage() throws Exception {
  FETCHED_IMAGE_FILE.mkdirs();
  Configuration conf = new Configuration();
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
  FileSystem fs = null;
  try {
    DFSAdmin dfsAdmin = new DFSAdmin();
    dfsAdmin.setConf(conf);

    runFetchImage(dfsAdmin, cluster);

    fs = cluster.getFileSystem();
    fs.mkdirs(new Path("/foo"));
    fs.mkdirs(new Path("/foo2"));
    fs.mkdirs(new Path("/foo3"));

    cluster.getNameNodeRpc()
        .setSafeMode(SafeModeAction.SAFEMODE_ENTER, false);
    cluster.getNameNodeRpc().saveNamespace();
    cluster.getNameNodeRpc()
        .setSafeMode(SafeModeAction.SAFEMODE_LEAVE, false);

    runFetchImage(dfsAdmin, cluster);
  } finally {
    if (fs != null) {
      fs.close();
    }
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
/**
 * Restart the NameNode, optionally saving a new checkpoint.
 *
 * @param fs DistributedFileSystem used for saving namespace
 * @param persistNamespace boolean true to save a new checkpoint
 * @throws IOException if restart fails
 */
private void restart(DistributedFileSystem fs, boolean persistNamespace)
    throws IOException {
  if (persistNamespace) {
    fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
    fs.saveNamespace();
    fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
  }

  cluster.restartNameNode();
  cluster.waitActive();
}
/**
 * EditLogOp load test for Truncate.
 */
@Test
public void testTruncateEditLogLoad() throws IOException {
  // purge previously accumulated edits
  fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
  fs.saveNamespace();
  fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);

  int startingFileSize = 2 * BLOCK_SIZE + BLOCK_SIZE / 2;
  int toTruncate = 1;
  final String s = "/testTruncateEditLogLoad";
  final Path p = new Path(s);
  byte[] contents = AppendTestUtil.initBuffer(startingFileSize);
  writeContents(contents, startingFileSize, p);

  int newLength = startingFileSize - toTruncate;
  boolean isReady = fs.truncate(p, newLength);
  assertThat("truncate should have triggered block recovery.",
      isReady, is(false));

  cluster.restartNameNode();

  String holder = UserGroupInformation.getCurrentUser().getUserName();
  cluster.getNamesystem().recoverLease(s, holder, "");

  checkBlockRecovery(p);
  checkFullFile(p, newLength, contents);
  fs.delete(p, false);
}