/** * Make sure that an HA NN will start if a previous upgrade was in progress. */ @Test public void testStartingWithUpgradeInProgressSucceeds() throws Exception { MiniDFSCluster cluster = null; try { cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()) .numDataNodes(0) .build(); // Simulate an upgrade having started. for (int i = 0; i < 2; i++) { for (URI uri : cluster.getNameDirs(i)) { File prevTmp = new File(new File(uri), Storage.STORAGE_TMP_PREVIOUS); LOG.info("creating previous tmp dir: " + prevTmp); assertTrue(prevTmp.mkdirs()); } } cluster.restartNameNodes(); } finally { if (cluster != null) { cluster.shutdown(); } } }
@Before public void setUpCluster() throws Exception { conf = new Configuration(); conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY, 1); conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 1); conf.setInt(DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY, 10); conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1); HAUtil.setAllowStandbyReads(conf, true); if (clusterType == TestType.SHARED_DIR_HA) { MiniDFSNNTopology topology = MiniQJMHACluster.createDefaultTopology(10000); cluster = new MiniDFSCluster.Builder(conf) .nnTopology(topology) .numDataNodes(0) .checkExitOnShutdown(false) .build(); } else { Builder builder = new MiniQJMHACluster.Builder(conf); builder.getDfsBuilder().numDataNodes(0).checkExitOnShutdown(false); miniQjmHaCluster = builder.build(); cluster = miniQjmHaCluster.getDfsCluster(); } cluster.waitActive(); nn0 = cluster.getNameNode(0); nn1 = cluster.getNameNode(1); cluster.transitionToActive(0); fs = HATestUtil.configureFailoverFs(cluster, conf); }
@Before public void setUpCluster() throws Exception { conf = new Configuration(); conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY, 1); conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 1); conf.setInt(DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY, 10); conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1); HAUtil.setAllowStandbyReads(conf, true); if (clusterType == TestType.SHARED_DIR_HA) { MiniDFSNNTopology topology = MiniQJMHACluster.createDefaultTopology(); cluster = new MiniDFSCluster.Builder(conf) .nnTopology(topology) .numDataNodes(0) .checkExitOnShutdown(false) .build(); } else { Builder builder = new MiniQJMHACluster.Builder(conf); builder.getDfsBuilder().numDataNodes(0).checkExitOnShutdown(false); miniQjmHaCluster = builder.build(); cluster = miniQjmHaCluster.getDfsCluster(); } cluster.waitActive(); nn0 = cluster.getNameNode(0); nn1 = cluster.getNameNode(1); cluster.transitionToActive(0); fs = HATestUtil.configureFailoverFs(cluster, conf); }
/** * Make sure that an HA NN with NFS-based HA can successfully start and * upgrade. */ @Test public void testNfsUpgrade() throws IOException, URISyntaxException { MiniDFSCluster cluster = null; FileSystem fs = null; try { cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()) .numDataNodes(0) .build(); File sharedDir = new File(cluster.getSharedEditsDir(0, 1)); // No upgrade is in progress at the moment. checkClusterPreviousDirExistence(cluster, false); assertCTimesEqual(cluster); checkPreviousDirExistence(sharedDir, false); // Transition NN0 to active and do some FS ops. cluster.transitionToActive(0); fs = HATestUtil.configureFailoverFs(cluster, conf); assertTrue(fs.mkdirs(new Path("/foo1"))); // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade // flag. cluster.shutdownNameNode(1); cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE); cluster.restartNameNode(0, false); checkNnPreviousDirExistence(cluster, 0, true); checkNnPreviousDirExistence(cluster, 1, false); checkPreviousDirExistence(sharedDir, true); // NN0 should come up in the active state when given the -upgrade option, // so no need to transition it to active. assertTrue(fs.mkdirs(new Path("/foo2"))); // Restart NN0 without the -upgrade flag, to make sure that works. cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR); cluster.restartNameNode(0, false); // Make sure we can still do FS ops after upgrading. cluster.transitionToActive(0); assertTrue(fs.mkdirs(new Path("/foo3"))); // Now bootstrap the standby with the upgraded info. int rc = BootstrapStandby.run( new String[]{"-force"}, cluster.getConfiguration(1)); assertEquals(0, rc); // Now restart NN1 and make sure that we can do ops against that as well. cluster.restartNameNode(1); cluster.transitionToStandby(0); cluster.transitionToActive(1); assertTrue(fs.mkdirs(new Path("/foo4"))); assertCTimesEqual(cluster); } finally { if (fs != null) { fs.close(); } if (cluster != null) { cluster.shutdown(); } } }
/** * Make sure that an HA NN can successfully upgrade when configured using * JournalNodes. */ @Test public void testUpgradeWithJournalNodes() throws IOException, URISyntaxException { MiniQJMHACluster qjCluster = null; FileSystem fs = null; try { Builder builder = new MiniQJMHACluster.Builder(conf); builder.getDfsBuilder() .numDataNodes(0); qjCluster = builder.build(); MiniDFSCluster cluster = qjCluster.getDfsCluster(); // No upgrade is in progress at the moment. checkJnPreviousDirExistence(qjCluster, false); checkClusterPreviousDirExistence(cluster, false); assertCTimesEqual(cluster); // Transition NN0 to active and do some FS ops. cluster.transitionToActive(0); fs = HATestUtil.configureFailoverFs(cluster, conf); assertTrue(fs.mkdirs(new Path("/foo1"))); // get the value of the committedTxnId in journal nodes final long cidBeforeUpgrade = getCommittedTxnIdValue(qjCluster); // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade // flag. cluster.shutdownNameNode(1); cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE); cluster.restartNameNode(0, false); checkNnPreviousDirExistence(cluster, 0, true); checkNnPreviousDirExistence(cluster, 1, false); checkJnPreviousDirExistence(qjCluster, true); assertTrue(cidBeforeUpgrade <= getCommittedTxnIdValue(qjCluster)); // NN0 should come up in the active state when given the -upgrade option, // so no need to transition it to active. assertTrue(fs.mkdirs(new Path("/foo2"))); // Restart NN0 without the -upgrade flag, to make sure that works. cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR); cluster.restartNameNode(0, false); // Make sure we can still do FS ops after upgrading. cluster.transitionToActive(0); assertTrue(fs.mkdirs(new Path("/foo3"))); assertTrue(getCommittedTxnIdValue(qjCluster) > cidBeforeUpgrade); // Now bootstrap the standby with the upgraded info. int rc = BootstrapStandby.run( new String[]{"-force"}, cluster.getConfiguration(1)); assertEquals(0, rc); // Now restart NN1 and make sure that we can do ops against that as well. cluster.restartNameNode(1); cluster.transitionToStandby(0); cluster.transitionToActive(1); assertTrue(fs.mkdirs(new Path("/foo4"))); assertCTimesEqual(cluster); } finally { if (fs != null) { fs.close(); } if (qjCluster != null) { qjCluster.shutdown(); } } }
@Test public void testFinalizeWithJournalNodes() throws IOException, URISyntaxException { MiniQJMHACluster qjCluster = null; FileSystem fs = null; try { Builder builder = new MiniQJMHACluster.Builder(conf); builder.getDfsBuilder() .numDataNodes(0); qjCluster = builder.build(); MiniDFSCluster cluster = qjCluster.getDfsCluster(); // No upgrade is in progress at the moment. checkJnPreviousDirExistence(qjCluster, false); checkClusterPreviousDirExistence(cluster, false); assertCTimesEqual(cluster); // Transition NN0 to active and do some FS ops. cluster.transitionToActive(0); fs = HATestUtil.configureFailoverFs(cluster, conf); assertTrue(fs.mkdirs(new Path("/foo1"))); final long cidBeforeUpgrade = getCommittedTxnIdValue(qjCluster); // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade // flag. cluster.shutdownNameNode(1); cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE); cluster.restartNameNode(0, false); assertTrue(cidBeforeUpgrade <= getCommittedTxnIdValue(qjCluster)); assertTrue(fs.mkdirs(new Path("/foo2"))); checkNnPreviousDirExistence(cluster, 0, true); checkNnPreviousDirExistence(cluster, 1, false); checkJnPreviousDirExistence(qjCluster, true); // Now bootstrap the standby with the upgraded info. int rc = BootstrapStandby.run( new String[]{"-force"}, cluster.getConfiguration(1)); assertEquals(0, rc); cluster.restartNameNode(1); final long cidDuringUpgrade = getCommittedTxnIdValue(qjCluster); assertTrue(cidDuringUpgrade > cidBeforeUpgrade); runFinalizeCommand(cluster); assertEquals(cidDuringUpgrade, getCommittedTxnIdValue(qjCluster)); checkClusterPreviousDirExistence(cluster, false); checkJnPreviousDirExistence(qjCluster, false); assertCTimesEqual(cluster); } finally { if (fs != null) { fs.close(); } if (qjCluster != null) { qjCluster.shutdown(); } } }
/** * Make sure that even if the NN which initiated the upgrade is in the standby * state that we're allowed to finalize. */ @Test public void testFinalizeFromSecondNameNodeWithJournalNodes() throws IOException, URISyntaxException { MiniQJMHACluster qjCluster = null; FileSystem fs = null; try { Builder builder = new MiniQJMHACluster.Builder(conf); builder.getDfsBuilder() .numDataNodes(0); qjCluster = builder.build(); MiniDFSCluster cluster = qjCluster.getDfsCluster(); // No upgrade is in progress at the moment. checkJnPreviousDirExistence(qjCluster, false); checkClusterPreviousDirExistence(cluster, false); assertCTimesEqual(cluster); // Transition NN0 to active and do some FS ops. cluster.transitionToActive(0); fs = HATestUtil.configureFailoverFs(cluster, conf); assertTrue(fs.mkdirs(new Path("/foo1"))); // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade // flag. cluster.shutdownNameNode(1); cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE); cluster.restartNameNode(0, false); checkNnPreviousDirExistence(cluster, 0, true); checkNnPreviousDirExistence(cluster, 1, false); checkJnPreviousDirExistence(qjCluster, true); // Now bootstrap the standby with the upgraded info. int rc = BootstrapStandby.run( new String[]{"-force"}, cluster.getConfiguration(1)); assertEquals(0, rc); cluster.restartNameNode(1); // Make the second NN (not the one that initiated the upgrade) active when // the finalize command is run. cluster.transitionToStandby(0); cluster.transitionToActive(1); runFinalizeCommand(cluster); checkClusterPreviousDirExistence(cluster, false); checkJnPreviousDirExistence(qjCluster, false); assertCTimesEqual(cluster); } finally { if (fs != null) { fs.close(); } if (qjCluster != null) { qjCluster.shutdown(); } } }
/** * Test rollback with NFS shared dir. */ @Test public void testRollbackWithNfs() throws Exception { MiniDFSCluster cluster = null; FileSystem fs = null; try { cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()) .numDataNodes(0) .build(); File sharedDir = new File(cluster.getSharedEditsDir(0, 1)); // No upgrade is in progress at the moment. checkClusterPreviousDirExistence(cluster, false); assertCTimesEqual(cluster); checkPreviousDirExistence(sharedDir, false); // Transition NN0 to active and do some FS ops. cluster.transitionToActive(0); fs = HATestUtil.configureFailoverFs(cluster, conf); assertTrue(fs.mkdirs(new Path("/foo1"))); // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade // flag. cluster.shutdownNameNode(1); cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE); cluster.restartNameNode(0, false); checkNnPreviousDirExistence(cluster, 0, true); checkNnPreviousDirExistence(cluster, 1, false); checkPreviousDirExistence(sharedDir, true); // NN0 should come up in the active state when given the -upgrade option, // so no need to transition it to active. assertTrue(fs.mkdirs(new Path("/foo2"))); // Now bootstrap the standby with the upgraded info. int rc = BootstrapStandby.run( new String[]{"-force"}, cluster.getConfiguration(1)); assertEquals(0, rc); cluster.restartNameNode(1); checkNnPreviousDirExistence(cluster, 0, true); checkNnPreviousDirExistence(cluster, 1, true); checkPreviousDirExistence(sharedDir, true); assertCTimesEqual(cluster); // Now shut down the cluster and do the rollback. Collection<URI> nn1NameDirs = cluster.getNameDirs(0); cluster.shutdown(); conf.setStrings(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, Joiner.on(",").join(nn1NameDirs)); NameNode.doRollback(conf, false); // The rollback operation should have rolled back the first NN's local // dirs, and the shared dir, but not the other NN's dirs. Those have to be // done by bootstrapping the standby. checkNnPreviousDirExistence(cluster, 0, false); checkPreviousDirExistence(sharedDir, false); } finally { if (fs != null) { fs.close(); } if (cluster != null) { cluster.shutdown(); } } }
@Test public void testRollbackWithJournalNodes() throws IOException, URISyntaxException { MiniQJMHACluster qjCluster = null; FileSystem fs = null; try { Builder builder = new MiniQJMHACluster.Builder(conf); builder.getDfsBuilder() .numDataNodes(0); qjCluster = builder.build(); MiniDFSCluster cluster = qjCluster.getDfsCluster(); // No upgrade is in progress at the moment. checkClusterPreviousDirExistence(cluster, false); assertCTimesEqual(cluster); checkJnPreviousDirExistence(qjCluster, false); // Transition NN0 to active and do some FS ops. cluster.transitionToActive(0); fs = HATestUtil.configureFailoverFs(cluster, conf); assertTrue(fs.mkdirs(new Path("/foo1"))); final long cidBeforeUpgrade = getCommittedTxnIdValue(qjCluster); // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade // flag. cluster.shutdownNameNode(1); cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE); cluster.restartNameNode(0, false); checkNnPreviousDirExistence(cluster, 0, true); checkNnPreviousDirExistence(cluster, 1, false); checkJnPreviousDirExistence(qjCluster, true); // NN0 should come up in the active state when given the -upgrade option, // so no need to transition it to active. assertTrue(fs.mkdirs(new Path("/foo2"))); final long cidDuringUpgrade = getCommittedTxnIdValue(qjCluster); assertTrue(cidDuringUpgrade > cidBeforeUpgrade); // Now bootstrap the standby with the upgraded info. int rc = BootstrapStandby.run( new String[]{"-force"}, cluster.getConfiguration(1)); assertEquals(0, rc); cluster.restartNameNode(1); checkNnPreviousDirExistence(cluster, 0, true); checkNnPreviousDirExistence(cluster, 1, true); checkJnPreviousDirExistence(qjCluster, true); assertCTimesEqual(cluster); // Shut down the NNs, but deliberately leave the JNs up and running. Collection<URI> nn1NameDirs = cluster.getNameDirs(0); cluster.shutdown(); conf.setStrings(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, Joiner.on(",").join(nn1NameDirs)); NameNode.doRollback(conf, false); final long cidAfterRollback = getCommittedTxnIdValue(qjCluster); assertTrue(cidBeforeUpgrade < cidAfterRollback); // make sure the committedTxnId has been reset correctly after rollback assertTrue(cidDuringUpgrade > cidAfterRollback); // The rollback operation should have rolled back the first NN's local // dirs, and the shared dir, but not the other NN's dirs. Those have to be // done by bootstrapping the standby. checkNnPreviousDirExistence(cluster, 0, false); checkJnPreviousDirExistence(qjCluster, false); } finally { if (fs != null) { fs.close(); } if (qjCluster != null) { qjCluster.shutdown(); } } }
/** * Make sure that starting a second NN with the -upgrade flag fails if the * other NN has already done that. */ @Test public void testCannotUpgradeSecondNameNode() throws IOException, URISyntaxException { MiniDFSCluster cluster = null; FileSystem fs = null; try { cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()) .numDataNodes(0) .build(); File sharedDir = new File(cluster.getSharedEditsDir(0, 1)); // No upgrade is in progress at the moment. checkClusterPreviousDirExistence(cluster, false); assertCTimesEqual(cluster); checkPreviousDirExistence(sharedDir, false); // Transition NN0 to active and do some FS ops. cluster.transitionToActive(0); fs = HATestUtil.configureFailoverFs(cluster, conf); assertTrue(fs.mkdirs(new Path("/foo1"))); // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade // flag. cluster.shutdownNameNode(1); cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE); cluster.restartNameNode(0, false); checkNnPreviousDirExistence(cluster, 0, true); checkNnPreviousDirExistence(cluster, 1, false); checkPreviousDirExistence(sharedDir, true); // NN0 should come up in the active state when given the -upgrade option, // so no need to transition it to active. assertTrue(fs.mkdirs(new Path("/foo2"))); // Restart NN0 without the -upgrade flag, to make sure that works. cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR); cluster.restartNameNode(0, false); // Make sure we can still do FS ops after upgrading. cluster.transitionToActive(0); assertTrue(fs.mkdirs(new Path("/foo3"))); // Make sure that starting the second NN with the -upgrade flag fails. cluster.getNameNodeInfos()[1].setStartOpt(StartupOption.UPGRADE); try { cluster.restartNameNode(1, false); fail("Should not have been able to start second NN with -upgrade"); } catch (IOException ioe) { GenericTestUtils.assertExceptionContains( "It looks like the shared log is already being upgraded", ioe); } } finally { if (fs != null) { fs.close(); } if (cluster != null) { cluster.shutdown(); } } }