void finalizeUpgrade(boolean finalizeEditLog) throws IOException { LOG.info("Finalizing upgrade for local dirs. " + (storage.getLayoutVersion() == 0 ? "" : "\n cur LV = " + storage.getLayoutVersion() + "; cur CTime = " + storage.getCTime())); for (Iterator<StorageDirectory> it = storage.dirIterator(false); it.hasNext();) { StorageDirectory sd = it.next(); NNUpgradeUtil.doFinalize(sd); } if (finalizeEditLog) { // We only do this in the case that HA is enabled and we're active. In any // other case the NN will have done the upgrade of the edits directories // already by virtue of the fact that they're local. editLog.doFinalizeOfSharedLog(); } isUpgradeFinalized = true; }
/** * Rename FSImage with the specific txid */ private void renameCheckpoint(long txid, NameNodeFile fromNnf, NameNodeFile toNnf, boolean renameMD5) throws IOException { ArrayList<StorageDirectory> al = null; for (StorageDirectory sd : storage.dirIterable(NameNodeDirType.IMAGE)) { try { renameImageFileInDir(sd, fromNnf, toNnf, txid, renameMD5); } catch (IOException ioe) { LOG.warn("Unable to rename checkpoint in " + sd, ioe); if (al == null) { al = Lists.newArrayList(); } al.add(sd); } } if(al != null) storage.reportErrorsOnDirectories(al); }
/** * Rename all the fsimage files with the specific NameNodeFile type. The * associated checksum files will also be renamed. */ void renameCheckpoint(NameNodeFile fromNnf, NameNodeFile toNnf) throws IOException { ArrayList<StorageDirectory> al = null; FSImageTransactionalStorageInspector inspector = new FSImageTransactionalStorageInspector(EnumSet.of(fromNnf)); storage.inspectStorageDirs(inspector); for (FSImageFile image : inspector.getFoundImages()) { try { renameImageFileInDir(image.sd, fromNnf, toNnf, image.txId, true); } catch (IOException ioe) { LOG.warn("Unable to rename checkpoint in " + image.sd, ioe); if (al == null) { al = Lists.newArrayList(); } al.add(image.sd); } } if(al != null) { storage.reportErrorsOnDirectories(al); } }
private void renameImageFileInDir(StorageDirectory sd, NameNodeFile fromNnf, NameNodeFile toNnf, long txid, boolean renameMD5) throws IOException { final File fromFile = NNStorage.getStorageFile(sd, fromNnf, txid); final File toFile = NNStorage.getStorageFile(sd, toNnf, txid); // renameTo fails on Windows if the destination file already exists. if(LOG.isDebugEnabled()) { LOG.debug("renaming " + fromFile.getAbsolutePath() + " to " + toFile.getAbsolutePath()); } if (!fromFile.renameTo(toFile)) { if (!toFile.delete() || !fromFile.renameTo(toFile)) { throw new IOException("renaming " + fromFile.getAbsolutePath() + " to " + toFile.getAbsolutePath() + " FAILED"); } } if (renameMD5) { MD5FileUtils.renameMD5File(fromFile, toFile); } }
/** * Determine the checkpoint time of the specified StorageDirectory * * @param sd StorageDirectory to check * @return If file exists and can be read, last checkpoint time. If not, 0L. * @throws IOException On errors processing file pointed to by sd */ static long readCheckpointTime(StorageDirectory sd) throws IOException { File timeFile = NNStorage.getStorageFile(sd, NameNodeFile.TIME); long timeStamp = 0L; if (timeFile.exists() && FileUtil.canRead(timeFile)) { DataInputStream in = new DataInputStream(new FileInputStream(timeFile)); try { timeStamp = in.readLong(); in.close(); in = null; } finally { IOUtils.cleanup(LOG, in); } } return timeStamp; }
/** * Return true if this storage dir can roll back to the previous storage * state, false otherwise. The NN will refuse to run the rollback operation * unless at least one JM or fsimage storage directory can roll back. * * @param storage the storage info for the current state * @param prevStorage the storage info for the previous (unupgraded) state * @param targetLayoutVersion the layout version we intend to roll back to * @return true if this JM can roll back, false otherwise. * @throws IOException in the event of error */ static boolean canRollBack(StorageDirectory sd, StorageInfo storage, StorageInfo prevStorage, int targetLayoutVersion) throws IOException { File prevDir = sd.getPreviousDir(); if (!prevDir.exists()) { // use current directory then LOG.info("Storage directory " + sd.getRoot() + " does not contain previous fs state."); // read and verify consistency with other directories storage.readProperties(sd); return false; } // read and verify consistency of the prev dir prevStorage.readPreviousVersionProperties(sd); if (prevStorage.getLayoutVersion() != targetLayoutVersion) { throw new IOException( "Cannot rollback to storage version " + prevStorage.getLayoutVersion() + " using this version of the NameNode, which uses storage version " + targetLayoutVersion + ". " + "Please use the previous version of HDFS to perform the rollback."); } return true; }
/** * Finalize the upgrade. The previous dir, if any, will be renamed and * removed. After this is completed, rollback is no longer allowed. * * @param sd the storage directory to finalize * @throws IOException in the event of error */ static void doFinalize(StorageDirectory sd) throws IOException { File prevDir = sd.getPreviousDir(); if (!prevDir.exists()) { // already discarded LOG.info("Directory " + prevDir + " does not exist."); LOG.info("Finalize upgrade for " + sd.getRoot()+ " is not required."); return; } LOG.info("Finalizing upgrade of storage directory " + sd.getRoot()); Preconditions.checkState(sd.getCurrentDir().exists(), "Current directory must exist."); final File tmpDir = sd.getFinalizedTmp(); // rename previous to tmp and remove NNStorage.rename(prevDir, tmpDir); NNStorage.deleteDir(tmpDir); LOG.info("Finalize upgrade for " + sd.getRoot()+ " is complete."); }
/** * Perform any steps that must succeed across all storage dirs/JournalManagers * involved in an upgrade before proceeding onto the actual upgrade stage. If * a call to any JM's or local storage dir's doPreUpgrade method fails, then * doUpgrade will not be called for any JM. The existing current dir is * renamed to previous.tmp, and then a new, empty current dir is created. * * @param conf configuration for creating {@link EditLogFileOutputStream} * @param sd the storage directory to perform the pre-upgrade procedure. * @throws IOException in the event of error */ static void doPreUpgrade(Configuration conf, StorageDirectory sd) throws IOException { LOG.info("Starting upgrade of storage directory " + sd.getRoot()); // rename current to tmp renameCurToTmp(sd); final File curDir = sd.getCurrentDir(); final File tmpDir = sd.getPreviousTmp(); List<String> fileNameList = IOUtils.listDirectory(tmpDir, new FilenameFilter() { @Override public boolean accept(File dir, String name) { return dir.equals(tmpDir) && name.startsWith(NNStorage.NameNodeFile.EDITS.getName()); } }); for (String s : fileNameList) { File prevFile = new File(tmpDir, s); File newFile = new File(curDir, prevFile.getName()); Files.createLink(newFile.toPath(), prevFile.toPath()); } }
/** * Rename the existing current dir to previous.tmp, and create a new empty * current dir. */ public static void renameCurToTmp(StorageDirectory sd) throws IOException { File curDir = sd.getCurrentDir(); File prevDir = sd.getPreviousDir(); final File tmpDir = sd.getPreviousTmp(); Preconditions.checkState(curDir.exists(), "Current directory must exist for preupgrade."); Preconditions.checkState(!prevDir.exists(), "Previous directory must not exist for preupgrade."); Preconditions.checkState(!tmpDir.exists(), "Previous.tmp directory must not exist for preupgrade." + "Consider restarting for recovery."); // rename current to tmp NNStorage.rename(curDir, tmpDir); if (!curDir.mkdir()) { throw new IOException("Cannot create directory " + curDir); } }
/** * Perform the upgrade of the storage dir to the given storage info. The new * storage info is written into the current directory, and the previous.tmp * directory is renamed to previous. * * @param sd the storage directory to upgrade * @param storage info about the new upgraded versions. * @throws IOException in the event of error */ public static void doUpgrade(StorageDirectory sd, Storage storage) throws IOException { LOG.info("Performing upgrade of storage directory " + sd.getRoot()); try { // Write the version file, since saveFsImage only makes the // fsimage_<txid>, and the directory is otherwise empty. storage.writeProperties(sd); File prevDir = sd.getPreviousDir(); File tmpDir = sd.getPreviousTmp(); Preconditions.checkState(!prevDir.exists(), "previous directory must not exist for upgrade."); Preconditions.checkState(tmpDir.exists(), "previous.tmp directory must exist for upgrade."); // rename tmp to previous NNStorage.rename(tmpDir, prevDir); } catch (IOException ioe) { LOG.error("Unable to rename temp to previous for " + sd.getRoot(), ioe); throw ioe; } }
/** * Perform rollback of the storage dir to the previous state. The existing * current dir is removed, and the previous dir is renamed to current. * * @param sd the storage directory to roll back. * @throws IOException in the event of error */ static void doRollBack(StorageDirectory sd) throws IOException { File prevDir = sd.getPreviousDir(); if (!prevDir.exists()) { return; } File tmpDir = sd.getRemovedTmp(); Preconditions.checkState(!tmpDir.exists(), "removed.tmp directory must not exist for rollback." + "Consider restarting for recovery."); // rename current to tmp File curDir = sd.getCurrentDir(); Preconditions.checkState(curDir.exists(), "Current directory must exist for rollback."); NNStorage.rename(curDir, tmpDir); // rename previous to current NNStorage.rename(prevDir, curDir); // delete tmp dir NNStorage.deleteDir(tmpDir); LOG.info("Rollback of " + sd.getRoot() + " is complete."); }
@Override public void selectInputStreams(Collection<EditLogInputStream> streams, long fromTxId, boolean inProgressOk) { Iterator<StorageDirectory> iter = storage.dirIterator(); while (iter.hasNext()) { StorageDirectory dir = iter.next(); List<EditLogFile> editFiles; try { editFiles = FileJournalManager.matchEditLogs( dir.getCurrentDir()); } catch (IOException ioe) { throw new RuntimeException(ioe); } FileJournalManager.addStreamsToCollectionFromFiles(editFiles, streams, fromTxId, inProgressOk); } }
/** * Confirm that FSImage files in all StorageDirectory are the same, * and non-empty, and there are the expected number of them. * @param fsn - the FSNamesystem being checked. * @param numImageDirs - the configured number of StorageDirectory of type IMAGE. * @return - the md5 hash of the most recent FSImage files, which must all be the same. * @throws AssertionError if image files are empty or different, * if less than two StorageDirectory are provided, or if the * actual number of StorageDirectory is less than configured. */ public static String checkImages( FSNamesystem fsn, int numImageDirs) throws Exception { NNStorage stg = fsn.getFSImage().getStorage(); //any failed StorageDirectory is removed from the storageDirs list assertEquals("Some StorageDirectories failed Upgrade", numImageDirs, stg.getNumStorageDirs(NameNodeDirType.IMAGE)); assertTrue("Not enough fsimage copies in MiniDFSCluster " + "to test parallel write", numImageDirs > 1); // List of "current/" directory from each SD List<File> dirs = FSImageTestUtil.getCurrentDirs(stg, NameNodeDirType.IMAGE); // across directories, all files with same names should be identical hashes FSImageTestUtil.assertParallelFilesAreIdentical( dirs, Collections.<String>emptySet()); FSImageTestUtil.assertSameNewestImage(dirs); // Return the hash of the newest image file StorageDirectory firstSd = stg.dirIterator(NameNodeDirType.IMAGE).next(); File latestImage = FSImageTestUtil.findLatestImageFile(firstSd); String md5 = FSImageTestUtil.getImageFileMD5IgnoringTxId(latestImage); System.err.println("md5 of " + latestImage + ": " + md5); return md5; }
private static NNStorage mockStorageForDirs(final StorageDirectory ... mockDirs) throws IOException { NNStorage mockStorage = Mockito.mock(NNStorage.class); Mockito.doAnswer(new Answer<Void>() { @Override public Void answer(InvocationOnMock invocation) throws Throwable { FSImageStorageInspector inspector = (FSImageStorageInspector) invocation.getArguments()[0]; for (StorageDirectory sd : mockDirs) { inspector.inspectDirectory(sd); } return null; } }).when(mockStorage).inspectStorageDirs( Mockito.<FSImageStorageInspector>anyObject()); return mockStorage; }
private static void createEmptyInProgressEditLog(MiniDFSCluster cluster, NameNode nn, boolean writeHeader) throws IOException { long txid = nn.getNamesystem().getEditLog().getLastWrittenTxId(); URI sharedEditsUri = cluster.getSharedEditsDir(0, 1); File sharedEditsDir = new File(sharedEditsUri.getPath()); StorageDirectory storageDir = new StorageDirectory(sharedEditsDir); File inProgressFile = NameNodeAdapter.getInProgressEditsFile(storageDir, txid + 1); assertTrue("Failed to create in-progress edits file", inProgressFile.createNewFile()); if (writeHeader) { DataOutputStream out = new DataOutputStream(new FileOutputStream( inProgressFile)); EditLogFileOutputStream.writeHeader( NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION, out); out.close(); } }
/** * Simple test with image, edits, and inprogress edits */ @Test public void testCurrentStorageInspector() throws IOException { FSImageTransactionalStorageInspector inspector = new FSImageTransactionalStorageInspector(); StorageDirectory mockDir = FSImageTestUtil.mockStorageDirectory( NameNodeDirType.IMAGE_AND_EDITS, false, "/foo/current/" + getImageFileName(123), "/foo/current/" + getFinalizedEditsFileName(123, 456), "/foo/current/" + getImageFileName(456), "/foo/current/" + getInProgressEditsFileName(457)); inspector.inspectDirectory(mockDir); assertEquals(2, inspector.foundImages.size()); FSImageFile latestImage = inspector.getLatestImages().get(0); assertEquals(456, latestImage.txId); assertSame(mockDir, latestImage.sd); assertTrue(inspector.isUpgradeFinalized()); assertEquals(new File("/foo/current/"+getImageFileName(456)), latestImage.getFile()); }
@Override public Void answer(InvocationOnMock invocation) throws Throwable { Object[] args = invocation.getArguments(); StorageDirectory sd = (StorageDirectory)args[1]; if (count++ == 1) { LOG.info("Injecting fault for sd: " + sd); if (throwRTE) { throw new RuntimeException("Injected fault: saveFSImage second time"); } else { throw new IOException("Injected fault: saveFSImage second time"); } } LOG.info("Not injecting fault for sd: " + sd); return (Void)invocation.callRealMethod(); }
/** * Test that the NN locks its storage and edits directories, and won't start up * if the directories are already locked **/ @Test public void testNameDirLocking() throws IOException { Configuration conf = new HdfsConfiguration(); MiniDFSCluster cluster = null; // Start a NN, and verify that lock() fails in all of the configured // directories StorageDirectory savedSd = null; try { cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build(); NNStorage storage = cluster.getNameNode().getFSImage().getStorage(); for (StorageDirectory sd : storage.dirIterable(null)) { assertLockFails(sd); savedSd = sd; } } finally { cleanup(cluster); cluster = null; } assertNotNull(savedSd); // Lock one of the saved directories, then start the NN, and make sure it // fails to start assertClusterStartFailsWhenDirLocked(conf, savedSd); }
/** * Assert that, if sdToLock is locked, the cluster is not allowed to start up. * @param conf cluster conf to use * @param sdToLock the storage directory to lock */ private static void assertClusterStartFailsWhenDirLocked( Configuration conf, StorageDirectory sdToLock) throws IOException { // Lock the edits dir, then start the NN, and make sure it fails to start sdToLock.lock(); MiniDFSCluster cluster = null; try { cluster = new MiniDFSCluster.Builder(conf).format(false) .manageNameDfsDirs(false).numDataNodes(0).build(); assertFalse("cluster should fail to start after locking " + sdToLock, sdToLock.isLockSupported()); } catch (IOException ioe) { GenericTestUtils.assertExceptionContains("already locked", ioe); } finally { cleanup(cluster); cluster = null; sdToLock.unlock(); } }
/** * Test the normal operation of loading transactions from * file journal manager. 3 edits directories are setup without any * failures. Test that we read in the expected number of transactions. */ @Test public void testNormalOperation() throws IOException { File f1 = new File(TestEditLog.TEST_DIR + "/normtest0"); File f2 = new File(TestEditLog.TEST_DIR + "/normtest1"); File f3 = new File(TestEditLog.TEST_DIR + "/normtest2"); List<URI> editUris = ImmutableList.of(f1.toURI(), f2.toURI(), f3.toURI()); NNStorage storage = setupEdits(editUris, 5); long numJournals = 0; for (StorageDirectory sd : storage.dirIterable(NameNodeDirType.EDITS)) { FileJournalManager jm = new FileJournalManager(conf, sd, storage); assertEquals(6*TXNS_PER_ROLL, getNumberOfTransactions(jm, 1, true, false)); numJournals++; } assertEquals(3, numJournals); }
/** * Test a mixture of inprogress files and finalised. Set up 3 edits * directories and fail the second on the last roll. Verify that reading * the transactions, reads from the finalised directories. */ @Test public void testInprogressRecoveryMixed() throws IOException { File f1 = new File(TestEditLog.TEST_DIR + "/mixtest0"); File f2 = new File(TestEditLog.TEST_DIR + "/mixtest1"); File f3 = new File(TestEditLog.TEST_DIR + "/mixtest2"); List<URI> editUris = ImmutableList.of(f1.toURI(), f2.toURI(), f3.toURI()); // abort after the 5th roll NNStorage storage = setupEdits(editUris, 5, new AbortSpec(5, 1)); Iterator<StorageDirectory> dirs = storage.dirIterator(NameNodeDirType.EDITS); StorageDirectory sd = dirs.next(); FileJournalManager jm = new FileJournalManager(conf, sd, storage); assertEquals(6*TXNS_PER_ROLL, getNumberOfTransactions(jm, 1, true, false)); sd = dirs.next(); jm = new FileJournalManager(conf, sd, storage); assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL, getNumberOfTransactions(jm, 1, true, false)); sd = dirs.next(); jm = new FileJournalManager(conf, sd, storage); assertEquals(6*TXNS_PER_ROLL, getNumberOfTransactions(jm, 1, true, false)); }
@Test(expected=IllegalStateException.class) public void testFinalizeErrorReportedToNNStorage() throws IOException, InterruptedException { File f = new File(TestEditLog.TEST_DIR + "/filejournaltestError"); // abort after 10th roll NNStorage storage = setupEdits(Collections.<URI>singletonList(f.toURI()), 10, new AbortSpec(10, 0)); StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next(); FileJournalManager jm = new FileJournalManager(conf, sd, storage); String sdRootPath = sd.getRoot().getAbsolutePath(); FileUtil.chmod(sdRootPath, "-w", true); try { jm.finalizeLogSegment(0, 1); } finally { FileUtil.chmod(sdRootPath, "+w", true); assertTrue(storage.getRemovedStorageDirs().contains(sd)); } }
/** * Test that we can read from a stream created by FileJournalManager. * Create a single edits directory, failing it on the final roll. * Then try loading from the point of the 3rd roll. Verify that we read * the correct number of transactions from this point. */ @Test public void testReadFromStream() throws IOException { File f = new File(TestEditLog.TEST_DIR + "/readfromstream"); // abort after 10th roll NNStorage storage = setupEdits(Collections.<URI>singletonList(f.toURI()), 10, new AbortSpec(10, 0)); StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next(); FileJournalManager jm = new FileJournalManager(conf, sd, storage); long expectedTotalTxnCount = TXNS_PER_ROLL*10 + TXNS_PER_FAIL; assertEquals(expectedTotalTxnCount, getNumberOfTransactions(jm, 1, true, false)); long skippedTxns = (3*TXNS_PER_ROLL); // skip first 3 files long startingTxId = skippedTxns + 1; long numLoadable = getNumberOfTransactions(jm, startingTxId, true, false); assertEquals(expectedTotalTxnCount - skippedTxns, numLoadable); }
/** * Make requests with starting transaction ids which don't match the beginning * txid of some log segments. * * This should succeed. */ @Test public void testAskForTransactionsMidfile() throws IOException { File f = new File(TestEditLog.TEST_DIR + "/askfortransactionsmidfile"); NNStorage storage = setupEdits(Collections.<URI>singletonList(f.toURI()), 10); StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next(); FileJournalManager jm = new FileJournalManager(conf, sd, storage); // 10 rolls, so 11 rolled files, 110 txids total. final int TOTAL_TXIDS = 10 * 11; for (int txid = 1; txid <= TOTAL_TXIDS; txid++) { assertEquals((TOTAL_TXIDS - txid) + 1, getNumberOfTransactions(jm, txid, true, false)); } }
/** * Test that we can load an edits directory with a corrupt inprogress file. * The corrupt inprogress file should be moved to the side. */ @Test public void testManyLogsWithCorruptInprogress() throws IOException { File f = new File(TestEditLog.TEST_DIR + "/manylogswithcorruptinprogress"); NNStorage storage = setupEdits(Collections.<URI>singletonList(f.toURI()), 10, new AbortSpec(10, 0)); StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next(); File[] files = new File(f, "current").listFiles(new FilenameFilter() { @Override public boolean accept(File dir, String name) { if (name.startsWith("edits_inprogress")) { return true; } return false; } }); assertEquals(files.length, 1); corruptAfterStartSegment(files[0]); FileJournalManager jm = new FileJournalManager(conf, sd, storage); assertEquals(10*TXNS_PER_ROLL+1, getNumberOfTransactions(jm, 1, true, false)); }
@Test public void testGetRemoteEditLog() throws IOException { StorageDirectory sd = FSImageTestUtil.mockStorageDirectory( NameNodeDirType.EDITS, false, NNStorage.getFinalizedEditsFileName(1, 100), NNStorage.getFinalizedEditsFileName(101, 200), NNStorage.getInProgressEditsFileName(201), NNStorage.getFinalizedEditsFileName(1001, 1100)); // passing null for NNStorage because this unit test will not use it FileJournalManager fjm = new FileJournalManager(conf, sd, null); assertEquals("[1,100],[101,200],[1001,1100]", getLogsAsString(fjm, 1)); assertEquals("[101,200],[1001,1100]", getLogsAsString(fjm, 101)); assertEquals("[101,200],[1001,1100]", getLogsAsString(fjm, 150)); assertEquals("[1001,1100]", getLogsAsString(fjm, 201)); assertEquals("Asking for a newer log than exists should return empty list", "", getLogsAsString(fjm, 9999)); }
/** * Make sure that we starting reading the correct op when we request a stream * with a txid in the middle of an edit log file. */ @Test public void testReadFromMiddleOfEditLog() throws CorruptionException, IOException { File f = new File(TestEditLog.TEST_DIR + "/readfrommiddleofeditlog"); NNStorage storage = setupEdits(Collections.<URI>singletonList(f.toURI()), 10); StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next(); FileJournalManager jm = new FileJournalManager(conf, sd, storage); EditLogInputStream elis = getJournalInputStream(jm, 5, true); try { FSEditLogOp op = elis.readOp(); assertEquals("read unexpected op", op.getTransactionId(), 5); } finally { IOUtils.cleanup(LOG, elis); } }
/** * Make sure that in-progress streams aren't counted if we don't ask for * them. */ @Test public void testExcludeInProgressStreams() throws CorruptionException, IOException { File f = new File(TestEditLog.TEST_DIR + "/excludeinprogressstreams"); // Don't close the edit log once the files have been set up. NNStorage storage = setupEdits(Collections.<URI>singletonList(f.toURI()), 10, false); StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next(); FileJournalManager jm = new FileJournalManager(conf, sd, storage); // If we exclude the in-progess stream, we should only have 100 tx. assertEquals(100, getNumberOfTransactions(jm, 1, false, false)); EditLogInputStream elis = getJournalInputStream(jm, 90, false); try { FSEditLogOp lastReadOp = null; while ((lastReadOp = elis.readOp()) != null) { assertTrue(lastReadOp.getTransactionId() <= 100); } } finally { IOUtils.cleanup(LOG, elis); } }
/** * Tests that internal renames are done using native code on platforms that * have it. The native rename includes more detailed information about the * failure, which can be useful for troubleshooting. */ @Test public void testDoPreUpgradeIOError() throws IOException { File storageDir = new File(TestEditLog.TEST_DIR, "preupgradeioerror"); List<URI> editUris = Collections.singletonList(storageDir.toURI()); NNStorage storage = setupEdits(editUris, 5); StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next(); assertNotNull(sd); // Change storage directory so that renaming current to previous.tmp fails. FileUtil.setWritable(storageDir, false); FileJournalManager jm = null; try { jm = new FileJournalManager(conf, sd, storage); exception.expect(IOException.class); if (NativeCodeLoader.isNativeCodeLoaded()) { exception.expectMessage("failure in native rename"); } jm.doPreUpgrade(); } finally { IOUtils.cleanup(LOG, jm); // Restore permissions on storage directory and make sure we can delete. FileUtil.setWritable(storageDir, true); FileUtil.fullyDelete(storageDir); } }
/** * Create a <code>version</code> file for datanode inside the specified parent * directory. If such a file already exists, it will be overwritten. * The given version string will be written to the file as the layout * version. None of the parameters may be null. * * @param parent directory where namenode VERSION file is stored * @param version StorageInfo to create VERSION file from * @param bpid Block pool Id * @param bpidToWrite Block pool Id to write into the version file */ public static void createDataNodeVersionFile(File[] parent, StorageInfo version, String bpid, String bpidToWrite) throws IOException { DataStorage storage = new DataStorage(version); storage.setDatanodeUuid("FixedDatanodeUuid"); File[] versionFiles = new File[parent.length]; for (int i = 0; i < parent.length; i++) { File versionFile = new File(parent[i], "VERSION"); StorageDirectory sd = new StorageDirectory(parent[i].getParentFile()); storage.createStorageID(sd, false); storage.writeProperties(versionFile, sd); versionFiles[i] = versionFile; File bpDir = BlockPoolSliceStorage.getBpRoot(bpid, parent[i]); createBlockPoolVersionFile(bpDir, version, bpidToWrite); } }
/** * Return a standalone instance of FSEditLog that will log into the given * log directory. The returned instance is not yet opened. */ public static FSEditLog createStandaloneEditLog(File logDir) throws IOException { assertTrue(logDir.mkdirs() || logDir.exists()); if (!FileUtil.fullyDeleteContents(logDir)) { throw new IOException("Unable to delete contents of " + logDir); } NNStorage storage = Mockito.mock(NNStorage.class); StorageDirectory sd = FSImageTestUtil.mockStorageDirectory(logDir, NameNodeDirType.EDITS); List<StorageDirectory> sds = Lists.newArrayList(sd); Mockito.doReturn(sds).when(storage).dirIterable(NameNodeDirType.EDITS); Mockito.doReturn(sd).when(storage) .getStorageDirectory(Matchers.<URI>anyObject()); FSEditLog editLog = new FSEditLog(new Configuration(), storage, ImmutableList.of(logDir.toURI())); editLog.initJournalsForWrite(); return editLog; }
private void doUpgrade(NNStorage storage) throws IOException { for (Iterator<StorageDirectory> it = storage.dirIterator(false); it.hasNext();) { StorageDirectory sd = it.next(); NNUpgradeUtil.doUpgrade(sd, storage); } }
/** Check if upgrade is in progress. */ public static void checkUpgrade(NNStorage storage) throws IOException { // Upgrade or rolling upgrade is allowed only if there are // no previous fs states in any of the directories for (Iterator<StorageDirectory> it = storage.dirIterator(false); it.hasNext();) { StorageDirectory sd = it.next(); if (sd.getPreviousDir().exists()) throw new InconsistentFSStateException(sd.getRoot(), "previous fs state should not exist during upgrade. " + "Finalize or rollback first."); } }
void loadFSImageFile(FSNamesystem target, MetaRecoveryContext recovery, FSImageFile imageFile, StartupOption startupOption) throws IOException { LOG.debug("Planning to load image :\n" + imageFile); StorageDirectory sdForProperties = imageFile.sd; storage.readProperties(sdForProperties, startupOption); if (NameNodeLayoutVersion.supports( LayoutVersion.Feature.TXID_BASED_LAYOUT, getLayoutVersion())) { // For txid-based layout, we should have a .md5 file // next to the image file boolean isRollingRollback = RollingUpgradeStartupOption.ROLLBACK .matches(startupOption); loadFSImage(imageFile.getFile(), target, recovery, isRollingRollback); } else if (NameNodeLayoutVersion.supports( LayoutVersion.Feature.FSIMAGE_CHECKSUM, getLayoutVersion())) { // In 0.22, we have the checksum stored in the VERSION file. String md5 = storage.getDeprecatedProperty( NNStorage.DEPRECATED_MESSAGE_DIGEST_PROPERTY); if (md5 == null) { throw new InconsistentFSStateException(sdForProperties.getRoot(), "Message digest property " + NNStorage.DEPRECATED_MESSAGE_DIGEST_PROPERTY + " not set for storage directory " + sdForProperties.getRoot()); } loadFSImage(imageFile.getFile(), new MD5Hash(md5), target, recovery, false); } else { // We don't have any record of the md5sum loadFSImage(imageFile.getFile(), null, target, recovery, false); } }
/** * Save the contents of the FS image to the file. */ void saveFSImage(SaveNamespaceContext context, StorageDirectory sd, NameNodeFile dstType) throws IOException { long txid = context.getTxId(); File newFile = NNStorage.getStorageFile(sd, NameNodeFile.IMAGE_NEW, txid); File dstFile = NNStorage.getStorageFile(sd, dstType, txid); FSImageFormatProtobuf.Saver saver = new FSImageFormatProtobuf.Saver(context); FSImageCompression compression = FSImageCompression.createCompression(conf); saver.save(newFile, compression); MD5FileUtils.saveMD5File(dstFile, saver.getSavedDigest()); storage.setMostRecentCheckpointInfo(txid, Time.now()); }
/** * Deletes the checkpoint file in every storage directory, * since the checkpoint was cancelled. */ private void deleteCancelledCheckpoint(long txid) throws IOException { ArrayList<StorageDirectory> al = Lists.newArrayList(); for (StorageDirectory sd : storage.dirIterable(NameNodeDirType.IMAGE)) { File ckpt = NNStorage.getStorageFile(sd, NameNodeFile.IMAGE_NEW, txid); if (ckpt.exists() && !ckpt.delete()) { LOG.warn("Unable to delete cancelled checkpoint in " + sd); al.add(sd); } } storage.reportErrorsOnDirectories(al); }
/** * This is called by the 2NN after having downloaded an image, and by * the NN after having received a new image from the 2NN. It * renames the image from fsimage_N.ckpt to fsimage_N and also * saves the related .md5 file into place. */ public synchronized void saveDigestAndRenameCheckpointImage(NameNodeFile nnf, long txid, MD5Hash digest) throws IOException { // Write and rename MD5 file List<StorageDirectory> badSds = Lists.newArrayList(); for (StorageDirectory sd : storage.dirIterable(NameNodeDirType.IMAGE)) { File imageFile = NNStorage.getImageFile(sd, nnf, txid); try { MD5FileUtils.saveMD5File(imageFile, digest); } catch (IOException ioe) { badSds.add(sd); } } storage.reportErrorsOnDirectories(badSds); CheckpointFaultInjector.getInstance().afterMD5Rename(); // Rename image from tmp file renameCheckpoint(txid, NameNodeFile.IMAGE_NEW, nnf, false); // So long as this is the newest image available, // advertise it as such to other checkpointers // from now on if (txid > storage.getMostRecentCheckpointTxId()) { storage.setMostRecentCheckpointInfo(txid, Time.now()); } }
FSImageFile(StorageDirectory sd, File file, long txId) { assert txId >= 0 || txId == HdfsConstants.INVALID_TXID : "Invalid txid on " + file +": " + txId; this.sd = sd; this.txId = txId; this.file = file; }
/** * @return a list with the paths to EDITS and EDITS_NEW (if it exists) * in a given storage directory. */ static List<File> getEditsInStorageDir(StorageDirectory sd) { ArrayList<File> files = new ArrayList<File>(); File edits = NNStorage.getStorageFile(sd, NameNodeFile.EDITS); assert edits.exists() : "Expected edits file at " + edits; files.add(edits); File editsNew = NNStorage.getStorageFile(sd, NameNodeFile.EDITS_NEW); if (editsNew.exists()) { files.add(editsNew); } return files; }
@Override public void purgeLogsOlderThan(long minTxIdToKeep) throws IOException { Iterator<StorageDirectory> iter = storage.dirIterator(); while (iter.hasNext()) { StorageDirectory dir = iter.next(); List<EditLogFile> editFiles = FileJournalManager.matchEditLogs( dir.getCurrentDir()); for (EditLogFile f : editFiles) { if (f.getLastTxId() < minTxIdToKeep) { purger.purgeLog(f); } } } }