/**
 * Logs, for each storage directory of the given image, the absolute path and
 * length of the fsimage and/or edits file it contains. Debug/test helper.
 *
 * @param fs the FSImage whose storage directories are inspected
 */
public void printStorages(FSImage fs) {
  // Fixed typo in log message: "corresoponding" -> "corresponding".
  LOG.info("current storages and corresponding sizes:");
  for (Iterator<StorageDirectory> it = fs.dirIterator(); it.hasNext();) {
    StorageDirectory sd = it.next();
    if (sd.getStorageDirType().isOfType(NameNodeDirType.IMAGE)) {
      File imf = FSImage.getImageFile(sd, NameNodeFile.IMAGE);
      LOG.info(" image file " + imf.getAbsolutePath() + "; len = " + imf.length());
    }
    if (sd.getStorageDirType().isOfType(NameNodeDirType.EDITS)) {
      File edf = FSImage.getImageFile(sd, NameNodeFile.EDITS);
      LOG.info(" edits file " + edf.getAbsolutePath() + "; len = " + edf.length());
    }
  }
}
/**
 * Verifies that every storage directory of the given image holds either the
 * fsimage file or the edits file (never a combined IMAGE_AND_EDITS dir), and
 * that each file has exactly the expected length.
 *
 * @param img               image whose storage directories are checked
 * @param expectedImgSize   expected length of each fsimage file
 * @param expectedEditsSize expected length of each edits file
 */
private void verifyDifferentDirs(FSImage img, long expectedImgSize, long expectedEditsSize) {
  Iterator<StorageDirectory> dirs = img.dirIterator();
  while (dirs.hasNext()) {
    StorageDirectory storageDir = dirs.next();
    if (storageDir.getStorageDirType().isOfType(NameNodeDirType.IMAGE)) {
      File imageFile = FSImage.getImageFile(storageDir, NameNodeFile.IMAGE);
      LOG.info("--image file " + imageFile.getAbsolutePath() + "; len = " + imageFile.length() + "; expected = " + expectedImgSize);
      assertEquals(expectedImgSize, imageFile.length());
    } else if (storageDir.getStorageDirType().isOfType(NameNodeDirType.EDITS)) {
      File editsFile = FSImage.getImageFile(storageDir, NameNodeFile.EDITS);
      LOG.info("-- edits file " + editsFile.getAbsolutePath() + "; len = " + editsFile.length() + "; expected = " + expectedEditsSize);
      assertEquals(expectedEditsSize, editsFile.length());
    } else {
      // A directory typed as both image and edits means the split failed.
      fail("Image/Edits directories are not different");
    }
  }
}
/**
 * Merge image and edits, and verify consistency with the signature.
 * Loads the fsimage from the first IMAGE directory, replays the edits from
 * the first EDITS directory on top of it, validates the result against the
 * checkpoint signature, and persists the merged namespace.
 *
 * @param sig checkpoint signature the merged storage info must match
 * @throws IOException if either checkpoint directory is missing, or loading,
 *         validation, or saving fails
 */
private void doMerge(CheckpointSignature sig) throws IOException {
  // Edit log must be open before the merge so the save below can roll it.
  getEditLog().open();
  StorageDirectory sdName = null;
  StorageDirectory sdEdits = null;
  Iterator<StorageDirectory> it = null;
  // Only the first directory of each type is used for the merge.
  it = dirIterator(NameNodeDirType.IMAGE);
  if (it.hasNext())
    sdName = it.next();
  it = dirIterator(NameNodeDirType.EDITS);
  if (it.hasNext())
    sdEdits = it.next();
  if ((sdName == null) || (sdEdits == null))
    throw new IOException("Could not locate checkpoint directories");
  this.layoutVersion = -1; // to avoid assert in loadFSImage()
  loadFSImage(FSImage.getImageFile(sdName, NameNodeFile.IMAGE));
  loadFSEdits(sdEdits);
  // The merged image must carry the same storage info as the signature
  // the primary handed out when the checkpoint started.
  sig.validateStorageInfo(this);
  saveNamespace(false);
}
/** * Closes the current edit log and opens edits.new. */ synchronized void rollEditLog() throws IOException { waitForSyncToFinish(); Iterator<StorageDirectory> it = fsimage.dirIterator(NameNodeDirType.EDITS); if(!it.hasNext()) return; // // If edits.new already exists in some directory, verify it // exists in all directories. // boolean alreadyExists = existsNew(it.next()); while(it.hasNext()) { StorageDirectory sd = it.next(); if(alreadyExists != existsNew(sd)) throw new IOException(getEditNewFile(sd) + "should " + (alreadyExists ? "" : "not ") + "exist."); } if(alreadyExists) return; // nothing to do, edits.new exists! // check if any of failed storage is now available and put it back fsimage.attemptRestoreRemovedStorage(); divertFileStreams( Storage.STORAGE_DIR_CURRENT + "/" + NameNodeFile.EDITS_NEW.getName()); }
/**
 * Logs, for each storage directory of the given image, the absolute path and
 * length of the fsimage and/or edits file it contains. Debug/test helper.
 *
 * @param fs the FSImage whose storage directories are inspected
 */
public void printStorages(FSImage fs) {
  // Fixed typo in log message: "corresoponding" -> "corresponding".
  LOG.info("current storages and corresponding sizes:");
  for(Iterator<StorageDirectory> it = fs.dirIterator(); it.hasNext(); ) {
    StorageDirectory sd = it.next();
    if(sd.getStorageDirType().isOfType(NameNodeDirType.IMAGE)) {
      File imf = FSImage.getImageFile(sd, NameNodeFile.IMAGE);
      LOG.info(" image file " + imf.getAbsolutePath() + "; len = " + imf.length());
    }
    if(sd.getStorageDirType().isOfType(NameNodeDirType.EDITS)) {
      File edf = FSImage.getImageFile(sd, NameNodeFile.EDITS);
      LOG.info(" edits file " + edf.getAbsolutePath() + "; len = " + edf.length());
    }
  }
}
/** * Test that a corrupted fstime file in a single storage directory does not * prevent the NN from starting up. */ @Test public void testFsTimeFileCorrupt() throws IOException, InterruptedException { assertEquals(cluster.getNameDirs().size(), 2); // Get the first fstime file and truncate it. truncateStorageDirFile(cluster, NameNodeFile.TIME, 0); // Make sure we can start up despite the fact the fstime file is corrupted. cluster.restartNameNode(); }
/**
 * Truncates the named NameNode metadata file in the given storage directory
 * to zero length by deleting it and recreating it empty.
 *
 * @param cluster         running mini cluster whose namenode storage is used
 * @param f               which metadata file to truncate
 * @param storageDirIndex index of the storage directory to operate on
 */
private static void truncateStorageDirFile(MiniDFSCluster cluster,
    NameNodeFile f, int storageDirIndex) throws IOException {
  File current = cluster.getNameNode().getFSImage()
      .getStorageDir(storageDirIndex).getCurrentDir();
  File target = new File(current, f.getName());
  // The file must exist beforehand; delete + create leaves it empty.
  assertTrue(target.isFile());
  assertTrue(target.delete());
  assertTrue(target.createNewFile());
}
void runTest(EditFileModifier modifier) throws IOException { //set toleration length final Configuration conf = new Configuration(); conf.setInt(DFSConfigKeys.DFS_NAMENODE_EDITS_TOLERATION_LENGTH_KEY, TOLERATION_LENGTH); final MiniDFSCluster cluster = new MiniDFSCluster(conf, 0, true, null); try { cluster.waitActive(); //add a few transactions and then shutdown namenode. final FileSystem fs = cluster.getFileSystem(); fs.mkdirs(new Path("/user/foo")); fs.mkdirs(new Path("/user/bar")); cluster.shutdownNameNode(); //modify edit files for(File dir : FSNamesystem.getNamespaceEditsDirs(conf)) { final File editFile = new File(new File(dir, "current"), NameNodeFile.EDITS.getName()); assertTrue("Should exist: " + editFile, editFile.exists()); modifier.modify(editFile); } try { //restart namenode. cluster.restartNameNode(); //No exception: the modification must be tolerable. Assert.assertTrue(modifier.isTolerable()); } catch (IOException e) { //Got an exception: the modification must be intolerable. LOG.info("Got an exception", e); Assert.assertFalse(modifier.isTolerable()); } } finally { cluster.shutdown(); } }
private void corruptNameNodeFiles() throws IOException { // now corrupt/delete the directrory List<File> nameDirs = (List<File>)FSNamesystem.getNamespaceDirs(config); List<File> nameEditsDirs = (List<File>)FSNamesystem.getNamespaceEditsDirs(config); // get name dir and its length, then delete and recreate the directory File dir = nameDirs.get(0); // has only one this.fsimageLength = new File(new File(dir, "current"), NameNodeFile.IMAGE.getName()).length(); if(dir.exists() && !(FileUtil.fullyDelete(dir))) throw new IOException("Cannot remove directory: " + dir); LOG.info("--removed dir "+dir + ";len was ="+ this.fsimageLength); if (!dir.mkdirs()) throw new IOException("Cannot create directory " + dir); dir = nameEditsDirs.get(0); //has only one this.editsLength = new File(new File(dir, "current"), NameNodeFile.EDITS.getName()).length(); if(dir.exists() && !(FileUtil.fullyDelete(dir))) throw new IOException("Cannot remove directory: " + dir); if (!dir.mkdirs()) throw new IOException("Cannot create directory " + dir); LOG.info("--removed dir and recreated "+dir + ";len was ="+ this.editsLength); }
private void verifyEditLogs(FSNamesystem namesystem, FSImage fsimage) throws IOException { // Verify that we can read in all the transactions that we have written. // If there were any corruptions, it is likely that the reading in // of these transactions will throw an exception. for (Iterator<StorageDirectory> it = fsimage.dirIterator(NameNodeDirType.EDITS); it.hasNext();) { File editFile = FSImage.getImageFile(it.next(), NameNodeFile.EDITS); System.out.println("Verifying file: " + editFile); int numEdits = new FSEditLogLoader(namesystem).loadFSEdits( new EditLogFileInputStream(editFile)); System.out.println("Number of edits: " + numEdits); } }
/**
 * Reads the checkpoint time directly from the fstime file in the given
 * "current" directory.
 *
 * @param currDir storage directory's "current" subdirectory
 * @return the stored checkpoint timestamp, or 0 if the file is absent or
 *         unreadable
 * @throws IOException if reading the file fails
 */
long readCheckpointTime(File currDir) throws IOException {
  File timeFile = new File(currDir, NameNodeFile.TIME.getName());
  // Missing or unreadable fstime file is treated as "never checkpointed".
  if (!timeFile.exists() || !timeFile.canRead()) {
    return 0L;
  }
  DataInputStream in = new DataInputStream(new FileInputStream(timeFile));
  try {
    return in.readLong();
  } finally {
    in.close();
  }
}
private void corruptNameNodeFiles() throws IOException { // now corrupt/delete the directrory List<URI> nameDirs = (List<URI>)FSNamesystem.getNamespaceDirs(config); List<URI> nameEditsDirs = (List<URI>)FSNamesystem.getNamespaceEditsDirs(config); // get name dir and its length, then delete and recreate the directory File dir = new File(nameDirs.get(0).getPath()); // has only one this.fsimageLength = new File(new File(dir, "current"), NameNodeFile.IMAGE.getName()).length(); if(dir.exists() && !(FileUtil.fullyDelete(dir))) throw new IOException("Cannot remove directory: " + dir); LOG.info("--removed dir "+dir + ";len was ="+ this.fsimageLength); if (!dir.mkdirs()) throw new IOException("Cannot create directory " + dir); dir = new File( nameEditsDirs.get(0).getPath()); //has only one this.editsLength = new File(new File(dir, "current"), NameNodeFile.EDITS.getName()).length(); if(dir.exists() && !(FileUtil.fullyDelete(dir))) throw new IOException("Cannot remove directory: " + dir); if (!dir.mkdirs()) throw new IOException("Cannot create directory " + dir); LOG.info("--removed dir and recreated "+dir + ";len was ="+ this.editsLength); }
/**
 * check if files exist/not exist
 * Builds the expected fsimage/edits paths under path1..path3, logs their
 * locations and lengths, and asserts the existence/length relationships that
 * the test scenario expects.
 *
 * @param valid true for the healthy-storage scenario (dirs 1 and 2 in sync,
 *        no image in dir 3), false for the failed-storage scenario (edits in
 *        dir 2 diverged from dirs 1 and 3)
 */
public void checkFiles(boolean valid) {
  //look at the valid storage
  File fsImg1 = new File(path1, Storage.STORAGE_DIR_CURRENT + "/" + NameNodeFile.IMAGE.getName());
  File fsImg2 = new File(path2, Storage.STORAGE_DIR_CURRENT + "/" + NameNodeFile.IMAGE.getName());
  File fsImg3 = new File(path3, Storage.STORAGE_DIR_CURRENT + "/" + NameNodeFile.IMAGE.getName());
  File fsEdits1 = new File(path1, Storage.STORAGE_DIR_CURRENT + "/" + NameNodeFile.EDITS.getName());
  File fsEdits2 = new File(path2, Storage.STORAGE_DIR_CURRENT + "/" + NameNodeFile.EDITS.getName());
  File fsEdits3 = new File(path3, Storage.STORAGE_DIR_CURRENT + "/" + NameNodeFile.EDITS.getName());
  // Dump the live storage layout for debugging before asserting.
  this.printStorages(cluster.getNameNode().getFSImage());
  LOG.info("++++ image files = "+fsImg1.getAbsolutePath() + "," + fsImg2.getAbsolutePath() + ","+ fsImg3.getAbsolutePath());
  LOG.info("++++ edits files = "+fsEdits1.getAbsolutePath() + "," + fsEdits2.getAbsolutePath() + ","+ fsEdits3.getAbsolutePath());
  LOG.info("checkFiles compares lengths: img1=" + fsImg1.length() + ",img2=" + fsImg2.length() + ",img3=" + fsImg3.length());
  LOG.info("checkFiles compares lengths: edits1=" + fsEdits1.length() + ",edits2=" + fsEdits2.length() + ",edits3=" + fsEdits3.length());
  if(valid) {
    // Healthy case: images exist in dirs 1 and 2 only; edits exist
    // everywhere and all copies are length-identical.
    assertTrue(fsImg1.exists());
    assertTrue(fsImg2.exists());
    assertFalse(fsImg3.exists());
    assertTrue(fsEdits1.exists());
    assertTrue(fsEdits2.exists());
    assertTrue(fsEdits3.exists());
    // should be the same
    assertTrue(fsImg1.length() == fsImg2.length());
    assertTrue(fsEdits1.length() == fsEdits2.length());
    assertTrue(fsEdits1.length() == fsEdits3.length());
  } else {
    // Failure case: dir 2's edits file must differ from both others.
    // should be different
    assertTrue(fsEdits2.length() != fsEdits1.length());
    assertTrue(fsEdits2.length() != fsEdits3.length());
  }
}
private void verifyEditLogs(FSImage fsimage) throws IOException { // Verify that we can read in all the transactions that we have written. // If there were any corruptions, it is likely that the reading in // of these transactions will throw an exception. for (Iterator<StorageDirectory> it = fsimage.dirIterator(NameNodeDirType.EDITS); it.hasNext();) { File editFile = FSImage.getImageFile(it.next(), NameNodeFile.EDITS); System.out.println("Verifying file: " + editFile); int numEdits = FSEditLog.loadFSEdits( new FSEditLog.EditLogFileInputStream(editFile)); System.out.println("Number of edits: " + numEdits); } }
/** * Tests transaction logging in dfs. */ public void testEditLog() throws IOException { // start a cluster Configuration conf = new Configuration(); MiniDFSCluster cluster = null; FileSystem fileSys = null; try { cluster = new MiniDFSCluster(conf, NUM_DATA_NODES, true, null); cluster.waitActive(); fileSys = cluster.getFileSystem(); final FSNamesystem namesystem = cluster.getNameNode().getNamesystem(); for (Iterator<File> it = cluster.getNameDirs().iterator(); it.hasNext(); ) { File dir = new File(it.next().getPath()); System.out.println(dir); } FSImage fsimage = namesystem.getFSImage(); FSEditLog editLog = fsimage.getEditLog(); // set small size of flush buffer editLog.setBufferCapacity(2048); editLog.close(); editLog.open(); namesystem.getDelegationTokenSecretManager().startThreads(); // Create threads and make them run transactions concurrently. Thread threadId[] = new Thread[NUM_THREADS]; for (int i = 0; i < NUM_THREADS; i++) { Transactions trans = new Transactions(namesystem, NUM_TRANSACTIONS); threadId[i] = new Thread(trans, "TransactionThread-" + i); threadId[i].start(); } // wait for all transactions to get over for (int i = 0; i < NUM_THREADS; i++) { try { threadId[i].join(); } catch (InterruptedException e) { i--; // retry } } editLog.close(); // Verify that we can read in all the transactions that we have written. // If there were any corruptions, it is likely that the reading in // of these transactions will throw an exception. // namesystem.getDelegationTokenSecretManager().stopThreads(); int numKeys = namesystem.getDelegationTokenSecretManager().getNumberOfKeys(); for (Iterator<StorageDirectory> it = fsimage.dirIterator(NameNodeDirType.EDITS); it.hasNext();) { File editFile = FSImage.getImageFile(it.next(), NameNodeFile.EDITS); System.out.println("Verifying file: " + editFile); int numEdits = FSEditLog.loadFSEdits( new EditLogFileInputStream(editFile), -1); assertTrue("Verification for " + editFile + " failed. 
" + "Expected " + (NUM_THREADS * opsPerTrans * NUM_TRANSACTIONS + numKeys) + " transactions. "+ "Found " + numEdits + " transactions.", numEdits == NUM_THREADS * opsPerTrans * NUM_TRANSACTIONS +numKeys); } } finally { if(fileSys != null) fileSys.close(); if(cluster != null) cluster.shutdown(); } }
/**
 * Simulate a secondary node failure to transfer image
 * back to the name-node.
 * Used to truncate primary fsimage file.
 * The checkpoint is expected to fail (error simulation slot 2), and the
 * primary's image files must be left unchanged afterwards.
 */
void testSecondaryFailsToReturnImage(Configuration conf)
  throws IOException {
  System.out.println("Starting testSecondaryFailsToReturnImage");
  Path file1 = new Path("checkpointRI.dat");
  MiniDFSCluster cluster =
    new MiniDFSCluster(conf, numDatanodes, false, null);
  cluster.waitActive();
  FileSystem fileSys = cluster.getFileSystem();
  FSImage image = cluster.getNameNode().getFSImage();
  try {
    assertTrue(!fileSys.exists(file1));
    StorageDirectory sd = null;
    // Take the last IMAGE directory as the reference for the file length.
    for (Iterator<StorageDirectory> it =
              image.dirIterator(NameNodeDirType.IMAGE); it.hasNext();)
      sd = it.next();
    assertTrue(sd != null);
    long fsimageLength = FSImage.getImageFile(sd, NameNodeFile.IMAGE).length();
    //
    // Make the checkpoint
    //
    SecondaryNameNode secondary = startSecondaryNameNode(conf);
    // Error slot 2: simulate failure while transferring the image back.
    ErrorSimulator.setErrorSimulation(2);

    try {
      secondary.doCheckpoint();  // this should fail
      assertTrue(false);
    } catch (IOException e) {
      System.out.println("testSecondaryFailsToReturnImage: doCheckpoint() " +
          "failed predictably - " + e);
    }
    ErrorSimulator.clearErrorSimulation(2);

    // Verify that image file sizes did not change.
    for (Iterator<StorageDirectory> it =
            image.dirIterator(NameNodeDirType.IMAGE); it.hasNext();) {
      assertTrue(FSImage.getImageFile(it.next(),
                          NameNodeFile.IMAGE).length() == fsimageLength);
    }

    secondary.shutdown();
  } finally {
    fileSys.close();
    cluster.shutdown();
  }
}
/** * check if files exist/not exist */ public void checkFiles(boolean valid) { // look at the valid storage File fsImg1 = new File(path1, Storage.STORAGE_DIR_CURRENT + "/" + NameNodeFile.IMAGE.getName()); File fsImg2 = new File(path2, Storage.STORAGE_DIR_CURRENT + "/" + NameNodeFile.IMAGE.getName()); File fsImg3 = new File(path3, Storage.STORAGE_DIR_CURRENT + "/" + NameNodeFile.IMAGE.getName()); File fsEdits1 = new File(path1, Storage.STORAGE_DIR_CURRENT + "/" + NameNodeFile.EDITS.getName()); File fsEdits2 = new File(path2, Storage.STORAGE_DIR_CURRENT + "/" + NameNodeFile.EDITS.getName()); File fsEdits3 = new File(path3, Storage.STORAGE_DIR_CURRENT + "/" + NameNodeFile.EDITS.getName()); this.printStorages(cluster.getNameNode().getFSImage()); String md5_1 = null,md5_2 = null,md5_3 = null; try { md5_1 = getFileMD5(fsEdits1); md5_2 = getFileMD5(fsEdits2); md5_3 = getFileMD5(fsEdits3); } catch (Exception e) { System.err.println("md 5 calculation failed:" + e.getLocalizedMessage()); } LOG.info("++++ image files = " + fsImg1.getAbsolutePath() + "," + fsImg2.getAbsolutePath() + "," + fsImg3.getAbsolutePath()); LOG.info("++++ edits files = " + fsEdits1.getAbsolutePath() + "," + fsEdits2.getAbsolutePath() + "," + fsEdits3.getAbsolutePath()); LOG.info("checkFiles compares lengths: img1=" + fsImg1.length() + ",img2=" + fsImg2.length() + ",img3=" + fsImg3.length()); LOG.info("checkFiles compares lengths: edits1=" + fsEdits1.length() + ",edits2=" + fsEdits2.length() + ",edits3=" + fsEdits3.length()); if (valid) { // should be the same assertTrue(fsImg1.length() == fsImg2.length()); assertTrue(0 == fsImg3.length()); // shouldn't be created assertTrue(fsEdits1.length() == fsEdits2.length()); assertTrue(md5_1.equals(md5_2)); assertTrue(md5_1.equals(md5_3)); } else { // should be different assertFalse(md5_1.equals(md5_2)); assertFalse(md5_1.equals(md5_2)); } }
/**
 * Tests transaction logging in dfs.
 * Captures the name/edits dirs from a throwaway mini cluster, opens a
 * standalone FSImage over them, runs numThreads threads performing
 * numberTransactions transactions each against the edit log, then replays
 * every edits file and checks the exact transaction count.
 */
public void testEditLog() throws IOException {
  // start a cluster
  Collection<File> namedirs = null;
  Collection<File> editsdirs = null;
  Configuration conf = new Configuration();
  MiniDFSCluster cluster = new MiniDFSCluster(0, conf, numDatanodes,
                                              true, true, null, null);
  cluster.waitActive();
  FileSystem fileSys = cluster.getFileSystem();
  int numdirs = 0;

  try {
    // Only the directory lists are needed; the cluster itself is shut down
    // before the edit log is exercised directly.
    namedirs = cluster.getNameDirs();
    editsdirs = cluster.getNameEditsDirs();
  } finally {
    fileSys.close();
    cluster.shutdown();
  }

  for (Iterator it = namedirs.iterator(); it.hasNext(); ) {
    File dir = (File)it.next();
    System.out.println(dir);
    numdirs++;
  }

  FSImage fsimage = new FSImage(namedirs, editsdirs);
  FSEditLog editLog = fsimage.getEditLog();

  // set small size of flush buffer
  editLog.setBufferCapacity(2048);
  editLog.close();
  editLog.open();

  // Create threads and make them run transactions concurrently.
  Thread threadId[] = new Thread[numThreads];
  for (int i = 0; i < numThreads; i++) {
    Transactions trans = new Transactions(editLog, numberTransactions);
    threadId[i] = new Thread(trans, "TransactionThread-" + i);
    threadId[i].start();
  }

  // wait for all transactions to get over
  for (int i = 0; i < numThreads; i++) {
    try {
      threadId[i].join();
    } catch (InterruptedException e) {
      i--;      // retry
    }
  }
  editLog.close();

  // Verify that we can read in all the transactions that we have written.
  // If there were any corruptions, it is likely that the reading in
  // of these transactions will throw an exception.
  //
  for (Iterator<StorageDirectory> it =
          fsimage.dirIterator(NameNodeDirType.EDITS); it.hasNext();) {
    File editFile = FSImage.getImageFile(it.next(), NameNodeFile.EDITS);
    System.out.println("Verifying file: " + editFile);
    int numEdits = FSEditLog.loadFSEdits(new EditLogFileInputStream(editFile), -1);
    // No lease may survive the run; each thread's transactions must have
    // completed (2 ops per transaction).
    int numLeases = FSNamesystem.getFSNamesystem().leaseManager.countLease();
    System.out.println("Number of outstanding leases " + numLeases);
    assertEquals(0, numLeases);
    assertTrue("Verification for " + editFile + " failed. " +
               "Expected " + (numThreads * 2 * numberTransactions) + " transactions. "+
               "Found " + numEdits + " transactions.",
               numEdits == numThreads * 2 * numberTransactions);
  }
}
/**
 * secnn-7
 * secondary node copies fsimage and edits into correct separate directories.
 * The primary uses a single combined IMAGE_AND_EDITS directory; the
 * secondary is configured with separate checkpoint image/edits directories
 * and, after doCheckpoint(), must hold files of the same lengths as the
 * primary's, each in its own directory.
 * @throws IOException
 */
public void testSNNStartup() throws IOException{
  //setUpConfig();
  LOG.info("--starting SecondNN startup test");
  // different name dirs
  config.set("dfs.name.dir", new File(hdfsDir, "name").getPath());
  config.set("dfs.name.edits.dir", new File(hdfsDir, "name").getPath());
  // same checkpoint dirs
  config.set("fs.checkpoint.edits.dir", new File(hdfsDir, "chkpt_edits").getPath());
  config.set("fs.checkpoint.dir", new File(hdfsDir, "chkpt").getPath());

  LOG.info("--starting NN ");
  MiniDFSCluster cluster = null;
  SecondaryNameNode sn = null;
  NameNode nn = null;
  try {
    cluster = new MiniDFSCluster(0, config, 1, true, false, false, null,
        null, null, null);
    cluster.waitActive();
    nn = cluster.getNameNode();
    assertNotNull(nn);

    // start secondary node
    LOG.info("--starting SecondNN");
    sn = new SecondaryNameNode(config);
    assertNotNull(sn);

    LOG.info("--doing checkpoint");
    sn.doCheckpoint();  // this shouldn't fail
    LOG.info("--done checkpoint");

    // now verify that image and edits are created in the different directories
    FSImage image = nn.getFSImage();
    // Primary has exactly one storage dir, combined image+edits.
    StorageDirectory sd = image.getStorageDir(0); //only one
    assertEquals(sd.getStorageDirType(), NameNodeDirType.IMAGE_AND_EDITS);
    File imf = FSImage.getImageFile(sd, NameNodeFile.IMAGE);
    File edf = FSImage.getImageFile(sd, NameNodeFile.EDITS);
    LOG.info("--image file " + imf.getAbsolutePath() + "; len = " + imf.length());
    LOG.info("--edits file " + edf.getAbsolutePath() + "; len = " + edf.length());

    // Secondary's checkpoint image must mirror the primary's file lengths,
    // but with image and edits in separate directories.
    FSImage chkpImage = sn.getFSImage();
    verifyDifferentDirs(chkpImage, imf.length(), edf.length());

  } catch (IOException e) {
    fail(StringUtils.stringifyException(e));
    System.err.println("checkpoint failed");
    throw e;
  } finally {
    if(sn!=null)
      sn.shutdown();
    if(cluster!=null)
      cluster.shutdown();
  }
}
/**
 * Removes the old edits file, renames edits.new to edits, and reopens the
 * edits streams, after letting any in-flight sync complete.
 */
synchronized void purgeEditLog() throws IOException {
  waitForSyncToFinish();
  String editsNewPath =
      Storage.STORAGE_DIR_CURRENT + "/" + NameNodeFile.EDITS_NEW.getName();
  revertFileStreams(editsNewPath);
}