/** * Take a snapshot of the specified disabled region */ protected void snapshotDisabledRegion(final HRegionInfo regionInfo) throws IOException { // 1 copy the regionInfo files to the snapshot Path snapshotRegionDir = TakeSnapshotUtils.getRegionSnapshotDirectory(snapshot, rootDir, regionInfo.getEncodedName()); HRegion.writeRegioninfoOnFilesystem(regionInfo, snapshotRegionDir, fs, conf); // check for error for each region monitor.rethrowException(); // 2 for each region, copy over its recovered.edits directory Path regionDir = HRegion.getRegionDir(rootDir, regionInfo); new CopyRecoveredEditsTask(snapshot, monitor, fs, regionDir, snapshotRegionDir).call(); monitor.rethrowException(); status.setStatus("Completed copying recovered edits for offline snapshot of table: " + snapshot.getTable()); // 3 reference all the files in the region new ReferenceRegionHFilesTask(snapshot, monitor, regionDir, fs, snapshotRegionDir).call(); monitor.rethrowException(); status.setStatus("Completed referencing HFiles for offline snapshot of table: " + snapshot.getTable()); }
/** * Verify that the region (regioninfo, hfiles) are valid * @param fs the FileSystem instance * @param snapshotDir snapshot directory to check * @param region the region to check */ private void verifyRegion(final FileSystem fs, final Path snapshotDir, final HRegionInfo region) throws IOException { // make sure we have region in the snapshot Path regionDir = new Path(snapshotDir, region.getEncodedName()); // make sure we have the region info in the snapshot Path regionInfo = new Path(regionDir, HRegion.REGIONINFO_FILE); // make sure the file exists if (!fs.exists(regionInfo)) { throw new CorruptedSnapshotException("No region info found for region:" + region, snapshot); } FSDataInputStream in = fs.open(regionInfo); HRegionInfo found = new HRegionInfo(); try { found.readFields(in); if (!region.equals(found)) { throw new CorruptedSnapshotException("Found region info (" + found + ") doesn't match expected region:" + region, snapshot); } } finally { in.close(); } // make sure we have the expected recovered edits files TakeSnapshotUtils.verifyRecoveredEdits(fs, snapshotDir, found, snapshot); // make sure we have all the expected store files SnapshotReferenceUtil.visitRegionStoreFiles(fs, regionDir, new FSVisitor.StoreFileVisitor() { public void storeFile(final String regionNameSuffix, final String family, final String hfileName) throws IOException { verifyStoreFile(snapshotDir, region, family, hfileName); } }); }
/**
 * Build the handler and set up its timeout timer.
 * @param snapshot descriptor of the snapshot to take
 * @param masterServices master services provider
 * @param metricsMaster master metrics, passed through to the parent handler
 */
public DisabledTableSnapshotHandler(SnapshotDescription snapshot,
    final MasterServices masterServices, final MasterMetrics metricsMaster) {
  super(snapshot, masterServices, metricsMaster);

  // create the timer and tie it to this handler's monitor
  this.timeoutInjector =
      TakeSnapshotUtils.getMasterTimerAndBindToMonitor(snapshot, conf, monitor);
}
/** * Verify that the region (regioninfo, hfiles) are valid * @param fs the FileSystem instance * @param snapshotDir snapshot directory to check * @param region the region to check */ private void verifyRegion(final FileSystem fs, final Path snapshotDir, final HRegionInfo region) throws IOException { // make sure we have region in the snapshot Path regionDir = new Path(snapshotDir, region.getEncodedName()); // make sure we have the region info in the snapshot Path regionInfo = new Path(regionDir, HRegionFileSystem.REGION_INFO_FILE); // make sure the file exists if (!fs.exists(regionInfo)) { throw new CorruptedSnapshotException("No region info found for region:" + region, snapshot); } HRegionInfo found = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir); if (!region.equals(found)) { throw new CorruptedSnapshotException("Found region info (" + found + ") doesn't match expected region:" + region, snapshot); } // make sure we have the expected recovered edits files TakeSnapshotUtils.verifyRecoveredEdits(fs, snapshotDir, found, snapshot); // make sure we have all the expected store files SnapshotReferenceUtil.visitRegionStoreFiles(fs, regionDir, new FSVisitor.StoreFileVisitor() { public void storeFile(final String regionNameSuffix, final String family, final String hfileName) throws IOException { verifyStoreFile(snapshotDir, region, family, hfileName); } }); }
/**
 * Build the handler and set up its timeout timer.
 * @param snapshot descriptor of the snapshot to take
 * @param masterServices master services provider
 */
public DisabledTableSnapshotHandler(SnapshotDescription snapshot,
    final MasterServices masterServices) {
  super(snapshot, masterServices);

  // create the timer and tie it to this handler's monitor
  this.timeoutInjector =
      TakeSnapshotUtils.getMasterTimerAndBindToMonitor(snapshot, conf, monitor);
}
/** * Confirm that the snapshot contains references to all the files that should be in the snapshot */ public static void confirmSnapshotValid(SnapshotDescription snapshotDescriptor, byte[] tableName, byte[] testFamily, Path rootDir, HBaseAdmin admin, FileSystem fs, boolean requireLogs, Path logsDir, Set<String> snapshotServers) throws IOException { Path snapshotDir = SnapshotDescriptionUtils .getCompletedSnapshotDir(snapshotDescriptor, rootDir); assertTrue(fs.exists(snapshotDir)); Path snapshotinfo = new Path(snapshotDir, SnapshotDescriptionUtils.SNAPSHOTINFO_FILE); assertTrue(fs.exists(snapshotinfo)); // check the logs dir if (requireLogs) { TakeSnapshotUtils.verifyAllLogsGotReferenced(fs, logsDir, snapshotServers, snapshotDescriptor, new Path(snapshotDir, HConstants.HREGION_LOGDIR_NAME)); } // check the table info HTableDescriptor desc = FSTableDescriptors.getTableDescriptor(fs, rootDir, tableName); HTableDescriptor snapshotDesc = FSTableDescriptors.getTableDescriptor(fs, snapshotDir); assertEquals(desc, snapshotDesc); // check the region snapshot for all the regions List<HRegionInfo> regions = admin.getTableRegions(tableName); for (HRegionInfo info : regions) { String regionName = info.getEncodedName(); Path regionDir = new Path(snapshotDir, regionName); HRegionInfo snapshotRegionInfo = HRegion.loadDotRegionInfoFileContent(fs, regionDir); assertEquals(info, snapshotRegionInfo); // check to make sure we have the family Path familyDir = new Path(regionDir, Bytes.toString(testFamily)); assertTrue("Expected to find: " + familyDir + ", but it doesn't exist", fs.exists(familyDir)); // make sure we have some files references assertTrue(fs.listStatus(familyDir).length > 0); } }
/** * Complete taking the snapshot on the region. Writes the region info and adds references to the * working snapshot directory. TODO for api consistency, consider adding another version with no * {@link ForeignExceptionSnare} arg. (In the future other cancellable HRegion methods could * eventually add a {@link ForeignExceptionSnare}, or we could do something fancier). * @param desc snasphot description object * @param exnSnare ForeignExceptionSnare that captures external exeptions in case we need to bail * out. This is allowed to be null and will just be ignored in that case. * @throws IOException if there is an external or internal error causing the snapshot to fail */ public void addRegionToSnapshot(SnapshotDescription desc, ForeignExceptionSnare exnSnare) throws IOException { // This should be "fast" since we don't rewrite store files but instead // back up the store files by creating a reference Path rootDir = FSUtils.getRootDir(this.rsServices.getConfiguration()); Path snapshotRegionDir = TakeSnapshotUtils.getRegionSnapshotDirectory(desc, rootDir, regionInfo.getEncodedName()); // 1. dump region meta info into the snapshot directory LOG.debug("Storing region-info for snapshot."); checkRegioninfoOnFilesystem(snapshotRegionDir); // 2. iterate through all the stores in the region LOG.debug("Creating references for hfiles"); // This ensures that we have an atomic view of the directory as long as we have < ls limit // (batch size of the files in a directory) on the namenode. Otherwise, we get back the files in // batches and may miss files being added/deleted. This could be more robust (iteratively // checking to see if we have all the files until we are sure), but the limit is currently 1000 // files/batch, far more than the number of store files under a single column family. for (Store store : stores.values()) { // 2.1. 
build the snapshot reference directory for the store Path dstStoreDir = TakeSnapshotUtils.getStoreSnapshotDirectory(snapshotRegionDir, Bytes.toString(store.getFamily().getName())); List<StoreFile> storeFiles = store.getStorefiles(); if (LOG.isDebugEnabled()) { LOG.debug("Adding snapshot references for " + storeFiles + " hfiles"); } // 2.2. iterate through all the store's files and create "references". int sz = storeFiles.size(); for (int i = 0; i < sz; i++) { if (exnSnare != null) { exnSnare.rethrowException(); } StoreFile storeFile = storeFiles.get(i); Path file = storeFile.getPath(); LOG.debug("Creating reference for file (" + (i + 1) + "/" + sz + ") : " + file); Path referenceFile = new Path(dstStoreDir, file.getName()); boolean success = true; if (storeFile.isReference()) { // write the Reference object to the snapshot storeFile.getReference().write(fs, referenceFile); } else { // create "reference" to this store file. It is intentionally an empty file -- all // necessary information is captured by its fs location and filename. This allows us to // only figure out what needs to be done via a single nn operation (instead of having to // open and read the files as well). success = HBaseFileSystem.createNewFileOnFileSystem(fs, referenceFile); } if (!success) { throw new IOException("Failed to create reference file:" + referenceFile); } } } }
/** * Complete taking the snapshot on the region. Writes the region info and adds references to the * working snapshot directory. * * TODO for api consistency, consider adding another version with no {@link ForeignExceptionSnare} * arg. (In the future other cancellable HRegion methods could eventually add a * {@link ForeignExceptionSnare}, or we could do something fancier). * * @param desc snasphot description object * @param exnSnare ForeignExceptionSnare that captures external exeptions in case we need to * bail out. This is allowed to be null and will just be ignored in that case. * @throws IOException if there is an external or internal error causing the snapshot to fail */ public void addRegionToSnapshot(SnapshotDescription desc, ForeignExceptionSnare exnSnare) throws IOException { // This should be "fast" since we don't rewrite store files but instead // back up the store files by creating a reference Path rootDir = FSUtils.getRootDir(this.rsServices.getConfiguration()); Path snapshotRegionDir = TakeSnapshotUtils.getRegionSnapshotDirectory(desc, rootDir, regionInfo.getEncodedName()); // 1. dump region meta info into the snapshot directory LOG.debug("Storing region-info for snapshot."); checkRegioninfoOnFilesystem(snapshotRegionDir); // 2. iterate through all the stores in the region LOG.debug("Creating references for hfiles"); // This ensures that we have an atomic view of the directory as long as we have < ls limit // (batch size of the files in a directory) on the namenode. Otherwise, we get back the files in // batches and may miss files being added/deleted. This could be more robust (iteratively // checking to see if we have all the files until we are sure), but the limit is currently 1000 // files/batch, far more than the number of store files under a single column family. for (Store store : stores.values()) { // 2.1. 
build the snapshot reference directory for the store Path dstStoreDir = TakeSnapshotUtils.getStoreSnapshotDirectory(snapshotRegionDir, Bytes.toString(store.getFamily().getName())); List<StoreFile> storeFiles = store.getStorefiles(); if (LOG.isDebugEnabled()) { LOG.debug("Adding snapshot references for " + storeFiles + " hfiles"); } // 2.2. iterate through all the store's files and create "references". int sz = storeFiles.size(); for (int i = 0; i < sz; i++) { if (exnSnare != null) { exnSnare.rethrowException(); } StoreFile storeFile = storeFiles.get(i); Path file = storeFile.getPath(); LOG.debug("Creating reference for file (" + (i+1) + "/" + sz + ") : " + file); Path referenceFile = new Path(dstStoreDir, file.getName()); boolean success = true; if (storeFile.isReference()) { // write the Reference object to the snapshot storeFile.getReference().write(fs, referenceFile); } else { // create "reference" to this store file. It is intentionally an empty file -- all // necessary information is captured by its fs location and filename. This allows us to // only figure out what needs to be done via a single nn operation (instead of having to // open and read the files as well). success = HBaseFileSystem.createNewFileOnFileSystem(fs, referenceFile); } if (!success) { throw new IOException("Failed to create reference file:" + referenceFile); } } } }
/** * Verify that the region (regioninfo, hfiles) are valid * @param fs the FileSystem instance * @param snapshotDir snapshot directory to check * @param region the region to check */ private void verifyRegion(FileSystem fs, Path snapshotDir, HRegionInfo region) throws IOException { // make sure we have region in the snapshot Path regionDir = new Path(snapshotDir, region.getEncodedName()); if (!fs.exists(regionDir)) { // could happen due to a move or split race. throw new CorruptedSnapshotException("No region directory found for region:" + region, snapshot); } // make sure we have the region info in the snapshot Path regionInfo = new Path(regionDir, HRegion.REGIONINFO_FILE); // make sure the file exists if (!fs.exists(regionInfo)) { throw new CorruptedSnapshotException("No region info found for region:" + region, snapshot); } FSDataInputStream in = fs.open(regionInfo); HRegionInfo found = new HRegionInfo(); try { found.readFields(in); if (!region.equals(found)) { throw new CorruptedSnapshotException("Found region info (" + found + ") doesn't match expected region:" + region, snapshot); } } finally { in.close(); } // make sure we have the expected recovered edits files TakeSnapshotUtils.verifyRecoveredEdits(fs, snapshotDir, found, snapshot); // check for the existance of each hfile PathFilter familiesDirs = new FSUtils.FamilyDirFilter(fs); FileStatus[] columnFamilies = FSUtils.listStatus(fs, regionDir, familiesDirs); // should we do some checking here to make sure the cfs are correct? 
if (columnFamilies == null) return; // setup the suffixes for the snapshot directories Path tableNameSuffix = new Path(tableName); Path regionNameSuffix = new Path(tableNameSuffix, region.getEncodedName()); // get the potential real paths Path archivedRegion = new Path(HFileArchiveUtil.getArchivePath(services.getConfiguration()), regionNameSuffix); Path realRegion = new Path(rootDir, regionNameSuffix); // loop through each cf and check we can find each of the hfiles for (FileStatus cf : columnFamilies) { FileStatus[] hfiles = FSUtils.listStatus(fs, cf.getPath(), null); // should we check if there should be hfiles? if (hfiles == null || hfiles.length == 0) continue; Path realCfDir = new Path(realRegion, cf.getPath().getName()); Path archivedCfDir = new Path(archivedRegion, cf.getPath().getName()); for (FileStatus hfile : hfiles) { // make sure the name is correct if (!StoreFile.validateStoreFileName(hfile.getPath().getName())) { throw new CorruptedSnapshotException("HFile: " + hfile.getPath() + " is not a valid hfile name.", snapshot); } // check to see if hfile is present in the real table String fileName = hfile.getPath().getName(); Path file = new Path(realCfDir, fileName); Path archived = new Path(archivedCfDir, fileName); if (!fs.exists(file) && !fs.exists(archived)) { throw new CorruptedSnapshotException("Can't find hfile: " + hfile.getPath() + " in the real (" + realCfDir + ") or archive (" + archivedCfDir + ") directory for the primary table.", snapshot); } } } }
@Override public void snapshotRegions(List<Pair<HRegionInfo, ServerName>> regionsAndLocations) throws IOException, KeeperException { try { timeoutInjector.start(); // 1. get all the regions hosting this table. // extract each pair to separate lists Set<String> serverNames = new HashSet<String>(); Set<HRegionInfo> regions = new HashSet<HRegionInfo>(); for (Pair<HRegionInfo, ServerName> p : regionsAndLocations) { regions.add(p.getFirst()); serverNames.add(p.getSecond().toString()); } // 2. for each region, write all the info to disk LOG.info("Starting to write region info and WALs for regions for offline snapshot:" + SnapshotDescriptionUtils.toString(snapshot)); for (HRegionInfo regionInfo : regions) { // 2.1 copy the regionInfo files to the snapshot Path snapshotRegionDir = TakeSnapshotUtils.getRegionSnapshotDirectory(snapshot, rootDir, regionInfo.getEncodedName()); HRegion.writeRegioninfoOnFilesystem(regionInfo, snapshotRegionDir, fs, conf); // check for error for each region monitor.rethrowException(); // 2.2 for each region, copy over its recovered.edits directory Path regionDir = HRegion.getRegionDir(rootDir, regionInfo); new CopyRecoveredEditsTask(snapshot, monitor, fs, regionDir, snapshotRegionDir).call(); monitor.rethrowException(); status.setStatus("Completed copying recovered edits for offline snapshot of table: " + snapshot.getTable()); // 2.3 reference all the files in the region new ReferenceRegionHFilesTask(snapshot, monitor, regionDir, fs, snapshotRegionDir).call(); monitor.rethrowException(); status.setStatus("Completed referencing HFiles for offline snapshot of table: " + snapshot.getTable()); } // 3. 
write the table info to disk LOG.info("Starting to copy tableinfo for offline snapshot: " + SnapshotDescriptionUtils.toString(snapshot)); TableInfoCopyTask tableInfoCopyTask = new TableInfoCopyTask(this.monitor, snapshot, fs, FSUtils.getRootDir(conf)); tableInfoCopyTask.call(); monitor.rethrowException(); status.setStatus("Finished copying tableinfo for snapshot of table: " + snapshot.getTable()); } catch (Exception e) { // make sure we capture the exception to propagate back to the client later String reason = "Failed snapshot " + SnapshotDescriptionUtils.toString(snapshot) + " due to exception:" + e.getMessage(); ForeignException ee = new ForeignException(reason, e); monitor.receive(ee); status.abort("Snapshot of table: "+ snapshot.getTable() +" failed because " + e.getMessage()); } finally { LOG.debug("Marking snapshot" + SnapshotDescriptionUtils.toString(snapshot) + " as finished."); // 6. mark the timer as finished - even if we got an exception, we don't need to time the // operation any further timeoutInjector.complete(); } }
/** * Complete taking the snapshot on the region. Writes the region info and adds references to the * working snapshot directory. * * TODO for api consistency, consider adding another version with no {@link ForeignExceptionSnare} * arg. (In the future other cancellable HRegion methods could eventually add a * {@link ForeignExceptionSnare}, or we could do something fancier). * * @param desc snasphot description object * @param exnSnare ForeignExceptionSnare that captures external exeptions in case we need to * bail out. This is allowed to be null and will just be ignored in that case. * @throws IOException if there is an external or internal error causing the snapshot to fail */ public void addRegionToSnapshot(SnapshotDescription desc, ForeignExceptionSnare exnSnare) throws IOException { // This should be "fast" since we don't rewrite store files but instead // back up the store files by creating a reference Path rootDir = FSUtils.getRootDir(this.rsServices.getConfiguration()); Path snapshotRegionDir = TakeSnapshotUtils.getRegionSnapshotDirectory(desc, rootDir, regionInfo.getEncodedName()); // 1. dump region meta info into the snapshot directory LOG.debug("Storing region-info for snapshot."); checkRegioninfoOnFilesystem(snapshotRegionDir); // 2. iterate through all the stores in the region LOG.debug("Creating references for hfiles"); // This ensures that we have an atomic view of the directory as long as we have < ls limit // (batch size of the files in a directory) on the namenode. Otherwise, we get back the files in // batches and may miss files being added/deleted. This could be more robust (iteratively // checking to see if we have all the files until we are sure), but the limit is currently 1000 // files/batch, far more than the number of store files under a single column family. for (Store store : stores.values()) { // 2.1. 
build the snapshot reference directory for the store Path dstStoreDir = TakeSnapshotUtils.getStoreSnapshotDirectory(snapshotRegionDir, Bytes.toString(store.getFamily().getName())); List<StoreFile> storeFiles = store.getStorefiles(); if (LOG.isDebugEnabled()) { LOG.debug("Adding snapshot references for " + storeFiles + " hfiles"); } // 2.2. iterate through all the store's files and create "references". int sz = storeFiles.size(); for (int i = 0; i < sz; i++) { if (exnSnare != null) { exnSnare.rethrowException(); } Path file = storeFiles.get(i).getPath(); // create "reference" to this store file. It is intentionally an empty file -- all // necessary infomration is captured by its fs location and filename. This allows us to // only figure out what needs to be done via a single nn operation (instead of having to // open and read the files as well). LOG.debug("Creating reference for file (" + (i+1) + "/" + sz + ") : " + file); Path referenceFile = new Path(dstStoreDir, file.getName()); boolean success = HBaseFileSystem.createNewFileOnFileSystem(fs, referenceFile); if (!success) { throw new IOException("Failed to create reference file:" + referenceFile); } } } }
/**
 * Verify the log directory of the snapshot: it has to contain all the expected logs for all the
 * servers involved in the snapshot.
 * @param snapshotDir snapshot directory to check
 * @param snapshotServers names of the servers involved in the snapshot
 * @throws CorruptedSnapshotException if the hlogs in the snapshot are not correct
 * @throws IOException if we can't reach the filesystem
 */
private void verifyLogs(Path snapshotDir, Set<String> snapshotServers)
    throws CorruptedSnapshotException, IOException {
  final String logDirName = HConstants.HREGION_LOGDIR_NAME;
  // log directory inside the snapshot, and the one under the root directory
  final Path snapshotLogDir = new Path(snapshotDir, logDirName);
  final Path logsDir = new Path(rootDir, logDirName);

  TakeSnapshotUtils.verifyAllLogsGotReferenced(fs, logsDir, snapshotServers, snapshot,
      snapshotLogDir);
}