/** * Returns a StoreFileInfo from the given FileStatus. Secondary replicas refer to the * files of the primary region, so an HFileLink is used to construct the StoreFileInfo. This * way ensures that the secondary will be able to continue reading the store files even if * they are moved to archive after compaction * @throws IOException */ public static StoreFileInfo getStoreFileInfo(Configuration conf, FileSystem fs, HRegionInfo regionInfo, HRegionInfo regionInfoForFs, String familyName, Path path) throws IOException { // if this is a primary region, just return the StoreFileInfo constructed from path if (regionInfo.equals(regionInfoForFs)) { return new StoreFileInfo(conf, fs, path); } // else create a store file link. The link file does not exists on filesystem though. HFileLink link = HFileLink.build(conf, regionInfoForFs.getTable(), regionInfoForFs.getEncodedName(), familyName, path.getName()); if (StoreFileInfo.isReference(path)) { Reference reference = Reference.read(fs, path); return new StoreFileInfo(conf, fs, link.getFileStatus(fs), reference); } return new StoreFileInfo(conf, fs, link.getFileStatus(fs), link); }
/** * Write out a merge reference under the given merges directory. Package local so it doesnt leak * out of regionserver. * * @param mergedRegion {@link HRegionInfo} of the merged region * @param familyName Column Family Name * @param f File to create reference. * @param mergedDir * @return Path to created reference. * @throws IOException */ Path mergeStoreFile(final HRegionInfo mergedRegion, final String familyName, final StoreFile f, final Path mergedDir) throws IOException { Path referenceDir = new Path(new Path(mergedDir, mergedRegion.getEncodedName()), familyName); // A whole reference to the store file. Reference r = Reference.createTopReference(regionInfoForFs.getStartKey()); // Add the referred-to regions name as a dot separated suffix. // See REF_NAME_REGEX regex above. The referred-to regions name is // up in the path of the passed in <code>f</code> -- parentdir is family, // then the directory above is the region name. String mergingRegionName = regionInfoForFs.getEncodedName(); // Write reference with same file id only with the other region name as // suffix and into the new region location (under same family). Path p = new Path(referenceDir, f.getPath().getName() + "." + mergingRegionName); return r.write(fs, p); }
/** * helper function to compute HDFS blocks distribution of a given reference file.For reference * file, we don't compute the exact value. We use some estimate instead given it might be good * enough. we assume bottom part takes the first half of reference file, top part takes the second * half of the reference file. This is just estimate, given midkey ofregion != midkey of HFile, * also the number and size of keys vary. If this estimate isn't good enough, we can improve it * later. * * @param fs The FileSystem * @param reference The reference * @param status The reference FileStatus * @return HDFS blocks distribution */ private static HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(final FileSystem fs, final Reference reference, final FileStatus status) throws IOException { if (status == null) { return null; } long start = 0; long length = 0; if (Reference.isTopFileRegion(reference.getFileRegion())) { start = status.getLen() / 2; length = status.getLen() - status.getLen() / 2; } else { start = 0; length = status.getLen() / 2; } return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length); }
/** * @param services Master services instance. * @param htd * @param parent * @param daughter * @param midkey * @param top True if we are to write a 'top' reference. * @return Path to reference we created. * @throws IOException */ private Path createReferences(final MasterServices services, final HTableDescriptor htd, final HRegionInfo parent, final HRegionInfo daughter, final byte [] midkey, final boolean top) throws IOException { Path rootdir = services.getMasterFileSystem().getRootDir(); Path tabledir = FSUtils.getTableDir(rootdir, parent.getTable()); Path storedir = HStore.getStoreHomedir(tabledir, daughter, htd.getColumnFamilies()[0].getName()); Reference ref = top? Reference.createTopReference(midkey): Reference.createBottomReference(midkey); long now = System.currentTimeMillis(); // Reference name has this format: StoreFile#REF_NAME_PARSER Path p = new Path(storedir, Long.toString(now) + "." + parent.getEncodedName()); FileSystem fs = services.getMasterFileSystem().getFileSystem(); ref.write(fs, p); return p; }
/** * helper function to compute HDFS blocks distribution of a given reference file.For reference * file, we don't compute the exact value. We use some estimate instead given it might be good * enough. we assume bottom part takes the first half of reference file, top part takes the second * half of the reference file. This is just estimate, given midkey ofregion != midkey of HFile, * also the number and size of keys vary. If this estimate isn't good enough, we can improve it * later. * @param fs The FileSystem * @param reference The reference * @param status The reference FileStatus * @return HDFS blocks distribution */ static private HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(FileSystem fs, Reference reference, FileStatus status) throws IOException { if (status == null) { return null; } long start = 0; long length = 0; if (Reference.isTopFileRegion(reference.getFileRegion())) { start = status.getLen() / 2; length = status.getLen() - status.getLen() / 2; } else { start = 0; length = status.getLen() / 2; } return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length); }
/** * @param services Master services instance. * @param htd * @param parent * @param daughter * @param midkey * @param top True if we are to write a 'top' reference. * @return Path to reference we created. * @throws IOException */ private Path createReferences(final MasterServices services, final HTableDescriptor htd, final HRegionInfo parent, final HRegionInfo daughter, final byte [] midkey, final boolean top) throws IOException { Path rootdir = services.getMasterFileSystem().getRootDir(); Path tabledir = HTableDescriptor.getTableDir(rootdir, parent.getTableName()); Path storedir = Store.getStoreHomedir(tabledir, daughter.getEncodedName(), htd.getColumnFamilies()[0].getName()); Reference ref = new Reference(midkey, top? Reference.Range.top: Reference.Range.bottom); long now = System.currentTimeMillis(); // Reference name has this format: StoreFile#REF_NAME_PARSER Path p = new Path(storedir, Long.toString(now) + "." + parent.getEncodedName()); FileSystem fs = services.getMasterFileSystem().getFileSystem(); ref.write(fs, p); return p; }
/** * Write out a merge reference under the given merges directory. Package local * so it doesnt leak out of regionserver. * @param mergedRegion {@link HRegionInfo} of the merged region * @param familyName Column Family Name * @param f File to create reference. * @param mergedDir * @return Path to created reference. * @throws IOException */ Path mergeStoreFile(final HRegionInfo mergedRegion, final String familyName, final StoreFile f, final Path mergedDir) throws IOException { Path referenceDir = new Path(new Path(mergedDir, mergedRegion.getEncodedName()), familyName); // A whole reference to the store file. Reference r = Reference.createTopReference(regionInfoForFs.getStartKey()); // Add the referred-to regions name as a dot separated suffix. // See REF_NAME_REGEX regex above. The referred-to regions name is // up in the path of the passed in <code>f</code> -- parentdir is family, // then the directory above is the region name. String mergingRegionName = regionInfoForFs.getEncodedName(); // Write reference with same file id only with the other region name as // suffix and into the new region location (under same family). Path p = new Path(referenceDir, f.getPath().getName() + "." + mergingRegionName); return r.write(fs, p); }
/** * helper function to compute HDFS blocks distribution of a given reference * file.For reference file, we don't compute the exact value. We use some * estimate instead given it might be good enough. we assume bottom part * takes the first half of reference file, top part takes the second half * of the reference file. This is just estimate, given * midkey ofregion != midkey of HFile, also the number and size of keys vary. * If this estimate isn't good enough, we can improve it later. * @param fs The FileSystem * @param reference The reference * @param status The reference FileStatus * @return HDFS blocks distribution */ private static HDFSBlocksDistribution computeRefFileHDFSBlockDistribution( final FileSystem fs, final Reference reference, final FileStatus status) throws IOException { if (status == null) { return null; } long start = 0; long length = 0; if (Reference.isTopFileRegion(reference.getFileRegion())) { start = status.getLen()/2; length = status.getLen() - status.getLen()/2; } else { start = 0; length = status.getLen()/2; } return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length); }
/** * Write out a merge reference under the given merges directory. Package local * so it doesnt leak out of regionserver. * @param mergedRegion {@link HRegionInfo} of the merged region * @param familyName Column Family Name * @param f File to create reference. * @param mergedDir * @return Path to created reference. * @throws IOException */ Path mergeStoreFile(final HRegionInfo mergedRegion, final String familyName, final StoreFile f, final Path mergedDir) throws IOException { Path referenceDir = new Path(new Path(mergedDir, mergedRegion.getEncodedName()), familyName); // A whole reference to the store file. Reference r = Reference.createTopReference(regionInfo.getStartKey()); // Add the referred-to regions name as a dot separated suffix. // See REF_NAME_REGEX regex above. The referred-to regions name is // up in the path of the passed in <code>f</code> -- parentdir is family, // then the directory above is the region name. String mergingRegionName = regionInfo.getEncodedName(); // Write reference with same file id only with the other region name as // suffix and into the new region location (under same family). Path p = new Path(referenceDir, f.getPath().getName() + "." + mergingRegionName); return r.write(fs, p); }
/** * @param fs fileystem to read from * @param p path to the file * @param in {@link FSDataInputStreamWrapper} * @param size Full size of the file * @param cacheConf * @param r original reference file. This will be not null only when reading a split file. * @param reader the base reader instance * @return The reader to use * @throws IOException */ public StoreFile.Reader postStoreFileReaderOpen(final FileSystem fs, final Path p, final FSDataInputStreamWrapper in, final long size, final CacheConfig cacheConf, final Reference r, StoreFile.Reader reader) throws IOException { ObserverContext<RegionCoprocessorEnvironment> ctx = null; for (RegionEnvironment env : coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); Thread currentThread = Thread.currentThread(); ClassLoader cl = currentThread.getContextClassLoader(); try { currentThread.setContextClassLoader(env.getClassLoader()); reader = ((RegionObserver) env.getInstance()).postStoreFileReaderOpen(ctx, fs, p, in, size, cacheConf, r, reader); } catch (Throwable e) { handleCoprocessorThrowable(env, e); } finally { currentThread.setContextClassLoader(cl); } if (ctx.shouldComplete()) { break; } } } return reader; }
@Test public void testIndexHalfStoreFileReaderWithSeekTo() throws Exception { HBaseTestingUtility test_util = new HBaseTestingUtility(); String root_dir = test_util.getDataTestDir("TestIndexHalfStoreFile").toString(); Path p = new Path(root_dir, "test"); Configuration conf = test_util.getConfiguration(); FileSystem fs = FileSystem.get(conf); CacheConfig cacheConf = new CacheConfig(conf); HFileContext meta = new HFileContextBuilder().withBlockSize(1024).build(); HFile.Writer w = HFile.getWriterFactory(conf, cacheConf).withPath(fs, p).withFileContext(meta) .withComparator(KeyValue.COMPARATOR).create(); String usertableName = "testIndexHalfStore"; List<KeyValue> items = genSomeKeys(usertableName); for (KeyValue kv : items) { w.append(kv); } w.close(); HFile.Reader r = HFile.createReader(fs, p, cacheConf, conf); r.loadFileInfo(); byte[] midkey = "005".getBytes(); Reference top = new Reference(midkey, Reference.Range.top); doTestOfScanAndReseek(p, fs, top, cacheConf, conf); r.close(); }
private void doTestOfScanAndReseek(Path p, FileSystem fs, Reference bottom, CacheConfig cacheConf, Configuration conf) throws IOException { final IndexHalfStoreFileReader halfreader = new IndexHalfStoreFileReader(fs, p, cacheConf, bottom, conf); halfreader.loadFileInfo(); final HFileScanner scanner = halfreader.getScanner(false, false); KeyValue getseekTorowKey3 = getSeekToRowKey(); scanner.seekTo(getseekTorowKey3.getBuffer(), 8, 17); boolean next = scanner.next(); KeyValue keyValue = null; if (next) { keyValue = scanner.getKeyValue(); } byte[] expectedRow = getExpected(); byte[] actualRow = keyValue.getRow(); Assert.assertArrayEquals(expectedRow, actualRow); halfreader.close(true); }
@Override public StoreFile.Reader preStoreFileReaderOpen( ObserverContext<RegionCoprocessorEnvironment> ctx, FileSystem fs, Path p, FSDataInputStreamWrapper in, long size, CacheConfig cacheConf, Reference r, StoreFile.Reader reader) throws IOException { try { long delay = rnd.nextInt(3); LOG.info("@@@ Delaying region " + ctx.getEnvironment().getRegion().getRegionInfo(). getRegionNameAsString() + " for " + delay + " seconds..."); Thread.sleep(delay * 1000); } catch (InterruptedException ie) { LOG.error(ie); } return reader; }
/** * helper function to compute HDFS blocks distribution of a given reference * file.For reference file, we don't compute the exact value. We use some * estimate instead given it might be good enough. we assume bottom part * takes the first half of reference file, top part takes the second half * of the reference file. This is just estimate, given * midkey ofregion != midkey of HFile, also the number and size of keys vary. * If this estimate isn't good enough, we can improve it later. * @param fs The FileSystem * @param reference The reference * @param status The reference FileStatus * @return HDFS blocks distribution */ static private HDFSBlocksDistribution computeRefFileHDFSBlockDistribution( FileSystem fs, Reference reference, FileStatus status) throws IOException { if (status == null) { return null; } long start = 0; long length = 0; if (Reference.isTopFileRegion(reference.getFileRegion())) { start = status.getLen()/2; length = status.getLen() - status.getLen()/2; } else { start = 0; length = status.getLen()/2; } return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length); }
/** * Returns a StoreFileInfo from the given FileStatus. Secondary replicas refer to the * files of the primary region, so an HFileLink is used to construct the StoreFileInfo. This * way ensures that the secondary will be able to continue reading the store files even if * they are moved to archive after compaction * @throws IOException */ public static StoreFileInfo getStoreFileInfo(Configuration conf, FileSystem fs, RegionInfo regionInfo, RegionInfo regionInfoForFs, String familyName, Path path) throws IOException { // if this is a primary region, just return the StoreFileInfo constructed from path if (RegionInfo.COMPARATOR.compare(regionInfo, regionInfoForFs) == 0) { return new StoreFileInfo(conf, fs, path); } // else create a store file link. The link file does not exists on filesystem though. HFileLink link = HFileLink.build(conf, regionInfoForFs.getTable(), regionInfoForFs.getEncodedName(), familyName, path.getName()); if (StoreFileInfo.isReference(path)) { Reference reference = Reference.read(fs, path); return new StoreFileInfo(conf, fs, link.getFileStatus(fs), reference); } return new StoreFileInfo(conf, fs, link.getFileStatus(fs), link); }
/** * Write out a merge reference under the given merges directory. Package local * so it doesnt leak out of regionserver. * @param mergedRegion {@link RegionInfo} of the merged region * @param familyName Column Family Name * @param f File to create reference. * @param mergedDir * @return Path to created reference. * @throws IOException */ public Path mergeStoreFile(RegionInfo mergedRegion, String familyName, HStoreFile f, Path mergedDir) throws IOException { Path referenceDir = new Path(new Path(mergedDir, mergedRegion.getEncodedName()), familyName); // A whole reference to the store file. Reference r = Reference.createTopReference(regionInfoForFs.getStartKey()); // Add the referred-to regions name as a dot separated suffix. // See REF_NAME_REGEX regex above. The referred-to regions name is // up in the path of the passed in <code>f</code> -- parentdir is family, // then the directory above is the region name. String mergingRegionName = regionInfoForFs.getEncodedName(); // Write reference with same file id only with the other region name as // suffix and into the new region location (under same family). Path p = new Path(referenceDir, f.getPath().getName() + "." + mergingRegionName); return r.write(fs, p); }
/** * @param fs fileystem to read from * @param p path to the file * @param in {@link FSDataInputStreamWrapper} * @param size Full size of the file * @param cacheConf * @param r original reference file. This will be not null only when reading a split file. * @return a Reader instance to use instead of the base reader if overriding * default behavior, null otherwise * @throws IOException */ public StoreFileReader preStoreFileReaderOpen(final FileSystem fs, final Path p, final FSDataInputStreamWrapper in, final long size, final CacheConfig cacheConf, final Reference r) throws IOException { if (coprocEnvironments.isEmpty()) { return null; } return execOperationWithResult( new ObserverOperationWithResult<RegionObserver, StoreFileReader>(regionObserverGetter, null) { @Override public StoreFileReader call(RegionObserver observer) throws IOException { return observer.preStoreFileReaderOpen(this, fs, p, in, size, cacheConf, r, getResult()); } }); }
/** * @param fs fileystem to read from * @param p path to the file * @param in {@link FSDataInputStreamWrapper} * @param size Full size of the file * @param cacheConf * @param r original reference file. This will be not null only when reading a split file. * @param reader the base reader instance * @return The reader to use * @throws IOException */ public StoreFileReader postStoreFileReaderOpen(final FileSystem fs, final Path p, final FSDataInputStreamWrapper in, final long size, final CacheConfig cacheConf, final Reference r, final StoreFileReader reader) throws IOException { if (this.coprocEnvironments.isEmpty()) { return reader; } return execOperationWithResult( new ObserverOperationWithResult<RegionObserver, StoreFileReader>(regionObserverGetter, reader) { @Override public StoreFileReader call(RegionObserver observer) throws IOException { return observer.postStoreFileReaderOpen(this, fs, p, in, size, cacheConf, r, getResult()); } }); }
private Path createReferences(final MasterServices services, final TableDescriptor td, final HRegionInfo parent, final HRegionInfo daughter, final byte [] midkey, final boolean top) throws IOException { Path rootdir = services.getMasterFileSystem().getRootDir(); Path tabledir = FSUtils.getTableDir(rootdir, parent.getTable()); Path storedir = HStore.getStoreHomedir(tabledir, daughter, td.getColumnFamilies()[0].getName()); Reference ref = top? Reference.createTopReference(midkey): Reference.createBottomReference(midkey); long now = System.currentTimeMillis(); // Reference name has this format: StoreFile#REF_NAME_PARSER Path p = new Path(storedir, Long.toString(now) + "." + parent.getEncodedName()); FileSystem fs = services.getMasterFileSystem().getFileSystem(); ref.write(fs, p); return p; }
/** * helper function to compute HDFS blocks distribution of a given reference * file.For reference file, we don't compute the exact value. We use some * estimate instead given it might be good enough. we assume bottom part * takes the first half of reference file, top part takes the second half * of the reference file. This is just estimate, given * midkey ofregion != midkey of HFile, also the number and size of keys vary. * If this estimate isn't good enough, we can improve it later. * @param fs The FileSystem * @param reference The reference * @param reference The referencePath * @return HDFS blocks distribution */ static private HDFSBlocksDistribution computeRefFileHDFSBlockDistribution( FileSystem fs, Reference reference, Path referencePath) throws IOException { if ( referencePath == null) { return null; } FileStatus status = fs.getFileStatus(referencePath); long start = 0; long length = 0; if (Reference.isTopFileRegion(reference.getFileRegion())) { start = status.getLen()/2; length = status.getLen() - status.getLen()/2; } else { start = 0; length = status.getLen()/2; } return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length); }
/** * Write out a split reference. * * Package local so it doesnt leak out of regionserver. * * @param fs * @param splitDir Presumes path format is actually * <code>SOME_DIRECTORY/REGIONNAME/FAMILY</code>. * @param f File to split. * @param splitRow * @param range * @return Path to created reference. * @throws IOException */ static Path split(final FileSystem fs, final Path splitDir, final StoreFile f, final byte [] splitRow, final Reference.Range range) throws IOException { // A reference to the bottom half of the hsf store file. Reference r = new Reference(splitRow, range); // Add the referred-to regions name as a dot separated suffix. // See REF_NAME_PARSER regex above. The referred-to regions name is // up in the path of the passed in <code>f</code> -- parentdir is family, // then the directory above is the region name. String parentRegionName = f.getPath().getParent().getParent().getName(); // Write reference with same file id only with the other region name as // suffix and into the new region location (under same family). Path p = new Path(splitDir, f.getPath().getName() + "." + parentRegionName); return r.write(fs, p); }