public static HFileDataBlockEncoder createFromFileInfo( FileInfo fileInfo) throws IOException { DataBlockEncoding encoding = DataBlockEncoding.NONE; byte[] dataBlockEncodingType = fileInfo.get(DATA_BLOCK_ENCODING); if (dataBlockEncodingType != null) { String dataBlockEncodingStr = Bytes.toString(dataBlockEncodingType); try { encoding = DataBlockEncoding.valueOf(dataBlockEncodingStr); } catch (IllegalArgumentException ex) { throw new IOException("Invalid data block encoding type in file info: " + dataBlockEncodingStr, ex); } } if (encoding == DataBlockEncoding.NONE) { return NoOpDataBlockEncoder.INSTANCE; } return new HFileDataBlockEncoderImpl(encoding); }
public AbstractHFileWriter(CacheConfig cacheConf, FSDataOutputStream outputStream, Path path, KVComparator comparator, HFileContext fileContext) { this.outputStream = outputStream; this.path = path; this.name = path != null ? path.getName() : outputStream.toString(); this.hFileContext = fileContext; DataBlockEncoding encoding = hFileContext.getDataBlockEncoding(); if (encoding != DataBlockEncoding.NONE) { this.blockEncoder = new HFileDataBlockEncoderImpl(encoding); } else { this.blockEncoder = NoOpDataBlockEncoder.INSTANCE; } this.comparator = comparator != null ? comparator : KeyValue.COMPARATOR; closeOutputStream = path != null; this.cacheConf = cacheConf; }
/** * Updates the current block to be the given {@link HFileBlock}. Seeks to * the the first key/value pair. * * @param newBlock the block to make current * @throws CorruptHFileException */ private void updateCurrentBlock(HFileBlock newBlock) throws CorruptHFileException { block = newBlock; // sanity checks if (block.getBlockType() != BlockType.ENCODED_DATA) { throw new IllegalStateException( "EncodedScanner works only on encoded data blocks"); } short dataBlockEncoderId = block.getDataBlockEncodingId(); if (!DataBlockEncoding.isCorrectEncoder(dataBlockEncoder, dataBlockEncoderId)) { String encoderCls = dataBlockEncoder.getClass().getName(); throw new CorruptHFileException("Encoder " + encoderCls + " doesn't support data block encoding " + DataBlockEncoding.getNameFromId(dataBlockEncoderId)); } seeker.setCurrentBuffer(getEncodedBuffer(newBlock)); blockFetches.incrementAndGet(); // Reset the next indexed key this.nextIndexedKey = null; }
/** * Creates a pre-split table for load testing. If the table already exists, * logs a warning and continues. * @return the number of regions the table was split into */ public static int createPreSplitLoadTestTable(Configuration conf, TableName tableName, byte[][] columnFamilies, Algorithm compression, DataBlockEncoding dataBlockEncoding, int numRegionsPerServer, int regionReplication, Durability durability) throws IOException { HTableDescriptor desc = new HTableDescriptor(tableName); desc.setDurability(durability); desc.setRegionReplication(regionReplication); HColumnDescriptor[] hcds = new HColumnDescriptor[columnFamilies.length]; for (int i = 0; i < columnFamilies.length; i++) { HColumnDescriptor hcd = new HColumnDescriptor(columnFamilies[i]); hcd.setDataBlockEncoding(dataBlockEncoding); hcd.setCompressionType(compression); hcds[i] = hcd; } return createPreSplitLoadTestTable(conf, desc, hcds, numRegionsPerServer); }
private void testSplitStoreFileWithDifferentEncoding(DataBlockEncoding bulkloadEncoding, DataBlockEncoding cfEncoding) throws IOException { Path dir = util.getDataTestDirOnTestFS("testSplitHFileWithDifferentEncoding"); FileSystem fs = util.getTestFileSystem(); Path testIn = new Path(dir, "testhfile"); HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY); familyDesc.setDataBlockEncoding(cfEncoding); HFileTestUtil.createHFileWithDataBlockEncoding( util.getConfiguration(), fs, testIn, bulkloadEncoding, FAMILY, QUALIFIER, Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000); Path bottomOut = new Path(dir, "bottom.out"); Path topOut = new Path(dir, "top.out"); LoadIncrementalHFiles.splitStoreFile( util.getConfiguration(), testIn, familyDesc, Bytes.toBytes("ggg"), bottomOut, topOut); int rowCount = verifyHFile(bottomOut); rowCount += verifyHFile(topOut); assertEquals(1000, rowCount); }
private void parseColumnFamilyOptions(CommandLine cmd) { String dataBlockEncodingStr = cmd.getOptionValue(OPT_DATA_BLOCK_ENCODING); dataBlockEncodingAlgo = dataBlockEncodingStr == null ? null : DataBlockEncoding.valueOf(dataBlockEncodingStr); String compressStr = cmd.getOptionValue(OPT_COMPRESSION); compressAlgo = compressStr == null ? Compression.Algorithm.NONE : Compression.Algorithm.valueOf(compressStr); String bloomStr = cmd.getOptionValue(OPT_BLOOM); bloomType = bloomStr == null ? BloomType.ROW : BloomType.valueOf(bloomStr); inMemoryCF = cmd.hasOption(OPT_INMEMORY); if (cmd.hasOption(OPT_ENCRYPTION)) { cipher = Encryption.getCipher(conf, cmd.getOptionValue(OPT_ENCRYPTION)); } }
public void majorCompactionWithDataBlockEncoding(boolean inCacheOnly) throws Exception { Map<Store, HFileDataBlockEncoder> replaceBlockCache = new HashMap<Store, HFileDataBlockEncoder>(); for (Store store : r.getStores()) { HFileDataBlockEncoder blockEncoder = store.getDataBlockEncoder(); replaceBlockCache.put(store, blockEncoder); final DataBlockEncoding inCache = DataBlockEncoding.PREFIX; final DataBlockEncoding onDisk = inCacheOnly ? DataBlockEncoding.NONE : inCache; ((HStore)store).setDataBlockEncoderInTest(new HFileDataBlockEncoderImpl(onDisk)); } majorCompaction(); // restore settings for (Entry<Store, HFileDataBlockEncoder> entry : replaceBlockCache.entrySet()) { ((HStore)entry.getKey()).setDataBlockEncoderInTest(entry.getValue()); } }
private static HRegion initHRegion(byte[] tableName, byte[] startKey, byte[] stopKey, String callingMethod, Configuration conf, byte[]... families) throws IOException { HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(tableName)); for(byte [] family : families) { HColumnDescriptor hcd = new HColumnDescriptor(family); hcd.setDataBlockEncoding(DataBlockEncoding.FAST_DIFF); htd.addFamily(hcd); } HRegionInfo info = new HRegionInfo(htd.getTableName(), startKey, stopKey, false); Path path = new Path(DIR + callingMethod); FileSystem fs = FileSystem.get(conf); if (fs.exists(path)) { if (!fs.delete(path, true)) { throw new IOException("Failed delete of " + path); } } return HRegion.createHRegion(info, path, conf, htd); }
/** * Verify that compression and data block encoding are respected by the * Store.createWriterInTmp() method, used on store flush. */ @Test public void testCreateWriter() throws Exception { Configuration conf = HBaseConfiguration.create(); FileSystem fs = FileSystem.get(conf); HColumnDescriptor hcd = new HColumnDescriptor(family); hcd.setCompressionType(Compression.Algorithm.GZ); hcd.setDataBlockEncoding(DataBlockEncoding.DIFF); init(name.getMethodName(), conf, hcd); // Test createWriterInTmp() StoreFile.Writer writer = store.createWriterInTmp(4, hcd.getCompression(), false, true, false); Path path = writer.getPath(); writer.append(new KeyValue(row, family, qf1, Bytes.toBytes(1))); writer.append(new KeyValue(row, family, qf2, Bytes.toBytes(2))); writer.append(new KeyValue(row2, family, qf1, Bytes.toBytes(3))); writer.append(new KeyValue(row2, family, qf2, Bytes.toBytes(4))); writer.close(); // Verify that compression and encoding settings are respected HFile.Reader reader = HFile.createReader(fs, path, new CacheConfig(conf), conf); Assert.assertEquals(hcd.getCompressionType(), reader.getCompressionAlgorithm()); Assert.assertEquals(hcd.getDataBlockEncoding(), reader.getDataBlockEncoding()); reader.close(); }
@Override public HFileBlock readBlock(long offset, long onDiskSize, boolean cacheBlock, boolean pread, boolean isCompaction, boolean updateCacheMetrics, BlockType expectedBlockType, DataBlockEncoding expectedDataBlockEncoding) throws IOException { if (offset == prevOffset && onDiskSize == prevOnDiskSize && pread == prevPread) { hitCount += 1; return prevBlock; } missCount += 1; prevBlock = realReader.readBlockData(offset, onDiskSize, -1, pread); prevOffset = offset; prevOnDiskSize = onDiskSize; prevPread = pread; return prevBlock; }
static void assertBuffersEqual(ByteBuffer expectedBuffer, ByteBuffer actualBuffer, Compression.Algorithm compression, DataBlockEncoding encoding, boolean pread) { if (!actualBuffer.equals(expectedBuffer)) { int prefix = 0; int minLimit = Math.min(expectedBuffer.limit(), actualBuffer.limit()); while (prefix < minLimit && expectedBuffer.get(prefix) == actualBuffer.get(prefix)) { prefix++; } fail(String.format( "Content mismatch for %s, commonPrefix %d, expected %s, got %s", buildMessageDetails(compression, encoding, pread), prefix, nextBytesToStr(expectedBuffer, prefix), nextBytesToStr(actualBuffer, prefix))); } }
private void testEncodingWithCacheInternals(boolean useTag) throws IOException { List<KeyValue> kvs = generator.generateTestKeyValues(60, useTag); HFileBlock block = getSampleHFileBlock(kvs, useTag); HFileBlock cacheBlock = createBlockOnDisk(kvs, block, useTag); LruBlockCache blockCache = new LruBlockCache(8 * 1024 * 1024, 32 * 1024); BlockCacheKey cacheKey = new BlockCacheKey("test", 0); blockCache.cacheBlock(cacheKey, cacheBlock); HeapSize heapSize = blockCache.getBlock(cacheKey, false, false, true); assertTrue(heapSize instanceof HFileBlock); HFileBlock returnedBlock = (HFileBlock) heapSize;; if (blockEncoder.getDataBlockEncoding() == DataBlockEncoding.NONE) { assertEquals(block.getBufferWithHeader(), returnedBlock.getBufferWithHeader()); } else { if (BlockType.ENCODED_DATA != returnedBlock.getBlockType()) { System.out.println(blockEncoder); } assertEquals(BlockType.ENCODED_DATA, returnedBlock.getBlockType()); } }
/** * @return All possible data block encoding configurations */ @Parameters public static Collection<Object[]> getAllConfigurations() { List<Object[]> configurations = new ArrayList<Object[]>(); for (DataBlockEncoding diskAlgo : DataBlockEncoding.values()) { for (boolean includesMemstoreTS : new boolean[] { false, true }) { HFileDataBlockEncoder dbe = (diskAlgo == DataBlockEncoding.NONE) ? NoOpDataBlockEncoder.INSTANCE : new HFileDataBlockEncoderImpl(diskAlgo); configurations.add(new Object[] { dbe, new Boolean(includesMemstoreTS) }); } } return configurations; }
private void createTable() throws Exception { deleteTable(); LOG.info("Creating table"); Configuration conf = util.getConfiguration(); String encodingKey = String.format(ENCODING_KEY, this.getClass().getSimpleName()); DataBlockEncoding blockEncoding = DataBlockEncoding.valueOf(conf.get(encodingKey, "FAST_DIFF")); HTableDescriptor htd = new HTableDescriptor(TABLE_NAME); for (byte[] cf : dataGen.getColumnFamilies()) { HColumnDescriptor hcd = new HColumnDescriptor(cf); hcd.setDataBlockEncoding(blockEncoding); htd.addFamily(hcd); } int serverCount = util.getHBaseClusterInterface().getClusterStatus().getServersSize(); byte[][] splits = new RegionSplitter.HexStringSplit().split(serverCount * REGIONS_PER_SERVER); util.getHBaseAdmin().createTable(htd, splits); LOG.info("Created table"); }
@Override public void perform() throws Exception { HTableDescriptor tableDescriptor = admin.getTableDescriptor(tableName); HColumnDescriptor[] columnDescriptors = tableDescriptor.getColumnFamilies(); if (columnDescriptors == null || columnDescriptors.length == 0) { return; } LOG.debug("Performing action: Changing encodings on " + tableName); // possible DataBlockEncoding id's int[] possibleIds = {0, 2, 3, 4, 6}; for (HColumnDescriptor descriptor : columnDescriptors) { short id = (short) possibleIds[random.nextInt(possibleIds.length)]; descriptor.setDataBlockEncoding(DataBlockEncoding.getEncodingById(id)); LOG.debug("Set encoding of column family " + descriptor.getNameAsString() + " to: " + descriptor.getDataBlockEncoding()); } // Don't try the modify if we're stopping if (context.isStopping()) { return; } admin.modifyTable(tableName, tableDescriptor); }
public HFileContext(boolean useHBaseChecksum, boolean includesMvcc, boolean includesTags, Compression.Algorithm compressAlgo, boolean compressTags, ChecksumType checksumType, int bytesPerChecksum, int blockSize, DataBlockEncoding encoding, Encryption.Context cryptoContext, long fileCreateTime) { this.usesHBaseChecksum = useHBaseChecksum; this.includesMvcc = includesMvcc; this.includesTags = includesTags; this.compressAlgo = compressAlgo; this.compressTags = compressTags; this.checksumType = checksumType; this.bytesPerChecksum = bytesPerChecksum; this.blocksize = blockSize; if (encoding != null) { this.encoding = encoding; } this.cryptoContext = cryptoContext; this.fileCreateTime = fileCreateTime; }
/** @return data block encoding algorithm used on disk */ public DataBlockEncoding getDataBlockEncodingOnDisk() { String encodeOnDiskStr = getValue(ENCODE_ON_DISK); boolean encodeOnDisk; if (encodeOnDiskStr == null) { encodeOnDisk = DEFAULT_ENCODE_ON_DISK; } else { encodeOnDisk = Boolean.valueOf(encodeOnDiskStr); } if (!encodeOnDisk) { // No encoding on disk. return DataBlockEncoding.NONE; } return getDataBlockEncoding(); }
/** * Updates the current block to be the given {@link HFileBlock}. Seeks to * the the first key/value pair. * * @param newBlock the block to make current * @throws CorruptHFileException */ private void updateCurrentBlock(HFileBlock newBlock) throws CorruptHFileException { block = newBlock; // sanity checks if (block.getBlockType() != BlockType.ENCODED_DATA) { throw new IllegalStateException( "EncodedScanner works only on encoded data blocks"); } short dataBlockEncoderId = block.getDataBlockEncodingId(); if (!DataBlockEncoding.isCorrectEncoder(dataBlockEncoder, dataBlockEncoderId)) { String encoderCls = dataBlockEncoder.getClass().getName(); throw new CorruptHFileException("Encoder " + encoderCls + " doesn't support data block encoding " + DataBlockEncoding.getNameFromId(dataBlockEncoderId)); } seeker.setCurrentBuffer(getEncodedBuffer(newBlock)); blockFetches++; // Reset the next indexed key this.nextIndexedKey = null; }
private HFileBlock encodeDataBlock(HFileBlock block, DataBlockEncoding algo, boolean includesMemstoreTS) { ByteBuffer compressedBuffer = encodeBufferToHFileBlockBuffer( block.getBufferWithoutHeader(), algo, includesMemstoreTS, block.getDummyHeaderForVersion()); int sizeWithoutHeader = compressedBuffer.limit() - block.headerSize(); HFileBlock encodedBlock = new HFileBlock(BlockType.ENCODED_DATA, block.getOnDiskSizeWithoutHeader(), sizeWithoutHeader, block.getPrevBlockOffset(), compressedBuffer, HFileBlock.FILL_HEADER, block.getOffset(), includesMemstoreTS, block.getMinorVersion(), block.getBytesPerChecksum(), block.getChecksumType(), block.getOnDiskDataSizeWithHeader()); block.passSchemaMetricsTo(encodedBlock); return encodedBlock; }
public HFileContext(boolean useHBaseChecksum, boolean includesMvcc, boolean includesTags, Compression.Algorithm compressAlgo, boolean compressTags, ChecksumType checksumType, int bytesPerChecksum, int blockSize, DataBlockEncoding encoding, Encryption.Context cryptoContext) { this.usesHBaseChecksum = useHBaseChecksum; this.includesMvcc = includesMvcc; this.includesTags = includesTags; this.compressAlgo = compressAlgo; this.compressTags = compressTags; this.checksumType = checksumType; this.bytesPerChecksum = bytesPerChecksum; this.blocksize = blockSize; if (encoding != null) { this.encoding = encoding; } this.cryptoContext = cryptoContext; }
protected void parseColumnFamilyOptions(CommandLine cmd) { String dataBlockEncodingStr = cmd.getOptionValue(OPT_DATA_BLOCK_ENCODING); dataBlockEncodingAlgo = dataBlockEncodingStr == null ? null : DataBlockEncoding.valueOf(dataBlockEncodingStr); if (dataBlockEncodingAlgo == DataBlockEncoding.NONE && encodeInCacheOnly) { throw new IllegalArgumentException("-" + OPT_ENCODE_IN_CACHE_ONLY + " " + "does not make sense when data block encoding is not used"); } String compressStr = cmd.getOptionValue(OPT_COMPRESSION); compressAlgo = compressStr == null ? Compression.Algorithm.NONE : Compression.Algorithm.valueOf(compressStr); String bloomStr = cmd.getOptionValue(OPT_BLOOM); bloomType = bloomStr == null ? null : StoreFile.BloomType.valueOf(bloomStr); inMemoryCF = cmd.hasOption(OPT_INMEMORY); }
/** * Test for HBASE-8012 */ public void testReseek() throws Exception { // write the file Path f = new Path(ROOT_DIR, getName()); // Make a store file and write data to it. StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs, 8 * 1024) .withFilePath(f) .build(); writeStoreFile(writer); writer.close(); StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf, DataBlockEncoding.NONE); // Now do reseek with empty KV to position to the beginning of the file KeyValue k = KeyValue.createFirstOnRow(HConstants.EMPTY_BYTE_ARRAY); StoreFileScanner s = reader.getStoreFileScanner(false, false); s.reseek(k); assertNotNull("Intial reseek should position at the beginning of the file", s.peek()); }
/** * Test putting and taking out blocks into cache with different * encoding options. */ @Test public void testEncodingWithCache() { HFileBlock block = getSampleHFileBlock(); LruBlockCache blockCache = new LruBlockCache(8 * 1024 * 1024, 32 * 1024, TEST_UTIL.getConfiguration()); HFileBlock cacheBlock = blockEncoder.diskToCacheFormat(block, false); BlockCacheKey cacheKey = new BlockCacheKey("test", 0); blockCache.cacheBlock(cacheKey, cacheBlock); HeapSize heapSize = blockCache.getBlock(cacheKey, false, false); assertTrue(heapSize instanceof HFileBlock); HFileBlock returnedBlock = (HFileBlock) heapSize;; if (blockEncoder.getEncodingInCache() == DataBlockEncoding.NONE) { assertEquals(block.getBufferWithHeader(), returnedBlock.getBufferWithHeader()); } else { if (BlockType.ENCODED_DATA != returnedBlock.getBlockType()) { System.out.println(blockEncoder); } assertEquals(BlockType.ENCODED_DATA, returnedBlock.getBlockType()); } }
/** * @return All possible data block encoding configurations */ @Parameters public static Collection<Object[]> getAllConfigurations() { List<Object[]> configurations = new ArrayList<Object[]>(); for (DataBlockEncoding diskAlgo : DataBlockEncoding.values()) { for (DataBlockEncoding cacheAlgo : DataBlockEncoding.values()) { if (diskAlgo != cacheAlgo && diskAlgo != DataBlockEncoding.NONE) { // We allow (1) the same encoding on disk and in cache, and // (2) some encoding in cache but no encoding on disk (for testing). continue; } for (boolean includesMemstoreTS : new boolean[] {false, true}) { configurations.add(new Object[] { new HFileDataBlockEncoderImpl(diskAlgo, cacheAlgo), new Boolean(includesMemstoreTS)}); } } } return configurations; }
private void createTableIfNotExists() throws AtlasException { Admin admin = null; try { admin = connection.getAdmin(); LOG.info("Checking if table {} exists", tableName.getNameAsString()); if (!admin.tableExists(tableName)) { LOG.info("Creating table {}", tableName.getNameAsString()); HTableDescriptor tableDescriptor = new HTableDescriptor(tableName); HColumnDescriptor columnFamily = new HColumnDescriptor(COLUMN_FAMILY); columnFamily.setMaxVersions(1); columnFamily.setDataBlockEncoding(DataBlockEncoding.FAST_DIFF); columnFamily.setCompressionType(Compression.Algorithm.GZ); columnFamily.setBloomFilterType(BloomType.ROW); tableDescriptor.addFamily(columnFamily); admin.createTable(tableDescriptor); } else { LOG.info("Table {} exists", tableName.getNameAsString()); } } catch (IOException e) { throw new AtlasException(e); } finally { close(admin); } }
@Test public void ignoredOptionsAreIgnored() { // We're really checking to make certain we don't trigger an exception for an ignored option: descriptor.setCompressionType(Compression.Algorithm.LZ4); descriptor.setCompactionCompressionType(Compression.Algorithm.LZ4); descriptor.setDataBlockEncoding(DataBlockEncoding.FAST_DIFF); descriptor.setBlockCacheEnabled(false); descriptor.setCacheDataOnWrite(true); descriptor.setCacheDataInL1(true); descriptor.setEvictBlocksOnClose(false); descriptor.setBloomFilterType(BloomType.ROW); descriptor.setPrefetchBlocksOnOpen(true); descriptor.setBlocksize(16 * 1024); descriptor.setScope(1); // REPLICATION_SCOPE descriptor.setInMemory(true); ColumnFamily.Builder result = adapter.adapt(descriptor) .clearName() .clearGcExpression(); Assert.assertArrayEquals( new byte[0], result.build().toByteArray()); }
public void majorCompactionWithDataBlockEncoding(boolean inCacheOnly) throws Exception { Map<HStore, HFileDataBlockEncoder> replaceBlockCache = new HashMap<HStore, HFileDataBlockEncoder>(); for (Entry<byte[], Store> pair : r.getStores().entrySet()) { HStore store = (HStore) pair.getValue(); HFileDataBlockEncoder blockEncoder = store.getDataBlockEncoder(); replaceBlockCache.put(store, blockEncoder); final DataBlockEncoding inCache = DataBlockEncoding.PREFIX; final DataBlockEncoding onDisk = inCacheOnly ? DataBlockEncoding.NONE : inCache; store.setDataBlockEncoderInTest(new HFileDataBlockEncoderImpl(onDisk)); } majorCompaction(); // restore settings for (Entry<HStore, HFileDataBlockEncoder> entry : replaceBlockCache.entrySet()) { entry.getKey().setDataBlockEncoderInTest(entry.getValue()); } }