private void createHFile(Path path, byte[] family, byte[] qualifier, byte[] startKey, byte[] endKey, int numRows) throws IOException { HFile.Writer writer = null; long now = System.currentTimeMillis(); try { HFileContext context = new HFileContextBuilder().build(); writer = HFile.getWriterFactory(conf, new CacheConfig(conf)) .withPath(fs, path) .withFileContext(context) .create(); // subtract 2 since numRows doesn't include boundary keys for (byte[] key : Bytes.iterateOnSplits(startKey, endKey, true, numRows-2)) { KeyValue kv = new KeyValue(key, family, qualifier, now, key); writer.append(kv); } } finally { if(writer != null) writer.close(); } }
@Override void setUp() throws Exception { HFileContextBuilder builder = new HFileContextBuilder() .withCompression(AbstractHFileWriter.compressionByName(codec)) .withBlockSize(RFILE_BLOCKSIZE); if (cipher == "aes") { byte[] cipherKey = new byte[AES.KEY_LENGTH]; new SecureRandom().nextBytes(cipherKey); builder.withEncryptionContext(Encryption.newContext(conf) .setCipher(Encryption.getCipher(conf, cipher)) .setKey(cipherKey)); } else if (!"none".equals(cipher)) { throw new IOException("Cipher " + cipher + " not supported."); } HFileContext hFileContext = builder.build(); writer = HFile.getWriterFactoryNoCache(conf) .withPath(fs, mf) .withFileContext(hFileContext) .withComparator(new KeyValue.RawBytesComparator()) .create(); }
private static void createHFile( Configuration conf, FileSystem fs, Path path, byte[] family, byte[] qualifier) throws IOException { HFileContext context = new HFileContextBuilder().build(); HFile.Writer writer = HFile.getWriterFactory(conf, new CacheConfig(conf)) .withPath(fs, path) .withFileContext(context) .create(); long now = System.currentTimeMillis(); try { for (int i =1;i<=9;i++) { KeyValue kv = new KeyValue(Bytes.toBytes(i+""), family, qualifier, now, Bytes.toBytes(i+"")); writer.append(kv); } } finally { writer.close(); } }
/** * Create an HFile with the given number of rows with a specified value. */ public static void createHFile(FileSystem fs, Path path, byte[] family, byte[] qualifier, byte[] value, int numRows) throws IOException { HFileContext context = new HFileContextBuilder().withBlockSize(BLOCKSIZE) .withCompression(COMPRESSION) .build(); HFile.Writer writer = HFile .getWriterFactory(conf, new CacheConfig(conf)) .withPath(fs, path) .withFileContext(context) .create(); long now = System.currentTimeMillis(); try { // subtract 2 since iterateOnSplits doesn't include boundary keys for (int i = 0; i < numRows; i++) { KeyValue kv = new KeyValue(rowkey(i), family, qualifier, now, value); writer.append(kv); } writer.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(now)); } finally { writer.close(); } }
private String createHFileForFamilies(byte[] family) throws IOException { HFile.WriterFactory hFileFactory = HFile.getWriterFactoryNoCache(conf); // TODO We need a way to do this without creating files File hFileLocation = testFolder.newFile(); FSDataOutputStream out = new FSDataOutputStream(new FileOutputStream(hFileLocation)); try { hFileFactory.withOutputStream(out); hFileFactory.withFileContext(new HFileContext()); HFile.Writer writer = hFileFactory.create(); try { writer.append(new KeyValue(CellUtil.createCell(randomBytes, family, randomBytes, 0l, KeyValue.Type.Put.getCode(), randomBytes))); } finally { writer.close(); } } finally { out.close(); } return hFileLocation.getAbsoluteFile().getAbsolutePath(); }
private String createHFileForFamilies(Path testPath, byte[] family, byte[] valueBytes) throws IOException { HFile.WriterFactory hFileFactory = HFile.getWriterFactoryNoCache(TEST_UTIL.getConfiguration()); // TODO We need a way to do this without creating files Path testFile = new Path(testPath, UUID.randomUUID().toString()); FSDataOutputStream out = TEST_UTIL.getTestFileSystem().create(testFile); try { hFileFactory.withOutputStream(out); hFileFactory.withFileContext(new HFileContext()); HFile.Writer writer = hFileFactory.create(); try { writer.append(new KeyValue(CellUtil.createCell(valueBytes, family, valueBytes, 0l, KeyValue.Type.Put.getCode(), valueBytes))); } finally { writer.close(); } } finally { out.close(); } return testFile.toString(); }
private void addStoreFile() throws IOException { StoreFile f = this.store.getStorefiles().iterator().next(); Path storedir = f.getPath().getParent(); long seqid = this.store.getMaxSequenceId(); Configuration c = TEST_UTIL.getConfiguration(); FileSystem fs = FileSystem.get(c); HFileContext fileContext = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).build(); StoreFile.Writer w = new StoreFile.WriterBuilder(c, new CacheConfig(c), fs) .withOutputDir(storedir) .withFileContext(fileContext) .build(); w.appendMetadata(seqid + 1, false); w.close(); LOG.info("Added store file:" + w.getPath()); }
/** * Write a file and then assert that we can read from top and bottom halves * using two HalfMapFiles. * @throws Exception */ public void testBasicHalfMapFile() throws Exception { final HRegionInfo hri = new HRegionInfo(TableName.valueOf("testBasicHalfMapFileTb")); HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem( conf, fs, new Path(this.testDir, hri.getTable().getNameAsString()), hri); HFileContext meta = new HFileContextBuilder().withBlockSize(2*1024).build(); StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs) .withFilePath(regionFs.createTempName()) .withFileContext(meta) .build(); writeStoreFile(writer); Path sfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath()); StoreFile sf = new StoreFile(this.fs, sfPath, conf, cacheConf, BloomType.NONE); checkHalfHFile(regionFs, sf); }
public void testBloomFilter() throws Exception { FileSystem fs = FileSystem.getLocal(conf); conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01); conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true); // write the file Path f = new Path(ROOT_DIR, getName()); HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL) .withChecksumType(CKTYPE) .withBytesPerCheckSum(CKBYTES).build(); // Make a store file and write data to it. StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs) .withFilePath(f) .withBloomType(BloomType.ROW) .withMaxKeyCount(2000) .withFileContext(meta) .build(); bloomWriteRead(writer, fs); }
/** * Test for HBASE-8012 */ public void testReseek() throws Exception { // write the file Path f = new Path(ROOT_DIR, getName()); HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build(); // Make a store file and write data to it. StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs) .withFilePath(f) .withFileContext(meta) .build(); writeStoreFile(writer); writer.close(); StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf, conf); // Now do reseek with empty KV to position to the beginning of the file KeyValue k = KeyValueUtil.createFirstOnRow(HConstants.EMPTY_BYTE_ARRAY); StoreFileScanner s = reader.getStoreFileScanner(false, false); s.reseek(k); assertNotNull("Intial reseek should position at the beginning of the file", s.peek()); }
@Test public void testSeekWithRandomData() throws Exception { PrefixTreeCodec encoder = new PrefixTreeCodec(); ByteArrayOutputStream baosInMemory = new ByteArrayOutputStream(); DataOutputStream userDataStream = new DataOutputStream(baosInMemory); int batchId = numBatchesWritten++; HFileContext meta = new HFileContextBuilder() .withHBaseCheckSum(false) .withIncludesMvcc(false) .withIncludesTags(includesTag) .withCompression(Algorithm.NONE) .build(); HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext( DataBlockEncoding.PREFIX_TREE, new byte[0], meta); generateRandomTestData(kvset, batchId, includesTag, encoder, blkEncodingCtx, userDataStream); EncodedSeeker seeker = encoder.createSeeker(KeyValue.COMPARATOR, encoder.newDataBlockDecodingContext(meta)); byte[] onDiskBytes = baosInMemory.toByteArray(); ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, DataBlockEncoding.ID_SIZE, onDiskBytes.length - DataBlockEncoding.ID_SIZE); verifySeeking(seeker, readBuffer, batchId); }
@Test public void testSeekWithFixedData() throws Exception { PrefixTreeCodec encoder = new PrefixTreeCodec(); int batchId = numBatchesWritten++; HFileContext meta = new HFileContextBuilder() .withHBaseCheckSum(false) .withIncludesMvcc(false) .withIncludesTags(includesTag) .withCompression(Algorithm.NONE) .build(); HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext( DataBlockEncoding.PREFIX_TREE, new byte[0], meta); ByteArrayOutputStream baosInMemory = new ByteArrayOutputStream(); DataOutputStream userDataStream = new DataOutputStream(baosInMemory); generateFixedTestData(kvset, batchId, includesTag, encoder, blkEncodingCtx, userDataStream); EncodedSeeker seeker = encoder.createSeeker(KeyValue.COMPARATOR, encoder.newDataBlockDecodingContext(meta)); byte[] onDiskBytes = baosInMemory.toByteArray(); ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, DataBlockEncoding.ID_SIZE, onDiskBytes.length - DataBlockEncoding.ID_SIZE); verifySeeking(seeker, readBuffer, batchId); }
private void seekToTheKey(KeyValue expected, List<KeyValue> kvs, KeyValue toSeek) throws IOException { // create all seekers List<DataBlockEncoder.EncodedSeeker> encodedSeekers = new ArrayList<DataBlockEncoder.EncodedSeeker>(); for (DataBlockEncoding encoding : DataBlockEncoding.values()) { if (encoding.getEncoder() == null || encoding == DataBlockEncoding.PREFIX_TREE) { continue; } DataBlockEncoder encoder = encoding.getEncoder(); HFileContext meta = new HFileContextBuilder().withHBaseCheckSum(false) .withIncludesMvcc(false).withIncludesTags(false) .withCompression(Compression.Algorithm.NONE).build(); HFileBlockEncodingContext encodingContext = encoder.newDataBlockEncodingContext(encoding, HConstants.HFILEBLOCK_DUMMY_HEADER, meta); ByteBuffer encodedBuffer = TestDataBlockEncoders.encodeKeyValues(encoding, kvs, encodingContext); DataBlockEncoder.EncodedSeeker seeker = encoder.createSeeker(KeyValue.COMPARATOR, encoder.newDataBlockDecodingContext(meta)); seeker.setCurrentBuffer(encodedBuffer); encodedSeekers.add(seeker); } // test it! // try a few random seeks checkSeekingConsistency(encodedSeekers, toSeek, expected); }
private HFileBlockEncodingContext getEncodingContext(Compression.Algorithm algo, DataBlockEncoding encoding) { DataBlockEncoder encoder = encoding.getEncoder(); HFileContext meta = new HFileContextBuilder() .withHBaseCheckSum(false) .withIncludesMvcc(includesMemstoreTS) .withIncludesTags(includesTags) .withCompression(algo).build(); if (encoder != null) { return encoder.newDataBlockEncodingContext(encoding, HConstants.HFILEBLOCK_DUMMY_HEADER, meta); } else { return new HFileBlockDefaultEncodingContext(encoding, HConstants.HFILEBLOCK_DUMMY_HEADER, meta); } }
private void testAlgorithm(byte[] encodedData, ByteBuffer unencodedDataBuf, DataBlockEncoder encoder) throws IOException { // decode ByteArrayInputStream bais = new ByteArrayInputStream(encodedData, ENCODED_DATA_OFFSET, encodedData.length - ENCODED_DATA_OFFSET); DataInputStream dis = new DataInputStream(bais); ByteBuffer actualDataset; HFileContext meta = new HFileContextBuilder().withHBaseCheckSum(false) .withIncludesMvcc(includesMemstoreTS).withIncludesTags(includesTags) .withCompression(Compression.Algorithm.NONE).build(); actualDataset = encoder.decodeKeyValues(dis, encoder.newDataBlockDecodingContext(meta)); actualDataset.rewind(); // this is because in case of prefix tree the decoded stream will not have // the // mvcc in it. assertEquals("Encoding -> decoding gives different results for " + encoder, Bytes.toStringBinary(unencodedDataBuf), Bytes.toStringBinary(actualDataset)); }
private HFileContext createFileContext(Compression.Algorithm compression, boolean includeMVCCReadpoint, boolean includesTag, Encryption.Context cryptoContext) { if (compression == null) { compression = HFile.DEFAULT_COMPRESSION_ALGORITHM; } HFileContext hFileContext = new HFileContextBuilder() .withIncludesMvcc(includeMVCCReadpoint) .withIncludesTags(includesTag) .withCompression(compression) .withCompressTags(family.isCompressTags()) .withChecksumType(checksumType) .withBytesPerCheckSum(bytesPerChecksum) .withBlockSize(blocksize) .withHBaseCheckSum(true) .withDataBlockEncoding(family.getDataBlockEncoding()) .withEncryptionContext(cryptoContext) .build(); return hFileContext; }
/** * Create an HFile with the given number of rows between a given * start key and end key. */ public static void createHFile( Configuration configuration, FileSystem fs, Path path, byte[] family, byte[] qualifier, byte[] startKey, byte[] endKey, int numRows) throws IOException { HFileContext meta = new HFileContextBuilder().build(); HFile.Writer writer = HFile.getWriterFactory(configuration, new CacheConfig(configuration)) .withPath(fs, path) .withFileContext(meta) .create(); long now = System.currentTimeMillis(); try { // subtract 2 since iterateOnSplits doesn't include boundary keys for (byte[] key : Bytes.iterateOnSplits(startKey, endKey, numRows-2)) { KeyValue kv = new KeyValue(key, family, qualifier, now, key); writer.append(kv); } } finally { writer.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis())); writer.close(); } }
public static void doSmokeTest(FileSystem fs, Path path, String codec) throws Exception { Configuration conf = HBaseConfiguration.create(); HFileContext context = new HFileContextBuilder() .withCompression(AbstractHFileWriter.compressionByName(codec)).build(); HFile.Writer writer = HFile.getWriterFactoryNoCache(conf) .withPath(fs, path) .withFileContext(context) .create(); writer.append(Bytes.toBytes("testkey"), Bytes.toBytes("testval")); writer.appendFileInfo(Bytes.toBytes("infokey"), Bytes.toBytes("infoval")); writer.close(); HFile.Reader reader = HFile.createReader(fs, path, new CacheConfig(conf), conf); reader.loadFileInfo(); byte[] key = reader.getFirstKey(); boolean rc = Bytes.toString(key).equals("testkey"); reader.close(); if (!rc) { throw new Exception("Read back incorrect result: " + Bytes.toStringBinary(key)); } }
private HFileContext createFileContext(Compression.Algorithm compression, boolean includeMVCCReadpoint, boolean includesTag, Encryption.Context cryptoContext) { if (compression == null) { compression = HFile.DEFAULT_COMPRESSION_ALGORITHM; } HFileContext hFileContext = new HFileContextBuilder() .withIncludesMvcc(includeMVCCReadpoint) .withIncludesTags(includesTag) .withCompression(compression) .withCompressTags(family.shouldCompressTags()) .withChecksumType(checksumType) .withBytesPerCheckSum(bytesPerChecksum) .withBlockSize(blocksize) .withHBaseCheckSum(true) .withDataBlockEncoding(family.getDataBlockEncoding()) .withEncryptionContext(cryptoContext) .build(); return hFileContext; }
/** * Create an HFile with the given number of rows with a specified value. */ public static void createHFile(FileSystem fs, Path path, byte[] family, byte[] qualifier, byte[] value, int numRows) throws IOException { HFileContext context = new HFileContextBuilder().withBlockSize(BLOCKSIZE) .withCompression(COMPRESSION) .build(); HFile.Writer writer = HFile .getWriterFactory(conf, new CacheConfig(conf)) .withPath(fs, path) .withFileContext(context) .create(); long now = System.currentTimeMillis(); try { // subtract 2 since iterateOnSplits doesn't include boundary keys for (int i = 0; i < numRows; i++) { KeyValue kv = new KeyValue(rowkey(i), family, qualifier, now, value); writer.append(kv); } } finally { writer.close(); } }
/** * Test for HBASE-8012 */ public void testReseek() throws Exception { // write the file Path f = new Path(ROOT_DIR, getName()); HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build(); // Make a store file and write data to it. StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs) .withFilePath(f) .withFileContext(meta) .build(); writeStoreFile(writer); writer.close(); StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf, conf); // Now do reseek with empty KV to position to the beginning of the file KeyValue k = KeyValue.createFirstOnRow(HConstants.EMPTY_BYTE_ARRAY); StoreFileScanner s = reader.getStoreFileScanner(false, false); s.reseek(k); assertNotNull("Intial reseek should position at the beginning of the file", s.peek()); }
@Test public void testSeekWithRandomData() throws Exception { PrefixTreeCodec encoder = new PrefixTreeCodec(); int batchId = numBatchesWritten++; ByteBuffer dataBuffer = generateRandomTestData(kvset, batchId, includesTag); HFileContext meta = new HFileContextBuilder() .withHBaseCheckSum(false) .withIncludesMvcc(false) .withIncludesTags(includesTag) .withCompression(Algorithm.NONE) .build(); HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext( DataBlockEncoding.PREFIX_TREE, new byte[0], meta); encoder.encodeKeyValues(dataBuffer, blkEncodingCtx); EncodedSeeker seeker = encoder.createSeeker(KeyValue.COMPARATOR, encoder.newDataBlockDecodingContext(meta)); byte[] onDiskBytes = blkEncodingCtx.getOnDiskBytesWithHeader(); ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, DataBlockEncoding.ID_SIZE, onDiskBytes.length - DataBlockEncoding.ID_SIZE); verifySeeking(seeker, readBuffer, batchId); }
@Test public void testSeekWithFixedData() throws Exception { PrefixTreeCodec encoder = new PrefixTreeCodec(); int batchId = numBatchesWritten++; ByteBuffer dataBuffer = generateFixedTestData(kvset, batchId, includesTag); HFileContext meta = new HFileContextBuilder() .withHBaseCheckSum(false) .withIncludesMvcc(false) .withIncludesTags(includesTag) .withCompression(Algorithm.NONE) .build(); HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext( DataBlockEncoding.PREFIX_TREE, new byte[0], meta); encoder.encodeKeyValues(dataBuffer, blkEncodingCtx); EncodedSeeker seeker = encoder.createSeeker(KeyValue.COMPARATOR, encoder.newDataBlockDecodingContext(meta)); byte[] onDiskBytes = blkEncodingCtx.getOnDiskBytesWithHeader(); ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, DataBlockEncoding.ID_SIZE, onDiskBytes.length - DataBlockEncoding.ID_SIZE); verifySeeking(seeker, readBuffer, batchId); }