/** * Validates the false positive ratio by computing its z-value and comparing * it to the provided threshold. * * @param falsePosRate experimental positive rate * @param nTrials the number of Bloom filter checks * @param zValueBoundary z-value boundary, positive for an upper bound and * negative for a lower bound * @param cbf the compound Bloom filter we are using * @param additionalMsg additional message to include in log output and * assertion failures */ private void validateFalsePosRate(double falsePosRate, int nTrials, double zValueBoundary, CompoundBloomFilter cbf, String additionalMsg) { double p = BloomFilterFactory.getErrorRate(conf); double zValue = (falsePosRate - p) / Math.sqrt(p * (1 - p) / nTrials); String assortedStatsStr = " (targetErrorRate=" + p + ", falsePosRate=" + falsePosRate + ", nTrials=" + nTrials + ")"; LOG.info("z-value is " + zValue + assortedStatsStr); boolean isUpperBound = zValueBoundary > 0; if (isUpperBound && zValue > zValueBoundary || !isUpperBound && zValue < zValueBoundary) { String errorMsg = "False positive rate z-value " + zValue + " is " + (isUpperBound ? "higher" : "lower") + " than " + zValueBoundary + assortedStatsStr + ". Per-chunk stats:\n" + cbf.formatTestingStats(); fail(errorMsg + additionalMsg); } }
public void testBloomFilter() throws Exception { FileSystem fs = FileSystem.getLocal(conf); conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01); conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true); // write the file Path f = new Path(ROOT_DIR, getName()); HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL) .withChecksumType(CKTYPE) .withBytesPerCheckSum(CKBYTES).build(); // Make a store file and write data to it. StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs) .withFilePath(f) .withBloomType(BloomType.ROW) .withMaxKeyCount(2000) .withFileContext(meta) .build(); bloomWriteRead(writer, fs); }
@Before public void setUp() throws IOException { conf = TEST_UTIL.getConfiguration(); this.conf.set("dfs.datanode.data.dir.perm", "700"); conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION); conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, INDEX_BLOCK_SIZE); conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE, BLOOM_BLOCK_SIZE); conf.setBoolean(CacheConfig.CACHE_DATA_BLOCKS_COMPRESSED_KEY, cacheCompressedData); cowType.modifyConf(conf); fs = HFileSystem.get(conf); CacheConfig.GLOBAL_BLOCK_CACHE_INSTANCE = blockCache; cacheConf = new CacheConfig(blockCache, true, true, cowType.shouldBeCached(BlockType.DATA), cowType.shouldBeCached(BlockType.LEAF_INDEX), cowType.shouldBeCached(BlockType.BLOOM_CHUNK), false, cacheCompressedData, false, false, false); }
public HFileSortedOplogWriter() throws IOException { writer = HFile.getWriterFactory(hconf, hcache) .withPath(fs, path) .withBlockSize(sopConfig.getBlockSize()) .withBytesPerChecksum(sopConfig.getBytesPerChecksum()) .withChecksumType(HFileSortedOplogFactory.convertChecksum(sopConfig.getChecksum())) // .withComparator(sopConfig.getComparator()) .withCompression(HFileSortedOplogFactory.convertCompression(sopConfig.getCompression())) .withDataBlockEncoder(HFileSortedOplogFactory.convertEncoding(sopConfig.getKeyEncoding())) .create(); bfw = sopConfig.isBloomFilterEnabled() ? // BloomFilterFactory.createGeneralBloomAtWrite(hconf, hcache, BloomType.ROW, // 0, writer, sopConfig.getComparator()) BloomFilterFactory.createGeneralBloomAtWrite(hconf, hcache, BloomType.ROW, 0, writer) : null; }
public HFileSortedOplogWriter(int keys) throws IOException { try { int hfileBlockSize = Integer.getInteger( HoplogConfig.HFILE_BLOCK_SIZE_CONF, (1 << 16)); Algorithm compress = Algorithm.valueOf(System.getProperty(HoplogConfig.COMPRESSION, HoplogConfig.COMPRESSION_DEFAULT)); // ByteComparator bc = new ByteComparator(); writer = HFile.getWriterFactory(conf, cacheConf) .withPath(fsProvider.getFS(), path) .withBlockSize(hfileBlockSize) // .withComparator(bc) .withCompression(compress) .create(); bfw = BloomFilterFactory.createGeneralBloomAtWrite(conf, cacheConf, BloomType.ROW, keys, writer); logger.fine("Created hoplog writer with compression " + compress); } catch (IOException e) { logger.fine("IO Error while creating writer"); throw e; } }
public void testBloomFilter() throws Exception { FileSystem fs = FileSystem.getLocal(conf); conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01); conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true); // write the file Path f = new Path(ROOT_DIR, getName()); StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, fs, StoreFile.DEFAULT_BLOCKSIZE_SMALL) .withFilePath(f) .withBloomType(StoreFile.BloomType.ROW) .withMaxKeyCount(2000) .withChecksumType(CKTYPE) .withBytesPerChecksum(CKBYTES) .build(); bloomWriteRead(writer, fs); }
@Before public void setUp() throws IOException { conf = TEST_UTIL.getConfiguration(); conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION); conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, INDEX_BLOCK_SIZE); conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE, BLOOM_BLOCK_SIZE); conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, cowType.shouldBeCached(BlockType.DATA)); conf.setBoolean(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY, cowType.shouldBeCached(BlockType.LEAF_INDEX)); conf.setBoolean(CacheConfig.CACHE_BLOOM_BLOCKS_ON_WRITE_KEY, cowType.shouldBeCached(BlockType.BLOOM_CHUNK)); cowType.modifyConf(conf); fs = HFileSystem.get(conf); cacheConf = new CacheConfig(conf); blockCache = cacheConf.getBlockCache(); }
/** * Constructor, loads a reader and it's indices, etc. May allocate a * substantial amount of ram depending on the underlying files (10-20MB?). * * @param fs The current file system to use. * @param fileInfo The store file information. * @param conf The current configuration. * @param cacheConf The cache configuration and block cache reference. * @param cfBloomType The bloom type to use for this store file as specified * by column family configuration. This may or may not be the same * as the Bloom filter type actually present in the HFile, because * column family configuration might change. If this is * {@link BloomType#NONE}, the existing Bloom filter is ignored. * @throws IOException When opening the reader fails. */ public StoreFile(final FileSystem fs, final StoreFileInfo fileInfo, final Configuration conf, final CacheConfig cacheConf, final BloomType cfBloomType) throws IOException { this.fs = fs; this.fileInfo = fileInfo; this.cacheConf = cacheConf; if (BloomFilterFactory.isGeneralBloomEnabled(conf)) { this.cfBloomType = cfBloomType; } else { LOG.info("Ignoring bloom filter check for file " + this.getPath() + ": " + "cfBloomType=" + cfBloomType + " (disabled in config)"); this.cfBloomType = BloomType.NONE; } // cache the modification time stamp of this store file this.modificationTimeStamp = fileInfo.getModificationTime(); }
@Before public void setUp() throws IOException { conf = TEST_UTIL.getConfiguration(); this.conf.set("dfs.datanode.data.dir.perm", "700"); conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION); conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, INDEX_BLOCK_SIZE); conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE, BLOOM_BLOCK_SIZE); conf.setBoolean(CacheConfig.CACHE_DATA_BLOCKS_COMPRESSED_KEY, cacheCompressedData); cowType.modifyConf(conf); fs = HFileSystem.get(conf); cacheConf = new CacheConfig(blockCache, true, true, cowType.shouldBeCached(BlockType.DATA), cowType.shouldBeCached(BlockType.LEAF_INDEX), cowType.shouldBeCached(BlockType.BLOOM_CHUNK), false, cacheCompressedData, true, false); }
@Before public void setUp() throws IOException { conf = TEST_UTIL.getConfiguration(); this.conf.set("dfs.datanode.data.dir.perm", "700"); conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION); conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, INDEX_BLOCK_SIZE); conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE, BLOOM_BLOCK_SIZE); conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, cowType.shouldBeCached(BlockType.DATA)); conf.setBoolean(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY, cowType.shouldBeCached(BlockType.LEAF_INDEX)); conf.setBoolean(CacheConfig.CACHE_BLOOM_BLOCKS_ON_WRITE_KEY, cowType.shouldBeCached(BlockType.BLOOM_CHUNK)); cowType.modifyConf(conf); fs = HFileSystem.get(conf); cacheConf = new CacheConfig(conf); blockCache = cacheConf.getBlockCache(); }
/** * Constructor, loads a reader and it's indices, etc. May allocate a substantial amount of ram * depending on the underlying files (10-20MB?). * @param fs fs The current file system to use. * @param fileInfo The store file information. * @param conf The current configuration. * @param cacheConf The cache configuration and block cache reference. * @param cfBloomType The bloom type to use for this store file as specified by column * family configuration. This may or may not be the same as the Bloom filter type * actually present in the HFile, because column family configuration might change. If * this is {@link BloomType#NONE}, the existing Bloom filter is ignored. * @param primaryReplica true if this is a store file for primary replica, otherwise false. */ public HStoreFile(FileSystem fs, StoreFileInfo fileInfo, Configuration conf, CacheConfig cacheConf, BloomType cfBloomType, boolean primaryReplica) { this.fs = fs; this.fileInfo = fileInfo; this.cacheConf = cacheConf; this.noReadahead = conf.getBoolean(STORE_FILE_READER_NO_READAHEAD, DEFAULT_STORE_FILE_READER_NO_READAHEAD); if (BloomFilterFactory.isGeneralBloomEnabled(conf)) { this.cfBloomType = cfBloomType; } else { LOG.info("Ignoring bloom filter check for file " + this.getPath() + ": " + "cfBloomType=" + cfBloomType + " (disabled in config)"); this.cfBloomType = BloomType.NONE; } this.primaryReplica = primaryReplica; }
@Test public void testBloomFilter() throws Exception { FileSystem fs = FileSystem.getLocal(conf); conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01); conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true); // write the file Path f = new Path(ROOT_DIR, getName()); HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL) .withChecksumType(CKTYPE) .withBytesPerCheckSum(CKBYTES).build(); // Make a store file and write data to it. StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs) .withFilePath(f) .withBloomType(BloomType.ROW) .withMaxKeyCount(2000) .withFileContext(meta) .build(); bloomWriteRead(writer, fs); }
@Before public void setUp() throws IOException { conf = TEST_UTIL.getConfiguration(); this.conf.set("dfs.datanode.data.dir.perm", "700"); conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, INDEX_BLOCK_SIZE); conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE, BLOOM_BLOCK_SIZE); conf.setBoolean(CacheConfig.CACHE_DATA_BLOCKS_COMPRESSED_KEY, cacheCompressedData); cowType.modifyConf(conf); fs = HFileSystem.get(conf); CacheConfig.GLOBAL_BLOCK_CACHE_INSTANCE = blockCache; cacheConf = new CacheConfig(blockCache, true, true, cowType.shouldBeCached(BlockType.DATA), cowType.shouldBeCached(BlockType.LEAF_INDEX), cowType.shouldBeCached(BlockType.BLOOM_CHUNK), false, cacheCompressedData, false, false); }
/** * Create a store file writer. Client is responsible for closing file when done. * If metadata, add BEFORE closing using appendMetadata() * @param fs * @param dir Path to family directory. Makes the directory if doesn't exist. * Creates a file with a unique name in this directory. * @param blocksize * @param algorithm Pass null to get default. * @param c Pass null to get default. * @param conf HBase system configuration. used with bloom filters * @param cacheConf Cache configuration and reference. * @param bloomType column family setting for bloom filters * @param maxKeyCount estimated maximum number of keys we expect to add * @return HFile.Writer * @throws IOException */ public static StoreFile.Writer createWriter(final FileSystem fs, final Path dir, final int blocksize, final Compression.Algorithm algorithm, final KeyValue.KVComparator c, final Configuration conf, final CacheConfig cacheConf, BloomType bloomType, long maxKeyCount) throws IOException { if (!fs.exists(dir)) { fs.mkdirs(dir); } Path path = getUniqueFile(fs, dir); if (!BloomFilterFactory.isBloomEnabled(conf)) { bloomType = BloomType.NONE; } return new Writer(fs, path, blocksize, algorithm == null? HFile.DEFAULT_COMPRESSION_ALGORITHM: algorithm, conf, cacheConf, c == null ? KeyValue.COMPARATOR: c, bloomType, maxKeyCount); }
/** * Creates an HFile.Writer that also write helpful meta data. * @param fs file system to write to * @param path file name to create * @param blocksize HDFS block size * @param compress HDFS block compression * @param conf user configuration * @param comparator key comparator * @param bloomType bloom filter setting * @param maxKeys the expected maximum number of keys to be added. Was used * for Bloom filter size in {@link HFile} format version 1. * @throws IOException problem writing to FS */ public Writer(FileSystem fs, Path path, int blocksize, Compression.Algorithm compress, final Configuration conf, CacheConfig cacheConf, final KVComparator comparator, BloomType bloomType, long maxKeys) throws IOException { writer = HFile.getWriterFactory(conf, cacheConf).createWriter( fs, path, blocksize, compress, comparator.getRawComparator()); this.kvComparator = comparator; bloomFilterWriter = BloomFilterFactory.createBloomAtWrite(conf, cacheConf, bloomType, (int) Math.min(maxKeys, Integer.MAX_VALUE), writer); if (bloomFilterWriter != null) { this.bloomType = bloomType; LOG.info("Bloom filter type for " + path + ": " + this.bloomType + ", "+ bloomFilterWriter.getClass().getSimpleName()); } else { // Not using Bloom filters. this.bloomType = BloomType.NONE; } }
public void testBloomFilter() throws Exception { FileSystem fs = FileSystem.getLocal(conf); conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01); conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true); // write the file Path f = new Path(ROOT_DIR, getName()); StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, fs, StoreFile.DEFAULT_BLOCKSIZE_SMALL) .withFilePath(f) .withBloomType(BloomType.ROW) .withMaxKeyCount(2000) .withChecksumType(CKTYPE) .withBytesPerChecksum(CKBYTES) .build(); bloomWriteRead(writer, fs); }