/**
 * Validates a store file by opening and closing it. In HFileV2 this should not be an expensive
 * operation.
 * @param path the path to the store file
 */
private void validateStoreFile(Path path) throws IOException {
  StoreFile sf = null;
  try {
    sf = new StoreFile(this.fs, path, this.conf, this.cacheConf,
        this.family.getBloomFilterType(), NoOpDataBlockEncoder.INSTANCE);
    passSchemaMetricsTo(sf);
    // Opening the reader parses the file's trailer and indexes, which is
    // enough to surface a corrupt or unreadable file.
    sf.createReader();
  } catch (IOException ioe) {
    LOG.error("Failed to open store file : " + path + ", keeping it in tmp location", ioe);
    throw ioe;
  } finally {
    // Close whether validation succeeded or not; false = don't evict cached blocks.
    if (sf != null) {
      sf.closeReader(false);
    }
  }
}
/**
 * Validates a store file by opening and closing it. In HFileV2 this should
 * not be an expensive operation.
 *
 * <p>Validation consists of constructing the {@link StoreFile} and opening its
 * reader; any structural problem with the file surfaces as an
 * {@code IOException} here, before the file leaves its tmp location.
 *
 * @param path the path to the store file
 * @throws IOException if the file cannot be opened as a store file; the file
 *           is left in its tmp location for inspection
 */
private void validateStoreFile(Path path) throws IOException {
  StoreFile storeFile = null;
  try {
    storeFile = new StoreFile(this.fs, path, this.conf, this.cacheConf,
        this.family.getBloomFilterType(), NoOpDataBlockEncoder.INSTANCE);
    // Attach this store's schema metrics so reads done during validation are
    // attributed to the right table/family.
    passSchemaMetricsTo(storeFile);
    // Opening the reader loads the trailer and indexes — sufficient to detect
    // corruption without scanning data blocks.
    storeFile.createReader();
  } catch (IOException e) {
    LOG.error("Failed to open store file : " + path
        + ", keeping it in tmp location", e);
    throw e;
  } finally {
    // Always release the reader; false = do not evict its cached blocks.
    if (storeFile != null) {
      storeFile.closeReader(false);
    }
  }
}
/**
 * Validates a store file by opening and closing it. In HFileV2 this should
 * not be an expensive operation.
 *
 * @param path the path to the store file
 */
private void validateStoreFile(Path path) throws IOException {
  StoreFile candidate = null;
  try {
    candidate = new StoreFile(this.fs, path, this.conf, this.cacheConf,
        this.family.getBloomFilterType(), NoOpDataBlockEncoder.INSTANCE);
    // A successful reader open is the validation: trailer and indexes parse.
    candidate.createReader();
  } catch (IOException ioe) {
    LOG.error("Failed to open store file : " + path + ", keeping it in tmp location", ioe);
    throw ioe;
  } finally {
    if (candidate != null) {
      candidate.closeReader(false);
    }
  }
}
/**
 * Constructor, loads a reader and it's indices, etc. May allocate a
 * substantial amount of ram depending on the underlying files (10-20MB?).
 *
 * @param fs The current file system to use.
 * @param p The path of the file.
 * @param conf The current configuration.
 * @param cacheConf The cache configuration and block cache reference.
 * @param cfBloomType The bloom type to use for this store file as specified by
 *          column family configuration. This may or may not be the same as the
 *          Bloom filter type actually present in the HFile, because column
 *          family configuration might change. If this is
 *          {@link BloomType#NONE}, the existing Bloom filter is ignored.
 * @param dataBlockEncoder data block encoding algorithm.
 * @throws IOException When opening the reader fails.
 */
public StoreFile(final FileSystem fs, final Path p, final Configuration conf,
    final CacheConfig cacheConf, final BloomType cfBloomType,
    final HFileDataBlockEncoder dataBlockEncoder) throws IOException {
  this.fs = fs;
  this.path = p;
  this.cacheConf = cacheConf;
  // Never keep a null encoder around; fall back to the no-op implementation.
  this.dataBlockEncoder = (dataBlockEncoder == null)
      ? NoOpDataBlockEncoder.INSTANCE : dataBlockEncoder;

  if (!BloomFilterFactory.isGeneralBloomEnabled(conf)) {
    // Config has blooms globally disabled: ignore whatever the CF asked for.
    LOG.info("Ignoring bloom filter check for file " + path + ": " +
        "cfBloomType=" + cfBloomType + " (disabled in config)");
    this.cfBloomType = BloomType.NONE;
  } else {
    this.cfBloomType = cfBloomType;
  }

  // cache the modification time stamp of this store file
  FileStatus[] stats = FSUtils.listStatus(fs, p, null);
  this.modificationTimeStamp = (stats != null && stats.length == 1)
      ? stats[0].getModificationTime() : 0;

  SchemaMetrics.configureGlobally(conf);
  initPossibleIndexesAndReference(fs, p, conf);
}
private List<KeyValue> prepareListOfTestSeeks(Path path) throws IOException { List<KeyValue> allKeyValues = new ArrayList<KeyValue>(); // read all of the key values StoreFile storeFile = new StoreFile(testingUtility.getTestFileSystem(), path, configuration, cacheConf, BloomType.NONE, NoOpDataBlockEncoder.INSTANCE); StoreFile.Reader reader = storeFile.createReader(); StoreFileScanner scanner = reader.getStoreFileScanner(true, false); KeyValue current; scanner.seek(KeyValue.LOWESTKEY); while (null != (current = scanner.next())) { allKeyValues.add(current); } storeFile.closeReader(cacheConf.shouldEvictOnClose()); // pick seeks by random List<KeyValue> seeks = new ArrayList<KeyValue>(); for (int i = 0; i < numberOfSeeks; ++i) { KeyValue keyValue = allKeyValues.get( randomizer.nextInt(allKeyValues.size())); seeks.add(keyValue); } clearBlockCache(); return seeks; }
/** * Test a data block encoder on the given HFile. Output results to console. * @param kvLimit The limit of KeyValue which will be analyzed. * @param hfilePath an HFile path on the file system. * @param compressionName Compression algorithm used for comparison. * @param doBenchmark Run performance benchmarks. * @param doVerify Verify correctness. * @throws IOException When pathName is incorrect. */ public static void testCodecs(Configuration conf, int kvLimit, String hfilePath, String compressionName, boolean doBenchmark, boolean doVerify) throws IOException { // create environment Path path = new Path(hfilePath); CacheConfig cacheConf = new CacheConfig(conf); FileSystem fs = FileSystem.get(conf); StoreFile hsf = new StoreFile(fs, path, conf, cacheConf, StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE); StoreFile.Reader reader = hsf.createReader(); reader.loadFileInfo(); KeyValueScanner scanner = reader.getStoreFileScanner(true, true); // run the utilities DataBlockEncodingTool comp = new DataBlockEncodingTool(compressionName); comp.checkStatistics(scanner, kvLimit); if (doVerify) { comp.verifyCodecs(scanner, kvLimit); } if (doBenchmark) { comp.benchmarkCodecs(); } comp.displayStatistics(); // cleanup scanner.close(); reader.close(cacheConf.shouldEvictOnClose()); }
/**
 * Creates a mock store file backed by the shared {@code TEST_FILE} path.
 *
 * @param length the file length this mock should report
 * @param isRef whether this mock should behave as a reference file
 * @throws IOException if the superclass cannot open the backing test file
 */
MockStoreFile(long length, boolean isRef) throws IOException {
  super(TEST_UTIL.getTestFileSystem(), TEST_FILE,
      TEST_UTIL.getConfiguration(),
      new CacheConfig(TEST_UTIL.getConfiguration()), BloomType.NONE,
      NoOpDataBlockEncoder.INSTANCE);
  this.length = length;
  this.isRef = isRef;
}
/** * Write a file and then assert that we can read from top and bottom halves * using two HalfMapFiles. * @throws Exception */ public void testBasicHalfMapFile() throws Exception { // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname. Path outputDir = new Path(new Path(this.testDir, "7e0102"), "familyname"); StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs, 2 * 1024) .withOutputDir(outputDir) .build(); writeStoreFile(writer); checkHalfHFile(new StoreFile(this.fs, writer.getPath(), conf, cacheConf, StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE)); }
/**
 * Test that our mechanism of writing store files in one region to reference
 * store files in other regions works.
 * @throws IOException
 */
public void testReference() throws IOException {
  // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
  Path storedir = new Path(new Path(this.testDir, "7e0102"), "familyname");
  // Make a store file and write data to it.
  StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf,
       this.fs, 8 * 1024)
          .withOutputDir(storedir)
          .build();
  writeStoreFile(writer);
  StoreFile hsf = new StoreFile(this.fs, writer.getPath(), conf, cacheConf,
      StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);
  StoreFile.Reader reader = hsf.createReader();
  // Split on a row, not in middle of row. Midkey returned by reader
  // may be in middle of row. Create new one with empty column and
  // timestamp.
  KeyValue kv = KeyValue.createKeyValueFromKey(reader.midkey());
  byte [] midRow = kv.getRow();
  kv = KeyValue.createKeyValueFromKey(reader.getLastKey());
  byte [] finalRow = kv.getRow();
  // Make a reference covering the top half of the file, starting at midRow.
  Path refPath = StoreFile.split(fs, storedir, hsf, midRow, Range.top);
  StoreFile refHsf = new StoreFile(this.fs, refPath, conf, cacheConf,
      StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);
  // Now confirm that I can read from the reference and that it only gets
  // keys from top half of the file.
  HFileScanner s = refHsf.createReader().getScanner(false, false);
  // The condition seeks once to position the scanner, then advances with
  // next(); `first` lets us check the very first key is exactly midRow.
  for(boolean first = true; (!s.isSeeked() && s.seekTo()) || s.next();) {
    ByteBuffer bb = s.getKey();
    kv = KeyValue.createKeyValueFromKey(bb);
    if (first) {
      assertTrue(Bytes.equals(kv.getRow(), midRow));
      first = false;
    }
  }
  // After the loop kv holds the last key seen: it must be the file's last row,
  // proving the reference scans all the way to the end of the top half.
  assertTrue(Bytes.equals(kv.getRow(), finalRow));
}
public void testHFileLink() throws IOException { final String columnFamily = "f"; Configuration testConf = new Configuration(this.conf); FSUtils.setRootDir(testConf, this.testDir); HRegionInfo hri = new HRegionInfo(Bytes.toBytes("table-link")); Path storedir = new Path(new Path(this.testDir, new Path(hri.getTableNameAsString(), hri.getEncodedName())), columnFamily); // Make a store file and write data to it. StoreFile.Writer writer = new StoreFile.WriterBuilder(testConf, cacheConf, this.fs, 8 * 1024) .withOutputDir(storedir) .build(); Path storeFilePath = writer.getPath(); writeStoreFile(writer); writer.close(); Path dstPath = new Path(this.testDir, new Path("test-region", columnFamily)); HFileLink.create(testConf, this.fs, dstPath, hri, storeFilePath.getName()); Path linkFilePath = new Path(dstPath, HFileLink.createHFileLinkName(hri, storeFilePath.getName())); // Try to open store file from link StoreFile hsf = new StoreFile(this.fs, linkFilePath, testConf, cacheConf, StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE); assertTrue(hsf.isLink()); // Now confirm that I can read from the link int count = 1; HFileScanner s = hsf.createReader().getScanner(false, false); s.seekTo(); while (s.next()) { count++; } assertEquals((LAST_CHAR - FIRST_CHAR + 1) * (LAST_CHAR - FIRST_CHAR + 1), count); }
/**
 * Creates a mock store file backed by the shared {@code TEST_FILE} path.
 *
 * @param length the file length this mock should report
 * @param ageInDisk the simulated on-disk age of the file
 * @param isRef whether this mock should behave as a reference file
 * @param sequenceid the sequence id this mock should report
 * @throws IOException if the superclass cannot open the backing test file
 */
MockStoreFile(long length, long ageInDisk, boolean isRef, long sequenceid)
    throws IOException {
  super(TEST_UTIL.getTestFileSystem(), TEST_FILE,
      TEST_UTIL.getConfiguration(),
      new CacheConfig(TEST_UTIL.getConfiguration()), BloomType.NONE,
      NoOpDataBlockEncoder.INSTANCE);
  this.length = length;
  this.isRef = isRef;
  this.ageInDisk = ageInDisk;
  this.sequenceid = sequenceid;
}