/**
 * Builds the data block encoder for reading an HFile, driven by the
 * DATA_BLOCK_ENCODING entry stored in the file's info block.
 *
 * @param fileInfo file info of the HFile being opened
 * @return {@link NoOpDataBlockEncoder#INSTANCE} when the file is unencoded,
 *         otherwise an encoder configured for the recorded encoding
 * @throws IOException if the stored encoding name is not a known
 *         {@link DataBlockEncoding} value
 */
public static HFileDataBlockEncoder createFromFileInfo(FileInfo fileInfo)
    throws IOException {
  byte[] encodingBytes = fileInfo.get(DATA_BLOCK_ENCODING);
  DataBlockEncoding encoding;
  if (encodingBytes == null) {
    // No entry recorded: the file was written without block encoding.
    encoding = DataBlockEncoding.NONE;
  } else {
    String encodingName = Bytes.toString(encodingBytes);
    try {
      encoding = DataBlockEncoding.valueOf(encodingName);
    } catch (IllegalArgumentException ex) {
      throw new IOException("Invalid data block encoding type in file info: "
          + encodingName, ex);
    }
  }
  return encoding == DataBlockEncoding.NONE
      ? NoOpDataBlockEncoder.INSTANCE
      : new HFileDataBlockEncoderImpl(encoding);
}
/** * Add last bits of metadata to file info before it is written out. */ protected void finishFileInfo() throws IOException { if (lastCell != null) { // Make a copy. The copy is stuffed into our fileinfo map. Needs a clean // byte buffer. Won't take a tuple. byte [] lastKey = CellUtil.getCellKeySerializedAsKeyValueKey(this.lastCell); fileInfo.append(FileInfo.LASTKEY, lastKey, false); } // Average key length. int avgKeyLen = entryCount == 0 ? 0 : (int) (totalKeyLength / entryCount); fileInfo.append(FileInfo.AVG_KEY_LEN, Bytes.toBytes(avgKeyLen), false); // Average value length. int avgValueLen = entryCount == 0 ? 0 : (int) (totalValueLength / entryCount); fileInfo.append(FileInfo.AVG_VALUE_LEN, Bytes.toBytes(avgValueLen), false); fileInfo.append(FileInfo.CREATE_TIME_TS, Bytes.toBytes(hFileContext.getFileCreateTime()), false); }
/** * Add last bits of metadata to file info before it is written out. */ protected void finishFileInfo() throws IOException { if (lastKeyBuffer != null) { // Make a copy. The copy is stuffed into HMapWritable. Needs a clean // byte buffer. Won't take a tuple. fileInfo.append(FileInfo.LASTKEY, Arrays.copyOfRange(lastKeyBuffer, lastKeyOffset, lastKeyOffset + lastKeyLength), false); } // Average key length. int avgKeyLen = entryCount == 0 ? 0 : (int) (totalKeyLength / entryCount); fileInfo.append(FileInfo.AVG_KEY_LEN, Bytes.toBytes(avgKeyLen), false); // Average value length. int avgValueLen = entryCount == 0 ? 0 : (int) (totalValueLength / entryCount); fileInfo.append(FileInfo.AVG_VALUE_LEN, Bytes.toBytes(avgValueLen), false); }
/** * Add last bits of metadata to file info before it is written out. */ protected void finishFileInfo() throws IOException { if (lastCell != null) { // Make a copy. The copy is stuffed into our fileinfo map. Needs a clean // byte buffer. Won't take a tuple. byte [] lastKey = CellUtil.getCellKeySerializedAsKeyValueKey(this.lastCell); fileInfo.append(FileInfo.LASTKEY, lastKey, false); } // Average key length. int avgKeyLen = entryCount == 0 ? 0 : (int) (totalKeyLength / entryCount); fileInfo.append(FileInfo.AVG_KEY_LEN, Bytes.toBytes(avgKeyLen), false); // Average value length. int avgValueLen = entryCount == 0 ? 0 : (int) (totalValueLength / entryCount); fileInfo.append(FileInfo.AVG_VALUE_LEN, Bytes.toBytes(avgValueLen), false); }
/** * Add last bits of metadata to file info before it is written out. */ protected void finishFileInfo() throws IOException { if (lastKeyBuffer != null) { // Make a copy. The copy is stuffed into our fileinfo map. Needs a clean // byte buffer. Won't take a tuple. fileInfo.append(FileInfo.LASTKEY, Arrays.copyOfRange(lastKeyBuffer, lastKeyOffset, lastKeyOffset + lastKeyLength), false); } // Average key length. int avgKeyLen = entryCount == 0 ? 0 : (int) (totalKeyLength / entryCount); fileInfo.append(FileInfo.AVG_KEY_LEN, Bytes.toBytes(avgKeyLen), false); // Average value length. int avgValueLen = entryCount == 0 ? 0 : (int) (totalValueLength / entryCount); fileInfo.append(FileInfo.AVG_VALUE_LEN, Bytes.toBytes(avgValueLen), false); }
protected void finishFileInfo() throws IOException { super.finishFileInfo(); if (hFileContext.getDataBlockEncoding() == DataBlockEncoding.PREFIX_TREE) { // In case of Prefix Tree encoding, we always write tags information into HFiles even if all // KVs are having no tags. fileInfo.append(FileInfo.MAX_TAGS_LEN, Bytes.toBytes(this.maxTagsLength), false); } else if (hFileContext.isIncludesTags()) { // When tags are not being written in this file, MAX_TAGS_LEN is excluded // from the FileInfo fileInfo.append(FileInfo.MAX_TAGS_LEN, Bytes.toBytes(this.maxTagsLength), false); boolean tagsCompressed = (hFileContext.getDataBlockEncoding() != DataBlockEncoding.NONE) && hFileContext.isCompressTags(); fileInfo.append(FileInfo.TAGS_COMPRESSED, Bytes.toBytes(tagsCompressed), false); } }
@Override protected void finishFileInfo() throws IOException { super.finishFileInfo(); // In version 1, we store comparator name in the file info. fileInfo.append(FileInfo.COMPARATOR, Bytes.toBytes(comparator.getClass().getName()), false); }
public static HFileDataBlockEncoder createFromFileInfo( FileInfo fileInfo, DataBlockEncoding preferredEncodingInCache) throws IOException { boolean hasPreferredCacheEncoding = preferredEncodingInCache != null && preferredEncodingInCache != DataBlockEncoding.NONE; byte[] dataBlockEncodingType = fileInfo.get(DATA_BLOCK_ENCODING); if (dataBlockEncodingType == null && !hasPreferredCacheEncoding) { return NoOpDataBlockEncoder.INSTANCE; } DataBlockEncoding onDisk; if (dataBlockEncodingType == null) { onDisk = DataBlockEncoding.NONE; }else { String dataBlockEncodingStr = Bytes.toString(dataBlockEncodingType); try { onDisk = DataBlockEncoding.valueOf(dataBlockEncodingStr); } catch (IllegalArgumentException ex) { throw new IOException("Invalid data block encoding type in file info: " + dataBlockEncodingStr, ex); } } DataBlockEncoding inCache; if (onDisk == DataBlockEncoding.NONE) { // This is an "in-cache-only" encoding or fully-unencoded scenario. // Either way, we use the given encoding (possibly NONE) specified by // the column family in cache. inCache = preferredEncodingInCache; } else { // Leave blocks in cache encoded the same way as they are on disk. // If we switch encoding type for the CF or the in-cache-only encoding // flag, old files will keep their encoding both on disk and in cache, // but new files will be generated with the new encoding. inCache = onDisk; } return new HFileDataBlockEncoderImpl(onDisk, inCache); }
protected void finishFileInfo() throws IOException { if (lastCell != null) { // Make a copy. The copy is stuffed into our fileinfo map. Needs a clean // byte buffer. Won't take a tuple. byte [] lastKey = PrivateCellUtil.getCellKeySerializedAsKeyValueKey(this.lastCell); fileInfo.append(FileInfo.LASTKEY, lastKey, false); } // Average key length. int avgKeyLen = entryCount == 0 ? 0 : (int) (totalKeyLength / entryCount); fileInfo.append(FileInfo.AVG_KEY_LEN, Bytes.toBytes(avgKeyLen), false); fileInfo.append(FileInfo.CREATE_TIME_TS, Bytes.toBytes(hFileContext.getFileCreateTime()), false); // Average value length. int avgValueLen = entryCount == 0 ? 0 : (int) (totalValueLength / entryCount); fileInfo.append(FileInfo.AVG_VALUE_LEN, Bytes.toBytes(avgValueLen), false); if (hFileContext.isIncludesTags()) { // When tags are not being written in this file, MAX_TAGS_LEN is excluded // from the FileInfo fileInfo.append(FileInfo.MAX_TAGS_LEN, Bytes.toBytes(this.maxTagsLength), false); boolean tagsCompressed = (hFileContext.getDataBlockEncoding() != DataBlockEncoding.NONE) && hFileContext.isCompressTags(); fileInfo.append(FileInfo.TAGS_COMPRESSED, Bytes.toBytes(tagsCompressed), false); } }
public static HFileDataBlockEncoder createFromFileInfo( FileInfo fileInfo, DataBlockEncoding preferredEncodingInCache) throws IOException { byte[] dataBlockEncodingType = fileInfo.get(DATA_BLOCK_ENCODING); if (dataBlockEncodingType == null) { return NoOpDataBlockEncoder.INSTANCE; } String dataBlockEncodingStr = Bytes.toString(dataBlockEncodingType); DataBlockEncoding onDisk; try { onDisk = DataBlockEncoding.valueOf(dataBlockEncodingStr); } catch (IllegalArgumentException ex) { throw new IOException("Invalid data block encoding type in file info: " + dataBlockEncodingStr, ex); } DataBlockEncoding inCache; if (onDisk == DataBlockEncoding.NONE) { // This is an "in-cache-only" encoding or fully-unencoded scenario. // Either way, we use the given encoding (possibly NONE) specified by // the column family in cache. inCache = preferredEncodingInCache; } else { // Leave blocks in cache encoded the same way as they are on disk. // If we switch encoding type for the CF or the in-cache-only encoding // flag, old files will keep their encoding both on disk and in cache, // but new files will be generated with the new encoding. inCache = onDisk; } return new HFileDataBlockEncoderImpl(onDisk, inCache); }
/**
 * Extracts some details about the files to compact that are commonly needed by compactors.
 *
 * Aggregates, across all input store files: the max sequence id, the max MVCC
 * read point, the total entry count, the max tag length, the earliest put
 * timestamp (when all files are included), and the smallest MVCC value that
 * must be kept.
 *
 * @param filesToCompact Files.
 * @param allFiles Whether all files are included for compaction
 * @return The result.
 */
protected FileDetails getFileDetails(Collection<StoreFile> filesToCompact,
    boolean allFiles) throws IOException {
  FileDetails fd = new FileDetails();
  // Files older than this cutoff (keepSeqIdPeriod days) no longer need their
  // MVCC/seqId information preserved through compaction.
  long oldestHFileTimeStampToKeepMVCC = System.currentTimeMillis() -
    (1000L * 60 * 60 * 24 * this.keepSeqIdPeriod);

  for (StoreFile file : filesToCompact) {
    if (allFiles && (file.getModificationTimeStamp() < oldestHFileTimeStampToKeepMVCC)) {
      // when isAllFiles is true, all files are compacted so we can calculate the smallest
      // MVCC value to keep
      if (fd.minSeqIdToKeep < file.getMaxMemstoreTS()) {
        fd.minSeqIdToKeep = file.getMaxMemstoreTS();
      }
    }
    long seqNum = file.getMaxSequenceId();
    fd.maxSeqId = Math.max(fd.maxSeqId, seqNum);
    StoreFile.Reader r = file.getReader();
    if (r == null) {
      // Skip (best-effort): a file without a reader contributes nothing.
      LOG.warn("Null reader for " + file.getPath());
      continue;
    }
    // NOTE: use getEntries when compacting instead of getFilterEntries, otherwise under-sized
    // blooms can cause progress to be miscalculated or if the user switches bloom
    // type (e.g. from ROW to ROWCOL)
    long keyCount = r.getEntries();
    fd.maxKeyCount += keyCount;
    // calculate the latest MVCC readpoint in any of the involved store files
    Map<byte[], byte[]> fileInfo = r.loadFileInfo();
    byte tmp[] = null;
    // Get and set the real MVCCReadpoint for bulk loaded files, which is the
    // SeqId number.
    if (r.isBulkLoaded()) {
      fd.maxMVCCReadpoint = Math.max(fd.maxMVCCReadpoint, r.getSequenceID());
    }
    else {
      tmp = fileInfo.get(HFileWriterV2.MAX_MEMSTORE_TS_KEY);
      if (tmp != null) {
        fd.maxMVCCReadpoint = Math.max(fd.maxMVCCReadpoint, Bytes.toLong(tmp));
      }
    }
    // Track the widest tag seen in any input file.
    tmp = fileInfo.get(FileInfo.MAX_TAGS_LEN);
    if (tmp != null) {
      fd.maxTagsLength = Math.max(fd.maxTagsLength, Bytes.toInt(tmp));
    }
    // If required, calculate the earliest put timestamp of all involved storefiles.
    // This is used to remove family delete marker during compaction.
    long earliestPutTs = 0;
    if (allFiles) {
      tmp = fileInfo.get(StoreFile.EARLIEST_PUT_TS);
      if (tmp == null) {
        // There's a file with no information, must be an old one
        // assume we have very old puts
        fd.earliestPutTs = earliestPutTs = HConstants.OLDEST_TIMESTAMP;
      } else {
        earliestPutTs = Bytes.toLong(tmp);
        fd.earliestPutTs = Math.min(fd.earliestPutTs, earliestPutTs);
      }
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Compacting " + file +
        ", keycount=" + keyCount +
        ", bloomtype=" + r.getBloomFilterType().toString() +
        ", size=" + TraditionalBinaryPrefix.long2String(r.length(), "", 1) +
        ", encoding=" + r.getHFileReader().getDataBlockEncoding() +
        ", seqNum=" + seqNum +
        (allFiles ? ", earliestPutTs=" + earliestPutTs : ""));
    }
  }
  return fd;
}