/**
 * Begins encoding of a new block: validates the context type, prepares it for writing, and
 * checks out a {@link PrefixTreeEncoder} from {@link EncoderFactory}, stashing it in the
 * encoding state so subsequent encode() calls can reach it.
 *
 * @throws IOException if the context is not an HFileBlockDefaultEncodingContext
 */
@Override
public void startBlockEncoding(HFileBlockEncodingContext blkEncodingCtx, DataOutputStream out)
    throws IOException {
  // Only the default context type is supported; subclasses are rejected as well.
  if (blkEncodingCtx.getClass() != HFileBlockDefaultEncodingContext.class) {
    throw new IOException(this.getClass().getName() + " only accepts "
        + HFileBlockDefaultEncodingContext.class.getName() + " as the encoding context.");
  }

  HFileBlockDefaultEncodingContext defaultCtx =
      (HFileBlockDefaultEncodingContext) blkEncodingCtx;
  defaultCtx.prepareEncoding(out);

  boolean includeMvcc = defaultCtx.getHFileContext().isIncludesMvcc();
  PrefixTreeEncoder checkedOutEncoder = EncoderFactory.checkOut(out, includeMvcc);

  PrefixTreeEncodingState encodingState = new PrefixTreeEncodingState();
  encodingState.builder = checkedOutEncoder;
  blkEncodingCtx.setEncodingState(encodingState);
}
/**
 * Writes one fixed-width family offset per cell in this row. The offsets index into the family
 * data section written in the middle of the block and are encoded with {@link UFIntTool} so a
 * binary search over a column/timestamp combination can random-access them.
 * <p>
 * Branch nodes carry no data here (signalled by a non-positive family offset width).
 * </p>
 */
protected void writeFamilyNodeOffsets(OutputStream os) throws IOException {
  if (blockMeta.getFamilyOffsetWidth() <= 0) {
    return;  // branch node: nothing to write for this section
  }
  for (int cellIndex = 0; cellIndex < numCells; ++cellIndex) {
    // With a single family, every cell maps to insertion id 0.
    int insertionId = 0;
    if (PrefixTreeEncoder.MULITPLE_FAMILIES_POSSIBLE) {
      insertionId = tokenizerNode.getFirstInsertionIndex() + cellIndex;
    }
    int sortedIndex =
        prefixTreeEncoder.getFamilySorter().getSortedIndexForInsertionId(insertionId);
    int familyOffset =
        prefixTreeEncoder.getFamilyWriter().getOutputArrayOffset(sortedIndex);
    UFIntTool.writeBytes(blockMeta.getFamilyOffsetWidth(), familyOffset, os);
  }
}
@Before public void compile() throws IOException { // Always run with tags. But should also ensure that KVs without tags work fine os = new ByteArrayOutputStream(1 << 20); encoder = new PrefixTreeEncoder(os, includeMemstoreTS); inputKvs = rows.getInputs(); for (KeyValue kv : inputKvs) { encoder.write(kv); } encoder.flush(); totalBytes = encoder.getTotalBytes(); blockMetaWriter = encoder.getBlockMeta(); outputBytes = os.toByteArray(); // start reading, but save the assertions for @Test methods buffer = ByteBuffer.wrap(outputBytes); blockMetaReader = new PrefixTreeBlockMeta(buffer); searcher = new PrefixTreeArraySearcher(blockMetaReader, blockMetaReader.getRowTreeDepth(), blockMetaReader.getMaxRowLength(), blockMetaReader.getMaxQualifierLength(), blockMetaReader.getMaxTagsLength()); searcher.initOnBlock(blockMetaReader, outputBytes, includeMemstoreTS); }
/**
 * Emits, for every cell in this row, the offset of the cell's family within the family data
 * section of the block. Offsets are written with {@link UFIntTool} at a fixed byte width so
 * that a binary search of a column/timestamp combination can jump straight to any entry.
 * <p/>
 * Branch nodes write nothing here; they are detected by a non-positive offset width.
 */
protected void writeFamilyNodeOffsets(OutputStream os) throws IOException {
  int width = blockMeta.getFamilyOffsetWidth();
  if (width <= 0) {
    return;
  }
  for (int i = 0; i < numCells; ++i) {
    int cellInsertionIndex;
    if (PrefixTreeEncoder.MULITPLE_FAMILIES_POSSIBLE) {
      cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
    } else {
      cellInsertionIndex = 0;  // single family: all cells share insertion id 0
    }
    int sortedIndex = prefixTreeEncoder.getFamilySorter()
        .getSortedIndexForInsertionId(cellInsertionIndex);
    int indexedFamilyOffset = prefixTreeEncoder.getFamilyWriter()
        .getOutputArrayOffset(sortedIndex);
    UFIntTool.writeBytes(blockMeta.getFamilyOffsetWidth(), indexedFamilyOffset, os);
  }
}
@Before public void compile() throws IOException { os = new ByteArrayOutputStream(1 << 20); encoder = new PrefixTreeEncoder(os, includeMemstoreTS); inputKvs = rows.getInputs(); for (KeyValue kv : inputKvs) { encoder.write(kv); } encoder.flush(); totalBytes = encoder.getTotalBytes(); blockMetaWriter = encoder.getBlockMeta(); outputBytes = os.toByteArray(); // start reading, but save the assertions for @Test methods buffer = ByteBuffer.wrap(outputBytes); blockMetaReader = new PrefixTreeBlockMeta(buffer); searcher = new PrefixTreeArraySearcher(blockMetaReader, blockMetaReader.getRowTreeDepth(), blockMetaReader.getMaxRowLength(), blockMetaReader.getMaxQualifierLength()); searcher.initOnBlock(blockMetaReader, outputBytes, includeMemstoreTS); }
/**
 * Feeds one cell into the per-block {@link PrefixTreeEncoder} held in the encoding state.
 *
 * @return the unencoded size of the cell (KeyValue length, plus the vint-encoded sequence id
 *         when MVCC is included), used by callers to track raw data volume
 */
@Override
public int encode(Cell cell, HFileBlockEncodingContext encodingCtx, DataOutputStream out)
    throws IOException {
  PrefixTreeEncodingState state = (PrefixTreeEncodingState) encodingCtx.getEncodingState();
  state.builder.write(cell);

  int unencodedSize = KeyValueUtil.length(cell);
  if (encodingCtx.getHFileContext().isIncludesMvcc()) {
    unencodedSize += WritableUtils.getVIntSize(cell.getSequenceId());
  }
  return unencodedSize;
}
@Override public void endBlockEncoding(HFileBlockEncodingContext encodingCtx, DataOutputStream out, byte[] uncompressedBytesWithHeader) throws IOException { PrefixTreeEncodingState state = (PrefixTreeEncodingState) encodingCtx.getEncodingState(); PrefixTreeEncoder builder = state.builder; builder.flush(); EncoderFactory.checkIn(builder); // do i need to check this, or will it always be DataBlockEncoding.PREFIX_TREE? if (encodingCtx.getDataBlockEncoding() != DataBlockEncoding.NONE) { encodingCtx.postEncoding(BlockType.ENCODED_DATA); } else { encodingCtx.postEncoding(BlockType.DATA); } }
/**
 * Encodes the supplied test rows into a prefix-tree block (MVCC included) and wraps the
 * resulting bytes in a ByteBuffer for the tests to search against.
 */
public TestPrefixTreeSearcher(TestRowData testRows) throws IOException {
  this.rows = testRows;

  ByteArrayOutputStream encodedStream = new ByteArrayOutputStream(1 << 20);
  PrefixTreeEncoder encoder = new PrefixTreeEncoder(encodedStream, true);
  for (KeyValue kv : rows.getInputs()) {
    encoder.write(kv);
  }
  encoder.flush();

  this.block = ByteBuffer.wrap(encodedStream.toByteArray());
}
/**
 * Drains every KeyValue from the raw buffer into a checked-out {@link PrefixTreeEncoder},
 * flushing the encoded output; the encoder is always returned to the factory, even on error.
 */
private void internalEncodeKeyValues(DataOutputStream encodedOutputStream,
    ByteBuffer rawKeyValues, boolean includesMvccVersion, boolean includesTag)
    throws IOException {
  rawKeyValues.rewind();
  PrefixTreeEncoder encoder = EncoderFactory.checkOut(encodedOutputStream, includesMvccVersion);
  try {
    // nextShallowCopy returns null once the buffer is exhausted.
    for (KeyValue kv =
        KeyValueUtil.nextShallowCopy(rawKeyValues, includesMvccVersion, includesTag);
        kv != null;
        kv = KeyValueUtil.nextShallowCopy(rawKeyValues, includesMvccVersion, includesTag)) {
      encoder.write(kv);
    }
    encoder.flush();
  } finally {
    EncoderFactory.checkIn(encoder);
  }
}
/**
 * Feeds one KeyValue into the per-block {@link PrefixTreeEncoder} held in the encoding state.
 *
 * @return the unencoded size of the KeyValue (its length, plus the vint-encoded MVCC version
 *         when MVCC is included), used by callers to track raw data volume
 */
@Override
public int encode(KeyValue kv, HFileBlockEncodingContext encodingCtx, DataOutputStream out)
    throws IOException {
  PrefixTreeEncodingState state = (PrefixTreeEncodingState) encodingCtx.getEncodingState();
  state.builder.write(kv);

  int unencodedSize = kv.getLength();
  if (encodingCtx.getHFileContext().isIncludesMvcc()) {
    unencodedSize += WritableUtils.getVIntSize(kv.getMvccVersion());
  }
  return unencodedSize;
}
/**
 * Reads KeyValues from the raw buffer one shallow copy at a time and writes them through a
 * checked-out {@link PrefixTreeEncoder}, flushing at the end; the encoder is returned to the
 * factory in all cases.
 */
private void internalEncodeKeyValues(DataOutputStream encodedOutputStream,
    ByteBuffer rawKeyValues, boolean includesMvccVersion) throws IOException {
  rawKeyValues.rewind();
  PrefixTreeEncoder encoder = EncoderFactory.checkOut(encodedOutputStream, includesMvccVersion);
  try {
    // nextShallowCopy returns null once the buffer is exhausted.
    for (KeyValue kv = KeyValueUtil.nextShallowCopy(rawKeyValues, includesMvccVersion);
        kv != null;
        kv = KeyValueUtil.nextShallowCopy(rawKeyValues, includesMvccVersion)) {
      encoder.write(kv);
    }
    encoder.flush();
  } finally {
    EncoderFactory.checkIn(encoder);
  }
}
/*********************** construct *************************/

/**
 * Creates a writer for the given encoder and tokenizer node. All initialization is delegated
 * to {@link #reconstruct(PrefixTreeEncoder, TokenizerNode)} so the same instance can later be
 * re-pointed at different inputs.
 */
public RowNodeWriter(PrefixTreeEncoder keyValueBuilder, TokenizerNode tokenizerNode) {
  reconstruct(keyValueBuilder, tokenizerNode);
}
/**
 * Re-points this writer at the given encoder and resets its per-node state for the given
 * tokenizer node.
 */
public void reconstruct(PrefixTreeEncoder encoder, TokenizerNode node) {
  this.prefixTreeEncoder = encoder;
  reset(node);
}
/**
 * Creates a row-section writer for the given encoder; initialization is delegated to
 * {@link #reconstruct(PrefixTreeEncoder)} so the instance can be re-pointed later.
 */
public RowSectionWriter(PrefixTreeEncoder prefixTreeEncoder) {
  reconstruct(prefixTreeEncoder);
}
/**
 * Re-points this writer at the given encoder, caching its block meta, and clears any state
 * from a previous use.
 */
public void reconstruct(PrefixTreeEncoder encoder) {
  this.prefixTreeEncoder = encoder;
  this.blockMeta = encoder.getBlockMeta();
  reset();
}