private static byte[] uncompressByteString(ByteString bs, Dictionary dict) throws IOException { InputStream in = bs.newInput(); byte status = (byte)in.read(); if (status == Dictionary.NOT_IN_DICTIONARY) { byte[] arr = new byte[StreamUtils.readRawVarint32(in)]; int bytesRead = in.read(arr); if (bytesRead != arr.length) { throw new IOException("Cannot read; wanted " + arr.length + ", but got " + bytesRead); } if (dict != null) dict.addEntry(arr, 0, arr.length); return arr; } else { // Status here is the higher-order byte of index of the dictionary entry. short dictIdx = StreamUtils.toShort(status, (byte)in.read()); byte[] entry = dict.getEntry(dictIdx); if (entry == null) { throw new IOException("Missing dictionary entry for index " + dictIdx); } return entry; } }
private int readIntoArray(byte[] to, int offset, Dictionary dict) throws IOException { byte status = (byte)in.read(); if (status == Dictionary.NOT_IN_DICTIONARY) { // status byte indicating that data to be read is not in dictionary. // if this isn't in the dictionary, we need to add to the dictionary. int length = StreamUtils.readRawVarint32(in); IOUtils.readFully(in, to, offset, length); dict.addEntry(to, offset, length); return length; } else { // the status byte also acts as the higher order byte of the dictionary entry. short dictIdx = StreamUtils.toShort(status, (byte)in.read()); byte[] entry = dict.getEntry(dictIdx); if (entry == null) { throw new IOException("Missing dictionary entry for index " + dictIdx); } // now we write the uncompressed value. Bytes.putBytes(to, offset, entry, 0, entry.length); return entry.length; } }
/** * Reads the next compressed entry and returns it as a byte array * * @param in the DataInput to read from * @param dict the dictionary we use for our read. * @return the uncompressed array. */ @Deprecated static byte[] readCompressed(DataInput in, Dictionary dict) throws IOException { byte status = in.readByte(); if (status == Dictionary.NOT_IN_DICTIONARY) { int length = WritableUtils.readVInt(in); // if this isn't in the dictionary, we need to add to the dictionary. byte[] arr = new byte[length]; in.readFully(arr); if (dict != null) dict.addEntry(arr, 0, length); return arr; } else { // Status here is the higher-order byte of index of the dictionary entry // (when its not Dictionary.NOT_IN_DICTIONARY -- dictionary indices are // shorts). short dictIdx = toShort(status, in.readByte()); byte[] entry = dict.getEntry(dictIdx); if (entry == null) { throw new IOException("Missing dictionary entry for index " + dictIdx); } return entry; } }
/** * Compresses and writes an array to a DataOutput * * @param data the array to write. * @param out the DataOutput to write into * @param dict the dictionary to use for compression */ @Deprecated static void writeCompressed(byte[] data, int offset, int length, DataOutput out, Dictionary dict) throws IOException { short dictIdx = Dictionary.NOT_IN_DICTIONARY; if (dict != null) { dictIdx = dict.findEntry(data, offset, length); } if (dictIdx == Dictionary.NOT_IN_DICTIONARY) { // not in dict out.writeByte(Dictionary.NOT_IN_DICTIONARY); WritableUtils.writeVInt(out, length); out.write(data, offset, length); } else { out.writeShort(dictIdx); } }
public CompressionContext(Class<? extends Dictionary> dictType, boolean recoveredEdits, boolean hasTagCompression) throws SecurityException, NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException { Constructor<? extends Dictionary> dictConstructor = dictType.getConstructor(); regionDict = dictConstructor.newInstance(); tableDict = dictConstructor.newInstance(); familyDict = dictConstructor.newInstance(); qualifierDict = dictConstructor.newInstance(); rowDict = dictConstructor.newInstance(); if (recoveredEdits) { // This will never change regionDict.init(1); tableDict.init(1); } else { regionDict.init(Short.MAX_VALUE); tableDict.init(Short.MAX_VALUE); } rowDict.init(Short.MAX_VALUE); familyDict.init(Byte.MAX_VALUE); qualifierDict.init(Byte.MAX_VALUE); if (hasTagCompression) { tagCompressionContext = new TagCompressionContext(dictType, Short.MAX_VALUE); } }
/**
 * Uncompresses tags from an InputStream into the destination array.
 *
 * <p>Tags are decoded one after another until the write position reaches
 * {@code offset + length}. Each tag is stored in {@code dest} as a length written through
 * {@code Bytes.putAsShort} followed by the tag bytes. Literal tags read from the stream are
 * also added to the tag dictionary; indexed tags are resolved from it.
 *
 * @param src Stream where the compressed tags are available
 * @param dest Destination array where to write the uncompressed tags
 * @param offset Offset in destination where tags to be written
 * @param length Length of all tag bytes
 * @throws IOException if reading fails or the dictionary has no entry for a decoded index
 */
public void uncompressTags(InputStream src, byte[] dest, int offset, int length)
    throws IOException {
  final int limit = offset + length;
  int writeAt = offset;

  while (writeAt < limit) {
    final byte marker = (byte) src.read();

    if (marker != Dictionary.NOT_IN_DICTIONARY) {
      // Indexed tag: the marker is the higher-order byte of the dictionary index.
      final short index = StreamUtils.toShort(marker, (byte) src.read());
      final byte[] cachedTag = tagDict.getEntry(index);
      if (cachedTag == null) {
        throw new IOException("Missing dictionary entry for index " + index);
      }
      writeAt = Bytes.putAsShort(dest, writeAt, cachedTag.length);
      System.arraycopy(cachedTag, 0, dest, writeAt, cachedTag.length);
      writeAt += cachedTag.length;
      continue;
    }

    // Literal tag: copy it from the stream and remember it in the dictionary.
    final int literalLength = StreamUtils.readRawVarint32(src);
    writeAt = Bytes.putAsShort(dest, writeAt, literalLength);
    IOUtils.readFully(src, dest, writeAt, literalLength);
    tagDict.addEntry(dest, writeAt, literalLength);
    writeAt += literalLength;
  }
}
/** * Uncompress tags from the InputStream and writes to the destination array. * @param src Stream where the compressed tags are available * @param dest Destination array where to write the uncompressed tags * @param offset Offset in destination where tags to be written * @param length Length of all tag bytes * @throws IOException */ public void uncompressTags(InputStream src, byte[] dest, int offset, short length) throws IOException { int endOffset = offset + length; while (offset < endOffset) { byte status = (byte) src.read(); if (status == Dictionary.NOT_IN_DICTIONARY) { // We are writing short as tagLen. So can downcast this without any risk. short tagLen = (short) StreamUtils.readRawVarint32(src); offset = Bytes.putShort(dest, offset, tagLen); IOUtils.readFully(src, dest, offset, tagLen); tagDict.addEntry(dest, offset, tagLen); offset += tagLen; } else { short dictIdx = StreamUtils.toShort(status, (byte) src.read()); byte[] entry = tagDict.getEntry(dictIdx); if (entry == null) { throw new IOException("Missing dictionary entry for index " + dictIdx); } offset = Bytes.putShort(dest, offset, (short) entry.length); System.arraycopy(entry, 0, dest, offset, entry.length); offset += entry.length; } } }
@Override public ByteString compress(byte[] data, Dictionary dict) throws IOException { writeCompressed(data, dict); ByteString result = ByteString.copyFrom(this.buf, 0, this.count); reset(); // Only resets the count - we reuse the byte array. return result; }
/**
 * Appends the dictionary-compressed form of {@code data} to this stream.
 *
 * <p>When {@code dict} reports an index for the value, only that two-byte index is written.
 * Otherwise the {@code Dictionary.NOT_IN_DICTIONARY} marker is written, followed by the
 * varint-encoded length and the raw bytes.
 *
 * @param data the bytes to encode
 * @param dict the dictionary to look the value up in; must not be {@code null}
 * @throws IOException if writing fails
 */
private void writeCompressed(byte[] data, Dictionary dict) throws IOException {
  assert dict != null;

  final short index = dict.findEntry(data, 0, data.length);
  if (index != Dictionary.NOT_IN_DICTIONARY) {
    // Known value: the index alone identifies it.
    StreamUtils.writeShort(this, index);
    return;
  }

  // Unknown value: marker, length, then the literal bytes.
  write(Dictionary.NOT_IN_DICTIONARY);
  StreamUtils.writeRawVInt32(this, data.length);
  write(data, 0, data.length);
}
/**
 * Writes a slice of {@code data} to the {@code out} stream (declared outside this method) in
 * dictionary-compressed form.
 *
 * <p>If {@code dict} is supplied and reports an index for the slice, only that two-byte index
 * is written. Otherwise the {@code Dictionary.NOT_IN_DICTIONARY} marker is written, followed
 * by the varint-encoded length and the raw bytes.
 *
 * @param data the array holding the bytes to write
 * @param offset start of the slice within {@code data}
 * @param length number of bytes in the slice
 * @param dict the dictionary to use for compression; may be {@code null}
 * @throws IOException if writing fails
 */
private void write(byte[] data, int offset, int length, Dictionary dict) throws IOException {
  if (dict != null) {
    final short index = dict.findEntry(data, offset, length);
    if (index != Dictionary.NOT_IN_DICTIONARY) {
      // Known value: the index alone identifies it.
      StreamUtils.writeShort(out, index);
      return;
    }
  }

  // No dictionary, or value not found: marker, length, then the literal bytes.
  out.write(Dictionary.NOT_IN_DICTIONARY);
  StreamUtils.writeRawVInt32(out, length);
  out.write(data, offset, length);
}
/** * Reads a compressed entry into an array. * The output into the array ends up length-prefixed. * * @param to the array to write into * @param offset array offset to start writing to * @param in the DataInput to read from * @param dict the dictionary to use for compression * * @return the length of the uncompressed data */ @Deprecated static int uncompressIntoArray(byte[] to, int offset, DataInput in, Dictionary dict) throws IOException { byte status = in.readByte(); if (status == Dictionary.NOT_IN_DICTIONARY) { // status byte indicating that data to be read is not in dictionary. // if this isn't in the dictionary, we need to add to the dictionary. int length = WritableUtils.readVInt(in); in.readFully(to, offset, length); dict.addEntry(to, offset, length); return length; } else { // the status byte also acts as the higher order byte of the dictionary // entry short dictIdx = toShort(status, in.readByte()); byte[] entry; try { entry = dict.getEntry(dictIdx); } catch (Exception ex) { throw new IOException("Unable to uncompress the log entry", ex); } if (entry == null) { throw new IOException("Missing dictionary entry for index " + dictIdx); } // now we write the uncompressed value. Bytes.putBytes(to, offset, entry, 0, entry.length); return entry.length; } }
/**
 * Creates a tag compression context backed by a single tag dictionary.
 *
 * <p>The dictionary is instantiated reflectively from {@code dictType} through its
 * no-argument constructor and then initialized with {@code dictCapacity}.
 *
 * @param dictType dictionary implementation to instantiate; must expose a public
 *          no-argument constructor
 * @param dictCapacity value passed to the dictionary's {@code init} method
 * @throws NoSuchMethodException if {@code dictType} has no public no-argument constructor
 * @throws InstantiationException if the dictionary cannot be instantiated
 * @throws IllegalAccessException if the constructor is not accessible
 * @throws InvocationTargetException if the constructor itself throws
 * @throws SecurityException if reflective access is denied
 */
public TagCompressionContext(Class<? extends Dictionary> dictType, int dictCapacity)
    throws SecurityException, NoSuchMethodException, InstantiationException,
    IllegalAccessException, InvocationTargetException {
  tagDict = dictType.getConstructor().newInstance();
  tagDict.init(dictCapacity);
}
/**
 * Uncompresses tags from a ByteBuffer into the destination array.
 *
 * <p>Tags are decoded one after another until the write position reaches
 * {@code offset + length}. Each tag is stored in {@code dest} as a length written through
 * {@code Bytes.putAsShort} followed by the tag bytes. Literal tags read from the buffer are
 * also added to the tag dictionary; indexed tags are resolved from it.
 *
 * @param src Buffer where the compressed tags are available
 * @param dest Destination array where to write the uncompressed tags
 * @param offset Offset in destination where tags to be written
 * @param length Length of all tag bytes
 * @return bytes count read from source to uncompress all tags.
 * @throws IOException if the dictionary has no entry for a decoded index
 */
public int uncompressTags(ByteBuffer src, byte[] dest, int offset, int length)
    throws IOException {
  final int startPosition = src.position();
  final int limit = offset + length;
  int writeAt = offset;

  while (writeAt < limit) {
    final byte marker = src.get();
    final int tagLength;

    if (marker != Dictionary.NOT_IN_DICTIONARY) {
      // Indexed tag: the marker is the higher-order byte of the dictionary index.
      final short index = StreamUtils.toShort(marker, src.get());
      final byte[] cachedTag = tagDict.getEntry(index);
      if (cachedTag == null) {
        throw new IOException("Missing dictionary entry for index " + index);
      }
      tagLength = cachedTag.length;
      writeAt = Bytes.putAsShort(dest, writeAt, tagLength);
      System.arraycopy(cachedTag, 0, dest, writeAt, tagLength);
    } else {
      // Literal tag: copy it from the buffer and remember it in the dictionary.
      tagLength = StreamUtils.readRawVarint32(src);
      writeAt = Bytes.putAsShort(dest, writeAt, tagLength);
      src.get(dest, writeAt, tagLength);
      tagDict.addEntry(dest, writeAt, tagLength);
    }

    writeAt += tagLength;
  }

  return src.position() - startPosition;
}
/**
 * Writes a slice of {@code data} to {@code out} in dictionary-compressed form using the tag
 * dictionary.
 *
 * <p>If the tag dictionary is present and reports an index for the slice, only that two-byte
 * index is written. Otherwise the {@code Dictionary.NOT_IN_DICTIONARY} marker is written,
 * followed by the varint-encoded length and the raw bytes.
 *
 * @param data the array holding the bytes to write
 * @param offset start of the slice within {@code data}
 * @param length number of bytes in the slice
 * @param out the stream to write into
 * @throws IOException if writing to {@code out} fails
 */
private void write(byte[] data, int offset, int length, OutputStream out) throws IOException {
  if (tagDict != null) {
    final short index = tagDict.findEntry(data, offset, length);
    if (index != Dictionary.NOT_IN_DICTIONARY) {
      // Known value: the index alone identifies it.
      StreamUtils.writeShort(out, index);
      return;
    }
  }

  // No dictionary, or value not found: marker, length, then the literal bytes.
  out.write(Dictionary.NOT_IN_DICTIONARY);
  StreamUtils.writeRawVInt32(out, length);
  out.write(data, offset, length);
}