/**
 * Reads {@code count} 13-bit values from {@code bits}, expands each one into a two-byte
 * character code, and appends the bytes, decoded using the charset named by
 * {@code StringUtils.GB2312}, to {@code result}.
 *
 * @param bits source the 13-bit values are read from
 * @param result builder that receives the decoded text
 * @param count number of two-byte characters to read
 * @throws FormatException if {@code count * 13} exceeds {@code bits.available()}, or if the
 *     charset is not supported by the runtime
 */
private static void decodeHanziSegment(BitSource bits, StringBuilder result, int count)
    throws FormatException {
  // Refuse up front rather than reading past what the source reports as available.
  if (13 * count > bits.available()) {
    throw FormatException.getFormatInstance();
  }

  // Two output bytes per character.
  byte[] encoded = new byte[2 * count];
  for (int index = 0; index < count; index++) {
    int packed = bits.readBits(13);
    // Quotient by 0x60 becomes the high byte, remainder becomes the low byte.
    int pair = ((packed / 0x60) << 8) | (packed % 0x60);
    // Values below 0x3BF are shifted by 0xA1A1; everything else by 0xA6A1.
    pair += (pair < 0x3BF) ? 0xA1A1 : 0xA6A1;
    // The byte cast keeps only the low 8 bits, so no explicit mask is needed.
    encoded[2 * index] = (byte) (pair >> 8);
    encoded[2 * index + 1] = (byte) pair;
  }

  try {
    result.append(new String(encoded, StringUtils.GB2312));
  } catch (UnsupportedEncodingException ignored) {
    // Reported to callers as a format failure, as the rest of this decoder does.
    throw FormatException.getFormatInstance();
  }
}
/**
 * Reads {@code count} 13-bit values from {@code bits}, expands each one into a two-byte
 * character code, and appends the bytes, decoded using the charset named by
 * {@code StringUtils.SHIFT_JIS}, to {@code result}.
 *
 * @param bits source the 13-bit values are read from
 * @param result builder that receives the decoded text
 * @param count number of two-byte characters to read
 * @throws FormatException if {@code count * 13} exceeds {@code bits.available()}, or if the
 *     charset is not supported by the runtime
 */
private static void decodeKanjiSegment(BitSource bits, StringBuilder result, int count)
    throws FormatException {
  // Refuse up front rather than reading past what the source reports as available.
  if (13 * count > bits.available()) {
    throw FormatException.getFormatInstance();
  }

  // Two output bytes per character; the cursor walks the buffer one pair at a time.
  byte[] encoded = new byte[2 * count];
  int cursor = 0;
  while (cursor < encoded.length) {
    int packed = bits.readBits(13);
    // Quotient by 0xC0 becomes the high byte, remainder becomes the low byte.
    int pair = ((packed / 0xC0) << 8) | (packed % 0xC0);
    // Values below 0x1F00 are shifted by 0x8140; everything else by 0xC140.
    int shift = (pair < 0x1F00) ? 0x8140 : 0xC140;
    pair += shift;
    encoded[cursor++] = (byte) (pair >> 8);
    encoded[cursor++] = (byte) pair;
  }

  try {
    result.append(new String(encoded, StringUtils.SHIFT_JIS));
  } catch (UnsupportedEncodingException ignored) {
    // Reported to callers as a format failure, as the rest of this decoder does.
    throw FormatException.getFormatInstance();
  }
}
/**
 * Reads {@code count} 8-bit values from {@code bits}, decodes them as text, appends the text
 * to {@code result}, and records the raw bytes in {@code byteSegments}.
 *
 * <p>The charset name is {@code currentCharacterSetECI.name()} when an ECI is supplied;
 * otherwise it is whatever {@code StringUtils.guessEncoding} returns for the bytes and hints.
 *
 * @param bits source the bytes are read from
 * @param result builder that receives the decoded text
 * @param count number of bytes to read
 * @param currentCharacterSetECI character set in effect, or {@code null} to guess
 * @param byteSegments collection that receives the raw byte array after a successful decode
 * @param hints decode hints forwarded to the encoding guess
 * @throws FormatException if {@code count * 8} exceeds {@code bits.available()}, or if the
 *     selected charset is not supported by the runtime
 */
private static void decodeByteSegment(BitSource bits,
                                      StringBuilder result,
                                      int count,
                                      CharacterSetECI currentCharacterSetECI,
                                      Collection<byte[]> byteSegments,
                                      Map<DecodeHintType, ?> hints) throws FormatException {
  // Refuse up front rather than reading past what the source reports as available.
  if (8 * count > bits.available()) {
    throw FormatException.getFormatInstance();
  }

  byte[] raw = new byte[count];
  int filled = 0;
  while (filled < count) {
    raw[filled] = (byte) bits.readBits(8);
    filled++;
  }

  String charsetName = (currentCharacterSetECI == null)
      ? StringUtils.guessEncoding(raw, hints)
      : currentCharacterSetECI.name();

  String text;
  try {
    text = new String(raw, charsetName);
  } catch (UnsupportedEncodingException ignored) {
    // Reported to callers as a format failure; nothing is appended or recorded.
    throw FormatException.getFormatInstance();
  }
  result.append(text);
  byteSegments.add(raw);
}
/** * See specification GBT 18284-2000 */ private static void decodeHanziSegment(BitSource bits, StringBuilder result, int count) throws FormatException { // Don't crash trying to read more bits than we have available. if (count * 13 > bits.available()) { throw FormatException.getFormatInstance(); } // Each character will require 2 bytes. Read the characters as 2-byte pairs // and decode as GB2312 afterwards byte[] buffer = new byte[2 * count]; int offset = 0; while (count > 0) { // Each 13 bits encodes a 2-byte character int twoBytes = bits.readBits(13); int assembledTwoBytes = ((twoBytes / 0x060) << 8) | (twoBytes % 0x060); if (assembledTwoBytes < 0x003BF) { // In the 0xA1A1 to 0xAAFE range assembledTwoBytes += 0x0A1A1; } else { // In the 0xB0A1 to 0xFAFE range assembledTwoBytes += 0x0A6A1; } buffer[offset] = (byte) ((assembledTwoBytes >> 8) & 0xFF); buffer[offset + 1] = (byte) (assembledTwoBytes & 0xFF); offset += 2; count--; } try { result.append(new String(buffer, StringUtils.GB2312)); } catch (UnsupportedEncodingException ignored) { throw FormatException.getFormatInstance(); } }
private static void decodeKanjiSegment(BitSource bits, StringBuilder result, int count) throws FormatException { // Don't crash trying to read more bits than we have available. if (count * 13 > bits.available()) { throw FormatException.getFormatInstance(); } // Each character will require 2 bytes. Read the characters as 2-byte pairs // and decode as Shift_JIS afterwards byte[] buffer = new byte[2 * count]; int offset = 0; while (count > 0) { // Each 13 bits encodes a 2-byte character int twoBytes = bits.readBits(13); int assembledTwoBytes = ((twoBytes / 0x0C0) << 8) | (twoBytes % 0x0C0); if (assembledTwoBytes < 0x01F00) { // In the 0x8140 to 0x9FFC range assembledTwoBytes += 0x08140; } else { // In the 0xE040 to 0xEBBF range assembledTwoBytes += 0x0C140; } buffer[offset] = (byte) (assembledTwoBytes >> 8); buffer[offset + 1] = (byte) assembledTwoBytes; offset += 2; count--; } // Shift_JIS may not be supported in some environments: try { result.append(new String(buffer, StringUtils.SHIFT_JIS)); } catch (UnsupportedEncodingException ignored) { throw FormatException.getFormatInstance(); } }
private static void decodeByteSegment(BitSource bits, StringBuilder result, int count, CharacterSetECI currentCharacterSetECI, Collection<byte[]> byteSegments, Map<DecodeHintType,?> hints) throws FormatException { // Don't crash trying to read more bits than we have available. if (8 * count > bits.available()) { throw FormatException.getFormatInstance(); } byte[] readBytes = new byte[count]; for (int i = 0; i < count; i++) { readBytes[i] = (byte) bits.readBits(8); } String encoding; if (currentCharacterSetECI == null) { // The spec isn't clear on this mode; see // section 6.4.5: t does not say which encoding to assuming // upon decoding. I have seen ISO-8859-1 used as well as // Shift_JIS -- without anything like an ECI designator to // give a hint. encoding = StringUtils.guessEncoding(readBytes, hints); } else { encoding = currentCharacterSetECI.name(); } try { result.append(new String(readBytes, encoding)); } catch (UnsupportedEncodingException ignored) { throw FormatException.getFormatInstance(); } byteSegments.add(readBytes); }