/**
 * Creates a byte-to-char converter for the named encoding.
 *
 * @param encoding       name of the charset, resolved through {@code getCharset}
 * @param replaceOnError {@code true} to replace malformed or unmappable input,
 *                       {@code false} to report it
 * @throws IOException declared by the signature; presumably raised by
 *                     {@code getCharset} when the encoding cannot be resolved
 */
public B2CConverter(String encoding, boolean replaceOnError) throws IOException {
    leftovers = ByteBuffer.wrap(new byte[LEFTOVER_SIZE]);

    final CodingErrorAction onError =
            replaceOnError ? CodingErrorAction.REPLACE : CodingErrorAction.REPORT;

    final Charset resolved = getCharset(encoding);
    // Special case. Use the Apache Harmony based UTF-8 decoder because it
    // - a) rejects invalid sequences that the JVM decoder does not
    // - b) fails faster for some invalid sequences
    decoder = resolved.equals(UTF_8) ? new Utf8Decoder() : resolved.newDecoder();

    // The same action applies to both kinds of coding error.
    decoder.onMalformedInput(onError);
    decoder.onUnmappableCharacter(onError);
}
private boolean canDecodeFile(FileObject fo, String encoding) { CharsetDecoder decoder = Charset.forName(encoding).newDecoder().onUnmappableCharacter(CodingErrorAction.REPORT).onMalformedInput(CodingErrorAction.REPORT); try { BufferedInputStream bis = new BufferedInputStream(fo.getInputStream()); //I probably have to create such big buffer since I am not sure //how to cut the file to smaller byte arrays so it cannot happen //that an encoded character is divided by the arrays border. //In such case it might happen that the method woult return //incorrect value. byte[] buffer = new byte[(int) fo.getSize()]; bis.read(buffer); bis.close(); decoder.decode(ByteBuffer.wrap(buffer)); return true; } catch (CharacterCodingException ex) { //return false } catch (IOException ioe) { Logger.getLogger("global").log(Level.WARNING, "Error during charset verification", ioe); } return false; }
protected final void setEncoding(final String encoding) throws UnsupportedEncodingException { final Charset charSet = charsetForName(encoding); final CharsetEncoder encoder = charSet.newEncoder().onMalformedInput( CodingErrorAction.REPLACE).onUnmappableCharacter( CodingErrorAction.REPLACE); final float maxBytesPerChar = encoder.maxBytesPerChar(); final float averageBytesPerChar = encoder.averageBytesPerChar(); final boolean fixedWidthCharset = (maxBytesPerChar == Math.round(maxBytesPerChar)) && (maxBytesPerChar == averageBytesPerChar); // m_fixedWidthCharset = fixedWidthCharset; m_maxCharWidth = Math.round(maxBytesPerChar); m_charset = charSet; m_encoder = encoder; m_encoding = m_charset.name(); }
private static String decode(ByteBuffer utf8, boolean replace) throws CharacterCodingException { CharsetDecoder decoder = DECODER_FACTORY.get(); if (replace) { decoder.onMalformedInput( java.nio.charset.CodingErrorAction.REPLACE); decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); } String str = decoder.decode(utf8).toString(); // set decoder back to its default value: REPORT if (replace) { decoder.onMalformedInput(CodingErrorAction.REPORT); decoder.onUnmappableCharacter(CodingErrorAction.REPORT); } return str; }
/** * Constructs a new encoder instance that iterates over {@code string}, converting * it to bytes using the charset {@code charset}. * * <p>The encoder reads up to {@code stepSize} characters at the same time, * buffering the results internally. {@code stepSize} must be at least 2 (this is to * ensure that surrogate pairs are processed correctly). * * @param string the string to iterate over, must not be {@code null} * @param charset the charset to use for encoding characters to bytes, must not be {@code null} * @param stepSize the number to characters to try encoding in each encoding step, must be * positive * @throws NullPointerException if {@code string} or {@code charset} is {@code null} * @throws IllegalArgumentException if {@code stepSize} is lesser than 2 */ public CharsetEncoderByteIterator(String string, Charset charset, int stepSize) { Objects.requireNonNull(string); Check.gt(stepSize, 1); // use the same settings as String.getBytes(Charset) this.encoder = charset.newEncoder() .onMalformedInput(CodingErrorAction.REPLACE) .onUnmappableCharacter(CodingErrorAction.REPLACE) .reset(); this.string = string; this.idx = 0; this.byteIdx = 0; this.flushed = false; // no need to allocate more chars than what the string can give us stepSize = Math.min(stepSize, string.length()); stepSize = Math.max(2, stepSize); // but ensure we can always handle surrogate pairs this.in = CharBuffer.allocate(stepSize); int outBufferSize = (int) ((stepSize + 1) * encoder.maxBytesPerChar()); this.out = ByteBuffer.allocate(outBufferSize); out.flip(); }
/** * Convert text in a given character set to a Unicode string. Any invalid * characters are replaced with U+FFFD. Returns null if the character set * is not recognized. * @param text ByteBuffer containing the character array to convert. * @param charsetName Character set it's in encoded in. * @return: Unicode string on success, null on failure. */ @CalledByNative private static String convertToUnicodeWithSubstitutions( ByteBuffer text, String charsetName) { try { Charset charset = Charset.forName(charsetName); // TODO(mmenke): Investigate if Charset.decode() can be used // instead. The question is whether it uses the proper replace // character. JDK CharsetDecoder docs say U+FFFD is the default, // but Charset.decode() docs say it uses the "charset's default // replacement byte array". CharsetDecoder decoder = charset.newDecoder(); decoder.onMalformedInput(CodingErrorAction.REPLACE); decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); decoder.replaceWith("\uFFFD"); return decoder.decode(text).toString(); } catch (Exception e) { return null; } }
/**
 * Creates a session output buffer that writes to the given stream.
 *
 * @param outstream            destination stream, must not be {@code null}
 * @param buffersize           initial capacity of the internal byte buffer, must
 *                             not be negative
 * @param charset              charset for character data; {@code null} selects
 *                             {@code Consts.ASCII}
 * @param minChunkLimit        chunk size threshold; a negative value selects 512
 * @param malformedCharAction  action for malformed input; {@code null} selects
 *                             {@link CodingErrorAction#REPORT}
 * @param unmappableCharAction action for unmappable characters; {@code null}
 *                             selects {@link CodingErrorAction#REPORT}
 */
protected AbstractSessionOutputBuffer(
        final OutputStream outstream,
        final int buffersize,
        final Charset charset,
        final int minChunkLimit,
        final CodingErrorAction malformedCharAction,
        final CodingErrorAction unmappableCharAction) {
    super();
    // The argument is an output stream; the message used to say "Input stream".
    Args.notNull(outstream, "Output stream");
    Args.notNegative(buffersize, "Buffer size");
    this.outstream = outstream;
    this.buffer = new ByteArrayBuffer(buffersize);
    this.charset = charset != null ? charset : Consts.ASCII;
    this.ascii = this.charset.equals(Consts.ASCII);
    // No encoder is created here; the field starts out unset.
    this.encoder = null;
    this.minChunkLimit = minChunkLimit >= 0 ? minChunkLimit : 512;
    this.metrics = createTransportMetrics();
    this.onMalformedCharAction = malformedCharAction != null
            ? malformedCharAction : CodingErrorAction.REPORT;
    this.onUnmappableCharAction = unmappableCharAction != null
            ? unmappableCharAction : CodingErrorAction.REPORT;
}
/**
 * Initializes this buffer from an output stream and a set of HTTP parameters.
 *
 * <p>The charset, minimum chunk limit and coding error actions are read from
 * {@code params}. Missing values fall back to {@code Consts.ASCII}, 512 and
 * {@link CodingErrorAction#REPORT} respectively.
 *
 * @param outstream  destination stream, must not be {@code null}
 * @param buffersize initial capacity of the internal byte buffer, must not be
 *                   negative
 * @param params     HTTP parameters, must not be {@code null}
 */
protected void init(final OutputStream outstream, final int buffersize, final HttpParams params) {
    // The argument is an output stream; the message used to say "Input stream".
    Args.notNull(outstream, "Output stream");
    Args.notNegative(buffersize, "Buffer size");
    Args.notNull(params, "HTTP parameters");
    this.outstream = outstream;
    this.buffer = new ByteArrayBuffer(buffersize);
    final String charset = (String) params.getParameter(CoreProtocolPNames.HTTP_ELEMENT_CHARSET);
    this.charset = charset != null ? Charset.forName(charset) : Consts.ASCII;
    this.ascii = this.charset.equals(Consts.ASCII);
    // No encoder is created here; the field starts out unset.
    this.encoder = null;
    this.minChunkLimit = params.getIntParameter(CoreConnectionPNames.MIN_CHUNK_LIMIT, 512);
    this.metrics = createTransportMetrics();
    final CodingErrorAction a1 = (CodingErrorAction) params.getParameter(
            CoreProtocolPNames.HTTP_MALFORMED_INPUT_ACTION);
    this.onMalformedCharAction = a1 != null ? a1 : CodingErrorAction.REPORT;
    final CodingErrorAction a2 = (CodingErrorAction) params.getParameter(
            CoreProtocolPNames.HTTP_UNMAPPABLE_INPUT_ACTION);
    this.onUnmappableCharAction = a2 != null ? a2 : CodingErrorAction.REPORT;
}
private static String decode(ByteBuffer utf8, boolean replace) throws CharacterCodingException { CharsetDecoder decoder = DECODER_FACTORY.get(); if (replace) { decoder.onMalformedInput( CodingErrorAction.REPLACE); decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); } String str = decoder.decode(utf8).toString(); // set decoder back to its default value: REPORT if (replace) { decoder.onMalformedInput(CodingErrorAction.REPORT); decoder.onUnmappableCharacter(CodingErrorAction.REPORT); } return str; }
/**
 * Returns the data for this frame as it would be translated into Unicode.
 *
 * <p>The result may omit a few bytes of raw data at the end, or add extra bytes at
 * the start, to allow for the situation where a frame boundary occurs inside a
 * Unicode character.
 *
 * @return the frame data decoded as UTF-8
 * @throws RuntimeException if the adjusted bytes are not valid UTF-8; the
 *         underlying {@link CharacterCodingException} is attached as the cause
 */
private String getUnicodeData() {
    final byte[] b;
    if (unicodePrefix.length != 0 || unicodeChopEnding != 0) {
        // Prepend the prefix bytes and drop the chopped bytes from the end of
        // the frame.
        b = new byte[frameData.length + unicodePrefix.length - unicodeChopEnding];
        System.arraycopy(unicodePrefix, 0, b, 0, unicodePrefix.length);
        System.arraycopy(frameData, 0, b, unicodePrefix.length,
                frameData.length - unicodeChopEnding);
    } else {
        b = frameData;
    }
    try {
        return Charset.forName("UTF-8").newDecoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .decode(ByteBuffer.wrap(b)).toString();
    } catch (CharacterCodingException ex) {
        // Keep the original exception as the cause so the failing input can be
        // diagnosed.
        throw new RuntimeException("UTF-8 became invalid while we weren't looking at it", ex);
    }
}
/**
 * Converts the provided String to bytes using the encoder supplied by
 * {@code ENCODER_FACTORY} (UTF-8 according to the original documentation).
 * If <code>replace</code> is true, then malformed input is replaced with the
 * substitution character, which is U+FFFD. Otherwise the method throws a
 * MalformedInputException.
 *
 * @param string  text to encode
 * @param replace whether to replace malformed or unmappable input instead of
 *                failing
 * @return ByteBuffer: bytes stored at ByteBuffer.array()
 *                     and length is ByteBuffer.limit()
 * @throws CharacterCodingException if encoding fails while errors are reported
 */
public static ByteBuffer encode(String string, boolean replace)
        throws CharacterCodingException {
    CharsetEncoder encoder = ENCODER_FACTORY.get();
    if (!replace) {
        return encoder.encode(CharBuffer.wrap(string.toCharArray()));
    }

    encoder.onMalformedInput(CodingErrorAction.REPLACE);
    encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    try {
        return encoder.encode(CharBuffer.wrap(string.toCharArray()));
    } finally {
        // Restore REPORT even when encode() throws, so an encoder handed out
        // again by the factory is never left in REPLACE mode.
        encoder.onMalformedInput(CodingErrorAction.REPORT);
        encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
    }
}
/**
 * Decodes a folder name using {@code modifiedUtf7Charset}.
 *
 * <p>The name is first turned into bytes with {@code asciiCharset}; those bytes are
 * then decoded with malformed input reported as an error.
 *
 * @param encodedFolderName the encoded folder name
 * @return the decoded folder name
 * @throws CharacterCodingException if the input cannot be decoded
 */
public String decode(String encodedFolderName) throws CharacterCodingException {
    final CharsetDecoder reporting =
            modifiedUtf7Charset.newDecoder().onMalformedInput(CodingErrorAction.REPORT);
    final ByteBuffer asciiBytes = ByteBuffer.wrap(encodedFolderName.getBytes(asciiCharset));
    return reporting.decode(asciiBytes).toString();
}
/**
 * Encodes a password into bytes using the given charset.
 *
 * <p>Malformed input and unmappable characters are replaced rather than reported.
 * The intermediate buffer produced by the encoder is wiped before returning so
 * that the only remaining copy of the encoded password is the returned array;
 * clearing that array (and {@code pwd}) stays the caller's responsibility.
 *
 * @param pwd password characters
 * @param cs  charset used for encoding
 * @return a new array holding exactly the encoded bytes
 * @throws IOException if encoding fails
 */
private static byte[] encodePassword(char[] pwd, Charset cs) throws IOException {
    ByteBuffer pwdBytes = cs.newEncoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE)
            .encode(CharBuffer.wrap(pwd));
    byte[] encoded = new byte[pwdBytes.remaining()];
    pwdBytes.get(encoded);
    // Do not leave a second copy of the secret behind in the encoder's buffer.
    if (pwdBytes.hasArray()) {
        java.util.Arrays.fill(pwdBytes.array(), (byte) 0);
    }
    return encoded;
}
/**
 * Builds a charset encoder from a connection configuration.
 *
 * @param cconfig connection configuration, may be {@code null}
 * @return an encoder for the configured charset, using the configured error
 *         actions (or {@link CodingErrorAction#REPORT} where none is set);
 *         {@code null} if {@code cconfig} is {@code null} or has no charset
 */
public static CharsetEncoder createEncoder(final ConnectionConfig cconfig) {
    if (cconfig == null) {
        return null;
    }
    final Charset configured = cconfig.getCharset();
    if (configured == null) {
        return null;
    }

    final CodingErrorAction malformedAction = cconfig.getMalformedInputAction();
    final CodingErrorAction unmappableAction = cconfig.getUnmappableInputAction();
    final CharsetEncoder result = configured.newEncoder();
    result.onMalformedInput(
            malformedAction == null ? CodingErrorAction.REPORT : malformedAction);
    result.onUnmappableCharacter(
            unmappableAction == null ? CodingErrorAction.REPORT : unmappableAction);
    return result;
}
/**
 * Creates a decoder for the given charset, configured according to the
 * {@code strict} flag.
 *
 * <p>In strict mode both malformed input and unmappable characters are reported.
 * Otherwise malformed input is ignored and unmappable characters are replaced.
 *
 * @param charset charset to create the decoder for
 * @return a newly created, configured decoder
 */
public CharsetDecoder prepareDecoder(Charset charset) {
    final CodingErrorAction onMalformed =
            strict ? CodingErrorAction.REPORT : CodingErrorAction.IGNORE;
    final CodingErrorAction onUnmappable =
            strict ? CodingErrorAction.REPORT : CodingErrorAction.REPLACE;

    final CharsetDecoder configured = charset.newDecoder();
    configured.onMalformedInput(onMalformed);
    configured.onUnmappableCharacter(onUnmappable);
    return configured;
}
/**
 * Resets the character sequence so the file is read from the beginning again.
 *
 * <p>A fresh decoder is obtained from {@code prepareDecoder} and then switched to
 * replacing unmappable characters; the byte counter, buffer and start position
 * are cleared.
 */
public void reset() {
    this.currentDecoder = prepareDecoder(this.charset);
    // Unmappable characters are always replaced, whatever prepareDecoder chose.
    this.currentDecoder.onUnmappableCharacter(CodingErrorAction.REPLACE);

    this.currentStart = -1;
    this.readBytes = 0;
    this.currentBuffer.clear();
}
/**
 * Resets the character sequence so the file is read from the beginning again.
 *
 * <p>The decoder is reset and set to replace unmappable characters, all position
 * bookkeeping is cleared, and a mapped byte buffer, if present, is released.
 */
public void reset() {
    this.decoder.reset();
    this.decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);

    this.decodedBytes = 0;
    this.charBuffer.clear();
    this.charBufferStartsAt = -1;
    this.charBufferEndsAt = -1;
    this.overflow = false;
    this.state = State.STANDARD;

    if (this.byteBuffer == null) {
        return;
    }
    // Release the buffer before dropping the reference to it.
    MatcherUtils.unmap(this.byteBuffer);
    this.byteBuffer = null;
}
/**
 * Records or forwards a change of the malformed-input action.
 *
 * <p>The action is forwarded to {@code currentDecoder} only when this decoder is
 * initialized and {@code buffer} is {@code null}; otherwise it is stored in
 * {@code malformedInputAction}.
 *
 * @param action the new malformed-input action
 */
@Override
protected void implOnMalformedInput(CodingErrorAction action) {
    final boolean delegateActive = buffer == null && initialized;
    if (delegateActive) {
        currentDecoder.onMalformedInput(action);
        return;
    }
    this.malformedInputAction = action;
}
/**
 * Records or forwards a change of the unmappable-character action.
 *
 * <p>The action is forwarded to {@code currentDecoder} only when this decoder is
 * initialized and {@code buffer} is {@code null}; otherwise it is stored in
 * {@code unmappableCharAction}.
 *
 * @param action the new unmappable-character action
 */
@Override
protected void implOnUnmappableCharacter(CodingErrorAction action) {
    final boolean delegateActive = buffer == null && initialized;
    if (delegateActive) {
        currentDecoder.onUnmappableCharacter(action);
        return;
    }
    this.unmappableCharAction = action;
}
/**
 * Records or forwards a change of the malformed-input action.
 *
 * <p>The action is forwarded to {@code currentEncoder} only when this encoder is
 * initialized and {@code buffer} is {@code null}; otherwise it is stored in
 * {@code malformedInputAction}.
 *
 * @param action the new malformed-input action
 */
@Override
protected void implOnMalformedInput(CodingErrorAction action) {
    final boolean delegateActive = buffer == null && initialized;
    if (delegateActive) {
        currentEncoder.onMalformedInput(action);
        return;
    }
    malformedInputAction = action;
}
/**
 * Records or forwards a change of the unmappable-character action.
 *
 * <p>The action is forwarded to {@code currentEncoder} only when this encoder is
 * initialized and {@code buffer} is {@code null}; otherwise it is stored in
 * {@code unmappableCharAction}.
 *
 * @param action the new unmappable-character action
 */
@Override
protected void implOnUnmappableCharacter(CodingErrorAction action) {
    final boolean delegateActive = buffer == null && initialized;
    if (delegateActive) {
        currentEncoder.onUnmappableCharacter(action);
        return;
    }
    unmappableCharAction = action;
}
/**
 * Returns the decoder for {@code cs}, creating it on first use.
 *
 * <p>The decoder reports both malformed input and unmappable characters.
 *
 * @return the cached decoder
 */
private CharsetDecoder decoder() {
    if (dec != null) {
        return dec;
    }
    final CharsetDecoder created = cs.newDecoder();
    created.onMalformedInput(CodingErrorAction.REPORT);
    created.onUnmappableCharacter(CodingErrorAction.REPORT);
    dec = created;
    return dec;
}
/**
 * Builds a {@code ConnectionConfig} from the given HTTP parameters.
 *
 * <p>The element charset, the malformed-input action, the unmappable-input action
 * and the message constraints are all taken from {@code params}.
 *
 * @param params HTTP parameters to read from
 * @return the resulting connection configuration
 */
public static ConnectionConfig getConnectionConfig(final HttpParams params) {
    final MessageConstraints messageConstraints = getMessageConstraints(params);
    final String csname = (String) params.getParameter(CoreProtocolPNames.HTTP_ELEMENT_CHARSET);
    return ConnectionConfig.custom()
            .setCharset(csname != null ? Charset.forName(csname) : null)
            .setMalformedInputAction((CodingErrorAction)
                    params.getParameter(CoreProtocolPNames.HTTP_MALFORMED_INPUT_ACTION))
            // The unmappable-input action must go to its own setter; it used to
            // be passed to setMalformedInputAction, overwriting the line above.
            .setUnmappableInputAction((CodingErrorAction)
                    params.getParameter(CoreProtocolPNames.HTTP_UNMAPPABLE_INPUT_ACTION))
            .setMessageConstraints(messageConstraints)
            .build();
}
/** Store the document in proper encoding. */ protected void saveFromKitToStream(StyledDocument doc, EditorKit kit, OutputStream out) throws IOException, BadLocationException { // not calling super. String enc = EncodingUtil.detectEncoding(doc); // saved form saveDocument() Charset cs = fileEncoding.get(); // + fallback, if no info is available if (cs == null) { if (enc != null) { cs = Charset.forName(enc); } else { // fallback to the original encoding, no encoding in document istelf. cs = FileEncodingQuery.getEncoding(getDataObject().getPrimaryFile()); } } if ( Util.THIS.isLoggable() ) /* then */ Util.THIS.debug("Saving using encoding");//, new RuntimeException (enc)); // NOI18N if ( Util.THIS.isLoggable() ) /* then */ Util.THIS.debug("!!! TextEditorSupport::saveFromKitToStream: enc = " + enc); if ( Util.THIS.isLoggable() ) /* then */ Util.THIS.debug("!!! ::saveFromKitToStream: after first test -> OK"); FilterOutputStream fos = new FilterOutputStream(out) { @Override public void close() throws IOException { flush(); } }; CharsetEncoder encoder = cs.newEncoder(); encoder.onUnmappableCharacter(CodingErrorAction.REPORT); Writer w = new OutputStreamWriter (fos, encoder); if ( Util.THIS.isLoggable() ) /* then */ Util.THIS.debug("!!! ::saveFromKitToStream: writer = " + w); try { kit.write(w, doc, 0, doc.getLength()); } finally { w.close(); } }
/**
 * Sets the action to take on malformed input.
 *
 * <p>If a non-null action is given while no charset has been chosen yet, the
 * charset defaults to {@code Consts.ASCII}.
 *
 * @param malformedInputAction the action, may be {@code null}
 * @return this builder
 */
public Builder setMalformedInputAction(final CodingErrorAction malformedInputAction) {
    this.malformedInputAction = malformedInputAction;
    final boolean needsDefaultCharset = malformedInputAction != null && this.charset == null;
    if (needsDefaultCharset) {
        this.charset = Consts.ASCII;
    }
    return this;
}
/**
 * Decodes at most one character from the first {@code inLen} bytes of
 * {@code inBytes} using the shared {@code charsetDecoder}.
 *
 * <p>Malformed input is replaced with {@code invalidKeyStr}.
 *
 * @param inBytes bytes to decode
 * @param inLen   number of bytes of {@code inBytes} to consider
 * @return the decoded character, or {@code -1} if no character was produced
 */
private static synchronized int decodeCharFromBytes(byte[] inBytes, int inLen) {
    charsetDecoder.reset();
    charsetDecoder.onMalformedInput(CodingErrorAction.REPLACE);
    charsetDecoder.replaceWith(invalidKeyStr);

    final ByteBuffer source = ByteBuffer.wrap(inBytes, 0, inLen);
    final CharBuffer decoded = CharBuffer.allocate(1);
    // endOfInput is false: more bytes may follow in a later call.
    charsetDecoder.decode(source, decoded, false);

    if (decoded.position() != 0) {
        return decoded.get(0);
    }
    return -1;
}
/**
 * Creates a {@link ReaderInputStream} that encodes with the given charset.
 *
 * <p>Delegates to the encoder-based constructor with an encoder that replaces
 * both malformed input and unmappable characters.
 *
 * @param reader     the {@link Reader} to read characters from
 * @param charset    the charset used to encode the characters
 * @param bufferSize the size of the input buffer in number of characters
 */
public ReaderInputStream(Reader reader, Charset charset, int bufferSize) {
    this(
            reader,
            charset.newEncoder()
                    .onMalformedInput(CodingErrorAction.REPLACE)
                    .onUnmappableCharacter(CodingErrorAction.REPLACE),
            bufferSize);
}
/**
 * Creates a char-to-byte converter for the named encoding.
 *
 * <p>The encoder replaces both unmappable characters and malformed input.
 *
 * @param encoding name of the charset, resolved through
 *                 {@code B2CConverter.getCharset}
 * @throws IOException declared by the signature; presumably raised when the
 *                     encoding cannot be resolved
 */
public C2BConverter(String encoding) throws IOException {
    encoder = B2CConverter.getCharset(encoding).newEncoder();
    // FIXME: See if unmappable/malformed behavior configuration is needed
    // in practice
    encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    encoder.onMalformedInput(CodingErrorAction.REPLACE);

    // Small scratch buffer for characters carried over between calls.
    leftovers = CharBuffer.wrap(new char[4]);
}
/**
 * Creates an input stream that encodes the characters read from {@code reader}
 * into bytes with the given character set. Malformed input and unmappable
 * characters are replaced.
 *
 * @param reader input source
 * @param charset character set used for encoding chars to bytes
 * @param bufferSize size of internal input and output buffers
 * @throws IllegalArgumentException if bufferSize is non-positive
 */
ReaderInputStream(Reader reader, Charset charset, int bufferSize) {
    this(
            reader,
            charset.newEncoder()
                    .onMalformedInput(CodingErrorAction.REPLACE)
                    .onUnmappableCharacter(CodingErrorAction.REPLACE),
            bufferSize);
}
/**
 * Decodes password bytes into characters using the given charset.
 *
 * <p>Malformed input and unmappable characters are replaced rather than reported.
 * The intermediate buffer produced by the decoder is wiped before returning so
 * that the only remaining copy of the decoded password is the returned array;
 * clearing that array (and {@code pwdBytes}) stays the caller's responsibility.
 *
 * @param pwdBytes encoded password
 * @param encoding charset the bytes are encoded in
 * @return a new array holding exactly the decoded characters
 * @throws IOException if decoding fails
 */
private static char[] decodePassword(byte[] pwdBytes, Charset encoding) throws IOException {
    CharBuffer pwdChars = encoding.newDecoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE)
            .decode(ByteBuffer.wrap(pwdBytes));
    char[] result = new char[pwdChars.remaining()];
    pwdChars.get(result);
    // Do not leave a second copy of the secret behind in the decoder's buffer.
    if (pwdChars.hasArray()) {
        java.util.Arrays.fill(pwdChars.array(), '\0');
    }
    return result;
}
/**
 * Creates an input stream that yields the bytes of a character sequence encoded
 * with the given charset. Malformed input and unmappable characters are replaced.
 *
 * @param s the input character sequence
 * @param charset the character set to encode with
 * @param bufferSize the size, in bytes, of the internal byte buffer
 */
public CharSequenceInputStream(final CharSequence s, final Charset charset, int bufferSize) {
    super();
    this.encoder = charset.newEncoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);

    this.bbuf = ByteBuffer.allocate(bufferSize);
    // Flip right away so the byte buffer starts with nothing to read.
    this.bbuf.flip();

    this.cbuf = CharBuffer.wrap(s);
    this.mark = -1;
}
/**
 * Creates a connection configuration from the given values; each argument is
 * stored as-is in the field of the same name.
 *
 * @param bufferSize            buffer size
 * @param fragmentSizeHint      fragment size hint
 * @param charset               charset
 * @param malformedInputAction  action on malformed input
 * @param unmappableInputAction action on unmappable input
 * @param messageConstraints    message constraints
 */
ConnectionConfig(
        final int bufferSize,
        final int fragmentSizeHint,
        final Charset charset,
        final CodingErrorAction malformedInputAction,
        final CodingErrorAction unmappableInputAction,
        final MessageConstraints messageConstraints) {
    super();

    // Buffer sizing.
    this.bufferSize = bufferSize;
    this.fragmentSizeHint = fragmentSizeHint;

    // Character coding.
    this.charset = charset;
    this.malformedInputAction = malformedInputAction;
    this.unmappableInputAction = unmappableInputAction;

    // Message limits.
    this.messageConstraints = messageConstraints;
}
/**
 * Looks up the CharsetDecoder for the given encoding name. Note: this isn't
 * perfect, as names such as ISCII-DEVANAGARI and MICROSOFT-CP1251 may also be
 * allowed.
 *
 * <p>"ISO8859-14" gets a dedicated decoder. Any other name is first translated
 * through {@code CHARSET_ALIASES} and then resolved with
 * {@link Charset#forName(String)}; that decoder replaces malformed input.
 *
 * @param encoding Encoding to retrieve the CharsetDecoder for
 * @return CharsetDecoder for the given encoding
 */
private CharsetDecoder getJavaEncoding(String encoding) {
    if ("ISO8859-14".equals(encoding)) {
        return new ISO8859_14Decoder();
    }

    final String alias = CHARSET_ALIASES.get(encoding);
    final String javaName = (alias == null) ? encoding : alias;

    final CharsetDecoder lenient = Charset.forName(javaName).newDecoder();
    return lenient.onMalformedInput(CodingErrorAction.REPLACE);
}
/**
 * Returns the decoder held in {@code decTL}, creating and storing one on first
 * use.
 *
 * <p>The decoder reports both malformed input and unmappable characters.
 *
 * @return the cached decoder
 */
private CharsetDecoder decoder() {
    final CharsetDecoder cached = decTL.get();
    if (cached != null) {
        return cached;
    }

    final CharsetDecoder created = cs.newDecoder();
    created.onMalformedInput(CodingErrorAction.REPORT);
    created.onUnmappableCharacter(CodingErrorAction.REPORT);
    decTL.set(created);
    return created;
}
/** * Obtains the value of the {@link CoreProtocolPNames#HTTP_UNMAPPABLE_INPUT_ACTION} parameter. * @param params HTTP parameters * @return Action to perform upon receiving a unmapped input * * @since 4.2 */ public static CodingErrorAction getUnmappableInputAction(final HttpParams params) { Args.notNull(params, "HTTP parameters"); final Object param = params.getParameter(CoreProtocolPNames.HTTP_UNMAPPABLE_INPUT_ACTION); if (param == null) { // the default CodingErrorAction return CodingErrorAction.REPORT; } return (CodingErrorAction) param; }