hg/bck2brwsr: rt/emul/compact/src/main/java/java/nio/charset/CharsetDecoder.java@c794024954b5

     1 /*

     2  * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.

     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

     4  *

     5  * This code is free software; you can redistribute it and/or modify it

     6  * under the terms of the GNU General Public License version 2 only, as

     7  * published by the Free Software Foundation.  Oracle designates this

     8  * particular file as subject to the "Classpath" exception as provided

     9  * by Oracle in the LICENSE file that accompanied this code.

    10  *

    11  * This code is distributed in the hope that it will be useful, but WITHOUT

    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

    14  * version 2 for more details (a copy is included in the LICENSE file that

    15  * accompanied this code).

    16  *

    17  * You should have received a copy of the GNU General Public License version

    18  * 2 along with this work; if not, write to the Free Software Foundation,

    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

    20  *

    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA

    22  * or visit www.oracle.com if you need additional information or have any

    23  * questions.

    24  */

    26 // -- This file was mechanically generated: Do not edit! -- //

    28 package java.nio.charset;

    30 import java.nio.Buffer;

    31 import java.nio.ByteBuffer;

    32 import java.nio.CharBuffer;

    33 import java.nio.BufferOverflowException;

    34 import java.nio.BufferUnderflowException;

    35 import java.lang.ref.WeakReference;

    36 import java.nio.charset.CoderMalfunctionError;                  // javadoc

    39 /**

    40  * An engine that can transform a sequence of bytes in a specific charset into a sequence of

    41  * sixteen-bit Unicode characters.

    42  *

    43  * <a name="steps">

    44  *

    45  * <p> The input byte sequence is provided in a byte buffer or a series

    46  * of such buffers.  The output character sequence is written to a character buffer

    47  * or a series of such buffers.  A decoder should always be used by making

    48  * the following sequence of method invocations, hereinafter referred to as a

    49  * <i>decoding operation</i>:

    50  *

    51  * <ol>

    52  *

    53  *   <li><p> Reset the decoder via the {@link #reset reset} method, unless it

    54  *   has not been used before; </p></li>

    55  *

    56  *   <li><p> Invoke the {@link #decode decode} method zero or more times, as

    57  *   long as additional input may be available, passing <tt>false</tt> for the

    58  *   <tt>endOfInput</tt> argument and filling the input buffer and flushing the

    59  *   output buffer between invocations; </p></li>

    60  *

    61  *   <li><p> Invoke the {@link #decode decode} method one final time, passing

    62  *   <tt>true</tt> for the <tt>endOfInput</tt> argument; and then </p></li>

    63  *

    64  *   <li><p> Invoke the {@link #flush flush} method so that the decoder can

    65  *   flush any internal state to the output buffer. </p></li>

    66  *

    67  * </ol>

    68  *

    69  * Each invocation of the {@link #decode decode} method will decode as many

    70  * bytes as possible from the input buffer, writing the resulting characters

    71  * to the output buffer.  The {@link #decode decode} method returns when more

    72  * input is required, when there is not enough room in the output buffer, or

    73  * when a decoding error has occurred.  In each case a {@link CoderResult}

    74  * object is returned to describe the reason for termination.  An invoker can

    75  * examine this object and fill the input buffer, flush the output buffer, or

    76  * attempt to recover from a decoding error, as appropriate, and try again.

    77  *

    78  * <a name="ce">

    79  *

    80  * <p> There are two general types of decoding errors.  If the input byte

    81  * sequence is not legal for this charset then the input is considered <i>malformed</i>.  If

    82  * the input byte sequence is legal but cannot be mapped to a valid

    83  * Unicode character then an <i>unmappable character</i> has been encountered.

    84  *

    85  * <a name="cae">

    86  *

    87  * <p> How a decoding error is handled depends upon the action requested for

    88  * that type of error, which is described by an instance of the {@link

    89  * CodingErrorAction} class.  The possible error actions are to {@link

    90  * CodingErrorAction#IGNORE </code>ignore<code>} the erroneous input, {@link

    91  * CodingErrorAction#REPORT </code>report<code>} the error to the invoker via

    92  * the returned {@link CoderResult} object, or {@link CodingErrorAction#REPLACE

    93  * </code>replace<code>} the erroneous input with the current value of the

    94  * replacement string.  The replacement

    95  *

   101  * has the initial value <tt>"&#92;uFFFD"</tt>;

   103  *

   104  * its value may be changed via the {@link #replaceWith(java.lang.String)

   105  * replaceWith} method.

   106  *

   107  * <p> The default action for malformed-input and unmappable-character errors

   108  * is to {@link CodingErrorAction#REPORT </code>report<code>} them.  The

   109  * malformed-input error action may be changed via the {@link

   110  * #onMalformedInput(CodingErrorAction) onMalformedInput} method; the

   111  * unmappable-character action may be changed via the {@link

   112  * #onUnmappableCharacter(CodingErrorAction) onUnmappableCharacter} method.

   113  *

   114  * <p> This class is designed to handle many of the details of the decoding

   115  * process, including the implementation of error actions.  A decoder for a

   116  * specific charset, which is a concrete subclass of this class, need only

   117  * implement the abstract {@link #decodeLoop decodeLoop} method, which

   118  * encapsulates the basic decoding loop.  A subclass that maintains internal

   119  * state should, additionally, override the {@link #implFlush implFlush} and

   120  * {@link #implReset implReset} methods.

   121  *

   122  * <p> Instances of this class are not safe for use by multiple concurrent

   123  * threads.  </p>

   124  *

   125  *

   126  * @author Mark Reinhold

   127  * @author JSR-51 Expert Group

   128  * @since 1.4

   129  *

   130  * @see ByteBuffer

   131  * @see CharBuffer

   132  * @see Charset

   133  * @see CharsetEncoder

   134  */

   136 public abstract class CharsetDecoder {

    // Charset that created this decoder; exposed through charset().
    private final Charset charset;
    // Expected number of characters produced per input byte (sizing heuristic).
    private final float averageCharsPerByte;
    // Worst-case number of characters produced per input byte; replaceWith()
    // also uses it as the upper bound on the replacement's length.
    private final float maxCharsPerByte;

    // String written to the output in place of erroneous input when the
    // applicable error action is CodingErrorAction.REPLACE.
    private String replacement;
    // Action taken by decode() for malformed input; defaults to REPORT.
    private CodingErrorAction malformedInputAction
        = CodingErrorAction.REPORT;
    // Action taken by decode() for unmappable characters; defaults to REPORT.
    private CodingErrorAction unmappableCharacterAction
        = CodingErrorAction.REPORT;

    // Internal states
    //
    private static final int ST_RESET   = 0;  // set by reset(); also the initial state
    private static final int ST_CODING  = 1;  // set by decode(..., false)
    private static final int ST_END     = 2;  // set by decode(..., true)
    private static final int ST_FLUSHED = 3;  // set by flush() once implFlush reports underflow

    // Current position in the decoding-operation state machine.
    private int state = ST_RESET;

    // Human-readable names indexed by the ST_* constants above.
    // NOTE(review): not referenced in the visible part of this file; presumably
    // used to build IllegalStateException messages -- confirm.
    private static String stateNames[]
        = { "RESET", "CODING", "CODING_END", "FLUSHED" };

   161     /**

   162      * Initializes a new decoder.  The new decoder will have the given

   163      * chars-per-byte and replacement values. </p>

   164      *

   165      * @param  averageCharsPerByte

   166      *         A positive float value indicating the expected number of

   167      *         characters that will be produced for each input byte

   168      *

   169      * @param  maxCharsPerByte

   170      *         A positive float value indicating the maximum number of

   171      *         characters that will be produced for each input byte

   172      *

   173      * @param  replacement

   174      *         The initial replacement; must not be <tt>null</tt>, must have

   175      *         non-zero length, must not be longer than maxCharsPerByte,

   176      *         and must be {@link #isLegalReplacement </code>legal<code>}

   177      *

   178      * @throws  IllegalArgumentException

   179      *          If the preconditions on the parameters do not hold

   180      */

   181     private

   182     CharsetDecoder(Charset cs,

   183                    float averageCharsPerByte,

   184                    float maxCharsPerByte,

   185                    String replacement)

   186     {

   187         this.charset = cs;

   188         if (averageCharsPerByte <= 0.0f)

   189             throw new IllegalArgumentException("Non-positive "

   190                                                + "averageCharsPerByte");

   191         if (maxCharsPerByte <= 0.0f)

   192             throw new IllegalArgumentException("Non-positive "

   193                                                + "maxCharsPerByte");

   194         if (!Charset.atBugLevel("1.4")) {

   195             if (averageCharsPerByte > maxCharsPerByte)

   196                 throw new IllegalArgumentException("averageCharsPerByte"

   197                                                    + " exceeds "

   198                                                    + "maxCharsPerByte");

   199         }

   200         this.replacement = replacement;

   201         this.averageCharsPerByte = averageCharsPerByte;

   202         this.maxCharsPerByte = maxCharsPerByte;

   203         replaceWith(replacement);

   204     }

   206     /**

   207      * Initializes a new decoder.  The new decoder will have the given

   208      * chars-per-byte values and its replacement will be the

   209      * string <tt>"&#92;uFFFD"</tt>. </p>

   210      *

   211      * @param  averageCharsPerByte

   212      *         A positive float value indicating the expected number of

   213      *         characters that will be produced for each input byte

   214      *

   215      * @param  maxCharsPerByte

   216      *         A positive float value indicating the maximum number of

   217      *         characters that will be produced for each input byte

   218      *

   219      * @throws  IllegalArgumentException

   220      *          If the preconditions on the parameters do not hold

   221      */

   222     protected CharsetDecoder(Charset cs,

   223                              float averageCharsPerByte,

   224                              float maxCharsPerByte)

   225     {

   226         this(cs,

   227              averageCharsPerByte, maxCharsPerByte,

   228              "\uFFFD");

   229     }

   231     /**

   232      * Returns the charset that created this decoder.  </p>

   233      *

   234      * @return  This decoder's charset

   235      */

   236     public final Charset charset() {

   237         return charset;

   238     }

   240     /**

   241      * Returns this decoder's replacement value. </p>

   242      *

   243      * @return  This decoder's current replacement,

   244      *          which is never <tt>null</tt> and is never empty

   245      */

   246     public final String replacement() {

   247         return replacement;

   248     }

   250     /**

   251      * Changes this decoder's replacement value.

   252      *

   253      * <p> This method invokes the {@link #implReplaceWith implReplaceWith}

   254      * method, passing the new replacement, after checking that the new

   255      * replacement is acceptable.  </p>

   256      *

   257      * @param  newReplacement

   258      *

   260      *         The new replacement; must not be <tt>null</tt>

   261      *         and must have non-zero length

   269      *

   270      * @return  This decoder

   271      *

   272      * @throws  IllegalArgumentException

   273      *          If the preconditions on the parameter do not hold

   274      */

   275     public final CharsetDecoder replaceWith(String newReplacement) {

   276         if (newReplacement == null)

   277             throw new IllegalArgumentException("Null replacement");

   278         int len = newReplacement.length();

   279         if (len == 0)

   280             throw new IllegalArgumentException("Empty replacement");

   281         if (len > maxCharsPerByte)

   282             throw new IllegalArgumentException("Replacement too long");

   287         this.replacement = newReplacement;

   288         implReplaceWith(newReplacement);

   289         return this;

   290     }

   292     /**

   293      * Reports a change to this decoder's replacement value.

   294      *

   295      * <p> The default implementation of this method does nothing.  This method

   296      * should be overridden by decoders that require notification of changes to

   297      * the replacement.  </p>

   298      *

   299      * @param  newReplacement

   300      */

   301     protected void implReplaceWith(String newReplacement) {

   302     }

   344     /**

   345      * Returns this decoder's current action for malformed-input errors.  </p>

   346      *

   347      * @return The current malformed-input action, which is never <tt>null</tt>

   348      */

   349     public CodingErrorAction malformedInputAction() {

   350         return malformedInputAction;

   351     }

   353     /**

   354      * Changes this decoder's action for malformed-input errors.  </p>

   355      *

   356      * <p> This method invokes the {@link #implOnMalformedInput

   357      * implOnMalformedInput} method, passing the new action.  </p>

   358      *

   359      * @param  newAction  The new action; must not be <tt>null</tt>

   360      *

   361      * @return  This decoder

   362      *

   363      * @throws IllegalArgumentException

   364      *         If the precondition on the parameter does not hold

   365      */

   366     public final CharsetDecoder onMalformedInput(CodingErrorAction newAction) {

   367         if (newAction == null)

   368             throw new IllegalArgumentException("Null action");

   369         malformedInputAction = newAction;

   370         implOnMalformedInput(newAction);

   371         return this;

   372     }

   374     /**

   375      * Reports a change to this decoder's malformed-input action.

   376      *

   377      * <p> The default implementation of this method does nothing.  This method

   378      * should be overridden by decoders that require notification of changes to

   379      * the malformed-input action.  </p>

   380      */

   381     protected void implOnMalformedInput(CodingErrorAction newAction) { }

   383     /**

   384      * Returns this decoder's current action for unmappable-character errors.

   385      * </p>

   386      *

   387      * @return The current unmappable-character action, which is never

   388      *         <tt>null</tt>

   389      */

   390     public CodingErrorAction unmappableCharacterAction() {

   391         return unmappableCharacterAction;

   392     }

   394     /**

   395      * Changes this decoder's action for unmappable-character errors.

   396      *

   397      * <p> This method invokes the {@link #implOnUnmappableCharacter

   398      * implOnUnmappableCharacter} method, passing the new action.  </p>

   399      *

   400      * @param  newAction  The new action; must not be <tt>null</tt>

   401      *

   402      * @return  This decoder

   403      *

   404      * @throws IllegalArgumentException

   405      *         If the precondition on the parameter does not hold

   406      */

   407     public final CharsetDecoder onUnmappableCharacter(CodingErrorAction

   408                                                       newAction)

   409     {

   410         if (newAction == null)

   411             throw new IllegalArgumentException("Null action");

   412         unmappableCharacterAction = newAction;

   413         implOnUnmappableCharacter(newAction);

   414         return this;

   415     }

   417     /**

   418      * Reports a change to this decoder's unmappable-character action.

   419      *

   420      * <p> The default implementation of this method does nothing.  This method

   421      * should be overridden by decoders that require notification of changes to

   422      * the unmappable-character action.  </p>

   423      */

   424     protected void implOnUnmappableCharacter(CodingErrorAction newAction) { }

   426     /**

   427      * Returns the average number of characters that will be produced for each

   428      * byte of input.  This heuristic value may be used to estimate the size

   429      * of the output buffer required for a given input sequence. </p>

   430      *

   431      * @return  The average number of characters produced

   432      *          per byte of input

   433      */

   434     public final float averageCharsPerByte() {

   435         return averageCharsPerByte;

   436     }

   438     /**

   439      * Returns the maximum number of characters that will be produced for each

   440      * byte of input.  This value may be used to compute the worst-case size

   441      * of the output buffer required for a given input sequence. </p>

   442      *

   443      * @return  The maximum number of characters that will be produced per

   444      *          byte of input

   445      */

   446     public final float maxCharsPerByte() {

   447         return maxCharsPerByte;

   448     }

   450     /**

   451      * Decodes as many bytes as possible from the given input buffer,

   452      * writing the results to the given output buffer.

   453      *

   454      * <p> The buffers are read from, and written to, starting at their current

   455      * positions.  At most {@link Buffer#remaining in.remaining()} bytes

   456      * will be read and at most {@link Buffer#remaining out.remaining()}

   457      * characters will be written.  The buffers' positions will be advanced to

   458      * reflect the bytes read and the characters written, but their marks and

   459      * limits will not be modified.

   460      *

   461      * <p> In addition to reading bytes from the input buffer and writing

   462      * characters to the output buffer, this method returns a {@link CoderResult}

   463      * object to describe its reason for termination:

   464      *

   465      * <ul>

   466      *

   467      *   <li><p> {@link CoderResult#UNDERFLOW} indicates that as much of the

   468      *   input buffer as possible has been decoded.  If there is no further

   469      *   input then the invoker can proceed to the next step of the

   470      *   <a href="#steps">decoding operation</a>.  Otherwise this method

   471      *   should be invoked again with further input.  </p></li>

   472      *

   473      *   <li><p> {@link CoderResult#OVERFLOW} indicates that there is

   474      *   insufficient space in the output buffer to decode any more bytes.

   475      *   This method should be invoked again with an output buffer that has

   476      *   more {@linkplain Buffer#remaining remaining} characters. This is

   477      *   typically done by draining any decoded characters from the output

   478      *   buffer.  </p></li>

   479      *

   480      *   <li><p> A {@link CoderResult#malformedForLength

   481      *   </code>malformed-input<code>} result indicates that a malformed-input

   482      *   error has been detected.  The malformed bytes begin at the input

   483      *   buffer's (possibly incremented) position; the number of malformed

   484      *   bytes may be determined by invoking the result object's {@link

   485      *   CoderResult#length() length} method.  This case applies only if the

   486      *   {@link #onMalformedInput </code>malformed action<code>} of this decoder

   487      *   is {@link CodingErrorAction#REPORT}; otherwise the malformed input

   488      *   will be ignored or replaced, as requested.  </p></li>

   489      *

   490      *   <li><p> An {@link CoderResult#unmappableForLength

   491      *   </code>unmappable-character<code>} result indicates that an

   492      *   unmappable-character error has been detected.  The bytes that

   493      *   decode the unmappable character begin at the input buffer's (possibly

   494      *   incremented) position; the number of such bytes may be determined

   495      *   by invoking the result object's {@link CoderResult#length() length}

   496      *   method.  This case applies only if the {@link #onUnmappableCharacter

   497      *   </code>unmappable action<code>} of this decoder is {@link

   498      *   CodingErrorAction#REPORT}; otherwise the unmappable character will be

   499      *   ignored or replaced, as requested.  </p></li>

   500      *

   501      * </ul>

   502      *

   503      * In any case, if this method is to be reinvoked in the same decoding

   504      * operation then care should be taken to preserve any bytes remaining

   505      * in the input buffer so that they are available to the next invocation.

   506      *

   507      * <p> The <tt>endOfInput</tt> parameter advises this method as to whether

   508      * the invoker can provide further input beyond that contained in the given

   509      * input buffer.  If there is a possibility of providing additional input

   510      * then the invoker should pass <tt>false</tt> for this parameter; if there

   511      * is no possibility of providing further input then the invoker should

   512      * pass <tt>true</tt>.  It is not erroneous, and in fact it is quite

   513      * common, to pass <tt>false</tt> in one invocation and later discover that

   514      * no further input was actually available.  It is critical, however, that

   515      * the final invocation of this method in a sequence of invocations always

   516      * pass <tt>true</tt> so that any remaining undecoded input will be treated

   517      * as being malformed.

   518      *

   519      * <p> This method works by invoking the {@link #decodeLoop decodeLoop}

   520      * method, interpreting its results, handling error conditions, and

   521      * reinvoking it as necessary.  </p>

   522      *

   523      *

   524      * @param  in

   525      *         The input byte buffer

   526      *

   527      * @param  out

   528      *         The output character buffer

   529      *

   530      * @param  endOfInput

   531      *         <tt>true</tt> if, and only if, the invoker can provide no

   532      *         additional input bytes beyond those in the given buffer

   533      *

   534      * @return  A coder-result object describing the reason for termination

   535      *

   536      * @throws  IllegalStateException

   537      *          If a decoding operation is already in progress and the previous

   538      *          step was an invocation neither of the {@link #reset reset}

   539      *          method, nor of this method with a value of <tt>false</tt> for

   540      *          the <tt>endOfInput</tt> parameter, nor of this method with a

   541      *          value of <tt>true</tt> for the <tt>endOfInput</tt> parameter

   542      *          but a return value indicating an incomplete decoding operation

   543      *

   544      * @throws  CoderMalfunctionError

   545      *          If an invocation of the decodeLoop method threw

   546      *          an unexpected exception

   547      */

    public final CoderResult decode(ByteBuffer in, CharBuffer out,
                                    boolean endOfInput)
    {
        // State check: decoding may start from RESET or continue from CODING;
        // once END has been reached it may only be re-entered with
        // endOfInput == true.  Any other transition is rejected.
        int newState = endOfInput ? ST_END : ST_CODING;
        if ((state != ST_RESET) && (state != ST_CODING)
            && !(endOfInput && (state == ST_END)))
            throwIllegalStateException(state, newState);
        state = newState;

        // Run decodeLoop repeatedly; each pass either returns a result to the
        // invoker or skips one erroneous input sequence and tries again.
        for (;;) {

            CoderResult cr;
            try {
                cr = decodeLoop(in, out);
            } catch (BufferUnderflowException x) {
                // decodeLoop read past the input it was given: treat as a
                // malfunction of the decoder rather than a decoding result
                throw new CoderMalfunctionError(x);
            } catch (BufferOverflowException x) {
                // decodeLoop wrote past the output it was given
                throw new CoderMalfunctionError(x);
            }

            // No room left in the output: let the invoker drain it
            if (cr.isOverflow())
                return cr;

            if (cr.isUnderflow()) {
                if (endOfInput && in.hasRemaining()) {
                    // No more input will arrive, so the bytes still left in
                    // the buffer are reported as one malformed sequence
                    cr = CoderResult.malformedForLength(in.remaining());
                    // Fall through to malformed-input case
                } else {
                    return cr;
                }
            }

            // Select the configured action for this kind of error; action
            // stays null if the result is neither malformed nor unmappable
            CodingErrorAction action = null;
            if (cr.isMalformed())
                action = malformedInputAction;
            else if (cr.isUnmappable())
                action = unmappableCharacterAction;
            else
                assert false : cr.toString();

            // REPORT: hand the error to the invoker; the input position is
            // not advanced here
            if (action == CodingErrorAction.REPORT)
                return cr;

            if (action == CodingErrorAction.REPLACE) {
                // The replacement is written in full or not at all; on
                // overflow the erroneous input has not been skipped yet
                if (out.remaining() < replacement.length())
                    return CoderResult.OVERFLOW;
                out.put(replacement);
            }

            if ((action == CodingErrorAction.IGNORE)
                || (action == CodingErrorAction.REPLACE)) {
                // Skip erroneous input either way
                in.position(in.position() + cr.length());
                continue;
            }

            // Reached only when action is none of REPORT, REPLACE or IGNORE.
            // NOTE(review): with assertions disabled nothing stops the loop
            // here; it simply iterates again.
            assert false;
        }

    }

   609     /**

   610      * Flushes this decoder.

   611      *

   612      * <p> Some decoders maintain internal state and may need to write some

   613      * final characters to the output buffer once the overall input sequence has

   614      * been read.

   615      *

   616      * <p> Any additional output is written to the output buffer beginning at

   617      * its current position.  At most {@link Buffer#remaining out.remaining()}

   618      * characters will be written.  The buffer's position will be advanced

   619      * appropriately, but its mark and limit will not be modified.

   620      *

   621      * <p> If this method completes successfully then it returns {@link

   622      * CoderResult#UNDERFLOW}.  If there is insufficient room in the output

   623      * buffer then it returns {@link CoderResult#OVERFLOW}.  If this happens

   624      * then this method must be invoked again, with an output buffer that has

   625      * more room, in order to complete the current <a href="#steps">decoding

   626      * operation</a>.

   627      *

   628      * <p> If this decoder has already been flushed then invoking this method

   629      * has no effect.

   630      *

   631      * <p> This method invokes the {@link #implFlush implFlush} method to

   632      * perform the actual flushing operation.  </p>

   633      *

   634      * @param  out

   635      *         The output character buffer

   636      *

   637      * @return  A coder-result object, either {@link CoderResult#UNDERFLOW} or

   638      *          {@link CoderResult#OVERFLOW}

   639      *

   640      * @throws  IllegalStateException

   641      *          If the previous step of the current decoding operation was an

   642      *          invocation neither of the {@link #flush flush} method nor of

   643      *          the three-argument {@link

   644      *          #decode(ByteBuffer,CharBuffer,boolean) decode} method

   645      *          with a value of <tt>true</tt> for the <tt>endOfInput</tt>

   646      *          parameter

   647      */

   648     public final CoderResult flush(CharBuffer out) {

   649         if (state == ST_END) {

   650             CoderResult cr = implFlush(out);

   651             if (cr.isUnderflow())

   652                 state = ST_FLUSHED;

   653             return cr;

   654         }

   656         if (state != ST_FLUSHED)

   657             throwIllegalStateException(state, ST_FLUSHED);

   659         return CoderResult.UNDERFLOW; // Already flushed

   660     }

   662     /**

   663      * Flushes this decoder.

   664      *

   665      * <p> The default implementation of this method does nothing, and always

   666      * returns {@link CoderResult#UNDERFLOW}.  This method should be overridden

   667      * by decoders that may need to write final characters to the output buffer

   668      * once the entire input sequence has been read. </p>

   669      *

   670      * @param  out

   671      *         The output character buffer

   672      *

   673      * @return  A coder-result object, either {@link CoderResult#UNDERFLOW} or

   674      *          {@link CoderResult#OVERFLOW}

   675      */

   676     protected CoderResult implFlush(CharBuffer out) {

   677         return CoderResult.UNDERFLOW;

   678     }

   680     /**

   681      * Resets this decoder, clearing any internal state.

   682      *

   683      * <p> This method resets charset-independent state and also invokes the

   684      * {@link #implReset() implReset} method in order to perform any

   685      * charset-specific reset actions.  </p>

   686      *

   687      * @return  This decoder

   688      *

   689      */

   690     public final CharsetDecoder reset() {

   691         implReset();

   692         state = ST_RESET;

   693         return this;

   694     }

   696     /**

   697      * Resets this decoder, clearing any charset-specific internal state.

   698      *

   699      * <p> The default implementation of this method does nothing.  This method

   700      * should be overridden by decoders that maintain internal state.  </p>

   701      */

   702     protected void implReset() { }

   704     /**

   705      * Decodes one or more bytes into one or more characters.

   706      *

   707      * <p> This method encapsulates the basic decoding loop, decoding as many

   708      * bytes as possible until it either runs out of input, runs out of room

   709      * in the output buffer, or encounters a decoding error.  This method is

   710      * invoked by the {@link #decode decode} method, which handles result

   711      * interpretation and error recovery.

   712      *

   713      * <p> The buffers are read from, and written to, starting at their current

   714      * positions.  At most {@link Buffer#remaining in.remaining()} bytes

   715      * will be read, and at most {@link Buffer#remaining out.remaining()}

   716      * characters will be written.  The buffers' positions will be advanced to

   717      * reflect the bytes read and the characters written, but their marks and

   718      * limits will not be modified.

   719      *

   720      * <p> This method returns a {@link CoderResult} object to describe its

   721      * reason for termination, in the same manner as the {@link #decode decode}

   722      * method.  Most implementations of this method will handle decoding errors

   723      * by returning an appropriate result object for interpretation by the

   724      * {@link #decode decode} method.  An optimized implementation may instead

   725      * examine the relevant error action and implement that action itself.

   726      *

   727      * <p> An implementation of this method may perform arbitrary lookahead by

   728      * returning {@link CoderResult#UNDERFLOW} until it receives sufficient

   729      * input.  </p>

   730      *

   731      * @param  in

   732      *         The input byte buffer

   733      *

   734      * @param  out

   735      *         The output character buffer

   736      *

   737      * @return  A coder-result object describing the reason for termination

   738      */

   739     protected abstract CoderResult decodeLoop(ByteBuffer in,

   740                                               CharBuffer out);

   742     /**

   743      * Convenience method that decodes the remaining content of a single input

   744      * byte buffer into a newly-allocated character buffer.

   745      *

   746      * <p> This method implements an entire <a href="#steps">decoding

   747      * operation</a>; that is, it resets this decoder, then it decodes the

   748      * bytes in the given byte buffer, and finally it flushes this

   749      * decoder.  This method should therefore not be invoked if a decoding

   750      * operation is already in progress.  </p>

   751      *

   752      * @param  in

   753      *         The input byte buffer

   754      *

   755      * @return A newly-allocated character buffer containing the result of the

   756      *         decoding operation.  The buffer's position will be zero and its

   757      *         limit will follow the last character written.

   758      *

   759      * @throws  IllegalStateException

   760      *          If a decoding operation is already in progress

   761      *

   762      * @throws  MalformedInputException

   763      *          If the byte sequence starting at the input buffer's current

   764      *          position is not legal for this charset and the current malformed-input action

   765      *          is {@link CodingErrorAction#REPORT}

   766      *

   767      * @throws  UnmappableCharacterException

   768      *          If the byte sequence starting at the input buffer's current

   769      *          position cannot be mapped to an equivalent character sequence and

   770      *          the current unmappable-character action is {@link

   771      *          CodingErrorAction#REPORT}

   772      */

   773     public final CharBuffer decode(ByteBuffer in)

   774         throws CharacterCodingException

   775     {

   776         int n = (int)(in.remaining() * averageCharsPerByte());

   777         CharBuffer out = CharBuffer.allocate(n);

   779         if ((n == 0) && (in.remaining() == 0))

   780             return out;

   781         reset();

   782         for (;;) {

   783             CoderResult cr = in.hasRemaining() ?

   784                 decode(in, out, true) : CoderResult.UNDERFLOW;

   785             if (cr.isUnderflow())

   786                 cr = flush(out);

   788             if (cr.isUnderflow())

   789                 break;

   790             if (cr.isOverflow()) {

   791                 n = 2*n + 1;    // Ensure progress; n might be 0!

   792                 CharBuffer o = CharBuffer.allocate(n);

   793                 out.flip();

   794                 o.put(out);

   795                 out = o;

   796                 continue;

   797             }

   798             cr.throwException();

   799         }

   800         out.flip();

   801         return out;

   802     }

   806     /**

   807      * Tells whether or not this decoder implements an auto-detecting charset.

   808      *

   809      * <p> The default implementation of this method always returns

   810      * <tt>false</tt>; it should be overridden by auto-detecting decoders to

   811      * return <tt>true</tt>.  </p>

   812      *

   813      * @return  <tt>true</tt> if, and only if, this decoder implements an

   814      *          auto-detecting charset

   815      */

   816     public boolean isAutoDetecting() {

   817         return false;

   818     }

   820     /**

   821      * Tells whether or not this decoder has yet detected a

   822      * charset&nbsp;&nbsp;<i>(optional operation)</i>.

   823      *

   824      * <p> If this decoder implements an auto-detecting charset then at a

   825      * single point during a decoding operation this method may start returning

   826      * <tt>true</tt> to indicate that a specific charset has been detected in

   827      * the input byte sequence.  Once this occurs, the {@link #detectedCharset

   828      * detectedCharset} method may be invoked to retrieve the detected charset.

   829      *

   830      * <p> That this method returns <tt>false</tt> does not imply that no bytes

   831      * have yet been decoded.  Some auto-detecting decoders are capable of

   832      * decoding some, or even all, of an input byte sequence without fixing on

   833      * a particular charset.

   834      *

   835      * <p> The default implementation of this method always throws an {@link

   836      * UnsupportedOperationException}; it should be overridden by

   837      * auto-detecting decoders to return <tt>true</tt> once the input charset

   838      * has been determined.  </p>

   839      *

   840      * @return  <tt>true</tt> if, and only if, this decoder has detected a

   841      *          specific charset

   842      *

   843      * @throws  UnsupportedOperationException

   844      *          If this decoder does not implement an auto-detecting charset

   845      */

   846     public boolean isCharsetDetected() {

   847         throw new UnsupportedOperationException();

   848     }

   850     /**

   851      * Retrieves the charset that was detected by this

   852      * decoder&nbsp;&nbsp;<i>(optional operation)</i>.

   853      *

   854      * <p> If this decoder implements an auto-detecting charset then this

   855      * method returns the actual charset once it has been detected.  After that

   856      * point, this method returns the same value for the duration of the

   857      * current decoding operation.  If not enough input bytes have yet been

   858      * read to determine the actual charset then this method throws an {@link

   859      * IllegalStateException}.

   860      *

   861      * <p> The default implementation of this method always throws an {@link

   862      * UnsupportedOperationException}; it should be overridden by

   863      * auto-detecting decoders to return the appropriate value.  </p>

   864      *

   865      * @return  The charset detected by this auto-detecting decoder,

   866      *          or <tt>null</tt> if the charset has not yet been determined

   867      *

   868      * @throws  IllegalStateException

   869      *          If insufficient bytes have been read to determine a charset

   870      *

   871      * @throws  UnsupportedOperationException

   872      *          If this decoder does not implement an auto-detecting charset

   873      */

   874     public Charset detectedCharset() {

   875         throw new UnsupportedOperationException();

   876     }

   967     private void throwIllegalStateException(int from, int to) {

   968         throw new IllegalStateException("Current state = " + stateNames[from]

   969                                         + ", new state = " + stateNames[to]);

   970     }

   972 }

author	Jaroslav Tulach <jaroslav.tulach@apidesign.org>
	Thu, 03 Oct 2013 17:36:44 +0200
changeset 1337	c794024954b5
parent 1334	588d5bf7a560
child 1343	802e5d2da9f6
permissions	-rw-r--r--