hg/bck2brwsr: emul/src/main/java/java/lang/Character.java@3884815c0629

     1 /*

     2  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.

     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

     4  *

     5  * This code is free software; you can redistribute it and/or modify it

     6  * under the terms of the GNU General Public License version 2 only, as

     7  * published by the Free Software Foundation.  Oracle designates this

     8  * particular file as subject to the "Classpath" exception as provided

     9  * by Oracle in the LICENSE file that accompanied this code.

    10  *

    11  * This code is distributed in the hope that it will be useful, but WITHOUT

    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

    14  * version 2 for more details (a copy is included in the LICENSE file that

    15  * accompanied this code).

    16  *

    17  * You should have received a copy of the GNU General Public License version

    18  * 2 along with this work; if not, write to the Free Software Foundation,

    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

    20  *

    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA

    22  * or visit www.oracle.com if you need additional information or have any

    23  * questions.

    24  */

    26 package java.lang;

    28 /**

    29  * The {@code Character} class wraps a value of the primitive

    30  * type {@code char} in an object. An object of type

    31  * {@code Character} contains a single field whose type is

    32  * {@code char}.

    33  * <p>

    34  * In addition, this class provides several methods for determining

    35  * a character's category (lowercase letter, digit, etc.) and for converting

    36  * characters from uppercase to lowercase and vice versa.

    37  * <p>

    38  * Character information is based on the Unicode Standard, version 6.0.0.

    39  * <p>

    40  * The methods and data of class {@code Character} are defined by

    41  * the information in the <i>UnicodeData</i> file that is part of the

    42  * Unicode Character Database maintained by the Unicode

    43  * Consortium. This file specifies various properties including name

    44  * and general category for every defined Unicode code point or

    45  * character range.

    46  * <p>

    47  * The file and its description are available from the Unicode Consortium at:

    48  * <ul>

    49  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>

    50  * </ul>

    51  *

    52  * <h4><a name="unicode">Unicode Character Representations</a></h4>

    53  *

    54  * <p>The {@code char} data type (and therefore the value that a

    55  * {@code Character} object encapsulates) is based on the

    56  * original Unicode specification, which defined characters as

    57  * fixed-width 16-bit entities. The Unicode Standard has since been

    58  * changed to allow for characters whose representation requires more

    59  * than 16 bits.  The range of legal <em>code point</em>s is now

    60  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.

    61  * (Refer to the <a

    62  * href="http://www.unicode.org/reports/tr27/#notation"><i>

    63  * definition</i></a> of the U+<i>n</i> notation in the Unicode

    64  * Standard.)

    65  *

    66  * <p><a name="BMP">The set of characters from U+0000 to U+FFFF is

    67  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.

    68  * <a name="supplementary">Characters</a> whose code points are greater

    69  * than U+FFFF are called <em>supplementary character</em>s.  The Java

    70  * platform uses the UTF-16 representation in {@code char} arrays and

    71  * in the {@code String} and {@code StringBuffer} classes. In

    72  * this representation, supplementary characters are represented as a pair

    73  * of {@code char} values, the first from the <em>high-surrogates</em>

    74  * range, (&#92;uD800-&#92;uDBFF), the second from the

    75  * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).

    76  *

    77  * <p>A {@code char} value, therefore, represents Basic

    78  * Multilingual Plane (BMP) code points, including the surrogate

    79  * code points, or code units of the UTF-16 encoding. An

    80  * {@code int} value represents all Unicode code points,

    81  * including supplementary code points. The lower (least significant)

    82  * 21 bits of {@code int} are used to represent Unicode code

    83  * points and the upper (most significant) 11 bits must be zero.

    84  * Unless otherwise specified, the behavior with respect to

    85  * supplementary characters and surrogate {@code char} values is

    86  * as follows:

    87  *

    88  * <ul>

    89  * <li>The methods that only accept a {@code char} value cannot support

    90  * supplementary characters. They treat {@code char} values from the

    91  * surrogate ranges as undefined characters. For example,

    92  * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though

    93  * this specific value if followed by any low-surrogate value in a string

    94  * would represent a letter.

    95  *

    96  * <li>The methods that accept an {@code int} value support all

    97  * Unicode characters, including supplementary characters. For

    98  * example, {@code Character.isLetter(0x2F81A)} returns

    99  * {@code true} because the code point value represents a letter

   100  * (a CJK ideograph).

   101  * </ul>

   102  *

   103  * <p>In the Java SE API documentation, <em>Unicode code point</em> is

   104  * used for character values in the range between U+0000 and U+10FFFF,

   105  * and <em>Unicode code unit</em> is used for 16-bit

   106  * {@code char} values that are code units of the <em>UTF-16</em>

   107  * encoding. For more information on Unicode terminology, refer to the

   108  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.

   109  *

   110  * @author  Lee Boynton

   111  * @author  Guy Steele

   112  * @author  Akira Tanaka

   113  * @author  Martin Buchholz

   114  * @author  Ulf Zibis

   115  * @since   1.0

   116  */

   117 public final

   118 class Character implements java.io.Serializable, Comparable<Character> {

   119     /**

   120      * The minimum radix available for conversion to and from strings.

   121      * The constant value of this field is the smallest value permitted

   122      * for the radix argument in radix-conversion methods such as the

   123      * {@code digit} method, the {@code forDigit} method, and the

   124      * {@code toString} method of class {@code Integer}.

   125      *

   126      * @see     Character#digit(char, int)

   127      * @see     Character#forDigit(int, int)

   128      * @see     Integer#toString(int, int)

   129      * @see     Integer#valueOf(String)

   130      */

   131     public static final int MIN_RADIX = 2;

   133     /**

   134      * The maximum radix available for conversion to and from strings.

   135      * The constant value of this field is the largest value permitted

   136      * for the radix argument in radix-conversion methods such as the

   137      * {@code digit} method, the {@code forDigit} method, and the

   138      * {@code toString} method of class {@code Integer}.

   139      *

   140      * @see     Character#digit(char, int)

   141      * @see     Character#forDigit(int, int)

   142      * @see     Integer#toString(int, int)

   143      * @see     Integer#valueOf(String)

   144      */

   145     public static final int MAX_RADIX = 36;

   147     /**

   148      * The constant value of this field is the smallest value of type

   149      * {@code char}, {@code '\u005Cu0000'}.

   150      *

   151      * @since   1.0.2

   152      */

   153     public static final char MIN_VALUE = '\u0000';

   155     /**

   156      * The constant value of this field is the largest value of type

   157      * {@code char}, {@code '\u005CuFFFF'}.

   158      *

   159      * @since   1.0.2

   160      */

   161     public static final char MAX_VALUE = '\uFFFF';

   163     /**

   164      * The {@code Class} instance representing the primitive type

   165      * {@code char}.

   166      *

   167      * @since   1.1

   168      */

   169     public static final Class<Character> TYPE = Class.getPrimitiveClass("char");

   171     /*

   172      * Normative general types

   173      */

   175     /*

   176      * General character types

   177      */

   179     /**

   180      * General category "Cn" in the Unicode specification.

   181      * @since   1.1

   182      */

   183     public static final byte UNASSIGNED = 0;

   185     /**

   186      * General category "Lu" in the Unicode specification.

   187      * @since   1.1

   188      */

   189     public static final byte UPPERCASE_LETTER = 1;

   191     /**

   192      * General category "Ll" in the Unicode specification.

   193      * @since   1.1

   194      */

   195     public static final byte LOWERCASE_LETTER = 2;

   197     /**

   198      * General category "Lt" in the Unicode specification.

   199      * @since   1.1

   200      */

   201     public static final byte TITLECASE_LETTER = 3;

   203     /**

   204      * General category "Lm" in the Unicode specification.

   205      * @since   1.1

   206      */

   207     public static final byte MODIFIER_LETTER = 4;

   209     /**

   210      * General category "Lo" in the Unicode specification.

   211      * @since   1.1

   212      */

   213     public static final byte OTHER_LETTER = 5;

   215     /**

   216      * General category "Mn" in the Unicode specification.

   217      * @since   1.1

   218      */

   219     public static final byte NON_SPACING_MARK = 6;

   221     /**

   222      * General category "Me" in the Unicode specification.

   223      * @since   1.1

   224      */

   225     public static final byte ENCLOSING_MARK = 7;

   227     /**

   228      * General category "Mc" in the Unicode specification.

   229      * @since   1.1

   230      */

   231     public static final byte COMBINING_SPACING_MARK = 8;

   233     /**

   234      * General category "Nd" in the Unicode specification.

   235      * @since   1.1

   236      */

   237     public static final byte DECIMAL_DIGIT_NUMBER        = 9;

   239     /**

   240      * General category "Nl" in the Unicode specification.

   241      * @since   1.1

   242      */

   243     public static final byte LETTER_NUMBER = 10;

   245     /**

   246      * General category "No" in the Unicode specification.

   247      * @since   1.1

   248      */

   249     public static final byte OTHER_NUMBER = 11;

   251     /**

   252      * General category "Zs" in the Unicode specification.

   253      * @since   1.1

   254      */

   255     public static final byte SPACE_SEPARATOR = 12;

   257     /**

   258      * General category "Zl" in the Unicode specification.

   259      * @since   1.1

   260      */

   261     public static final byte LINE_SEPARATOR = 13;

   263     /**

   264      * General category "Zp" in the Unicode specification.

   265      * @since   1.1

   266      */

   267     public static final byte PARAGRAPH_SEPARATOR = 14;

   269     /**

   270      * General category "Cc" in the Unicode specification.

   271      * @since   1.1

   272      */

   273     public static final byte CONTROL = 15;

   275     /**

   276      * General category "Cf" in the Unicode specification.

   277      * @since   1.1

   278      */

   279     public static final byte FORMAT = 16;

   281     /**

   282      * General category "Co" in the Unicode specification.

            * The value is 18: among the general category constants declared
            * alongside this one, the numbering goes from {@code FORMAT} (16)
            * directly to this constant, so 17 is not used by any of them.

   283      * @since   1.1

   284      */

   285     public static final byte PRIVATE_USE = 18;

   287     /**

   288      * General category "Cs" in the Unicode specification.

   289      * @since   1.1

   290      */

   291     public static final byte SURROGATE = 19;

   293     /**

   294      * General category "Pd" in the Unicode specification.

   295      * @since   1.1

   296      */

   297     public static final byte DASH_PUNCTUATION = 20;

   299     /**

   300      * General category "Ps" in the Unicode specification.

   301      * @since   1.1

   302      */

   303     public static final byte START_PUNCTUATION = 21;

   305     /**

   306      * General category "Pe" in the Unicode specification.

   307      * @since   1.1

   308      */

   309     public static final byte END_PUNCTUATION = 22;

   311     /**

   312      * General category "Pc" in the Unicode specification.

   313      * @since   1.1

   314      */

   315     public static final byte CONNECTOR_PUNCTUATION = 23;

   317     /**

   318      * General category "Po" in the Unicode specification.

   319      * @since   1.1

   320      */

   321     public static final byte OTHER_PUNCTUATION = 24;

   323     /**

   324      * General category "Sm" in the Unicode specification.

   325      * @since   1.1

   326      */

   327     public static final byte MATH_SYMBOL = 25;

   329     /**

   330      * General category "Sc" in the Unicode specification.

   331      * @since   1.1

   332      */

   333     public static final byte CURRENCY_SYMBOL = 26;

   335     /**

   336      * General category "Sk" in the Unicode specification.

   337      * @since   1.1

   338      */

   339     public static final byte MODIFIER_SYMBOL = 27;

   341     /**

   342      * General category "So" in the Unicode specification.

   343      * @since   1.1

   344      */

   345     public static final byte OTHER_SYMBOL = 28;

   347     /**

   348      * General category "Pi" in the Unicode specification.

   349      * @since   1.4

   350      */

   351     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;

   353     /**

   354      * General category "Pf" in the Unicode specification.

   355      * @since   1.4

   356      */

   357     public static final byte FINAL_QUOTE_PUNCTUATION = 30;

   359     /**

   360      * Error flag. Use int (code point) to avoid confusion with U+FFFF.

            * The literal {@code 0xFFFFFFFF} is the {@code int} value -1, which lies
            * outside the range from {@link #MIN_CODE_POINT} to {@link #MAX_CODE_POINT}.

   361      */

   362     static final int ERROR = 0xFFFFFFFF;

   365     /**

   366      * Undefined bidirectional character type. Undefined {@code char}

   367      * values have undefined directionality in the Unicode specification.

   368      * @since 1.4

   369      */

   370     public static final byte DIRECTIONALITY_UNDEFINED = -1;

   372     /**

   373      * Strong bidirectional character type "L" in the Unicode specification.

   374      * @since 1.4

   375      */

   376     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;

   378     /**

   379      * Strong bidirectional character type "R" in the Unicode specification.

   380      * @since 1.4

   381      */

   382     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;

   384     /**

   385      * Strong bidirectional character type "AL" in the Unicode specification.

   386      * @since 1.4

   387      */

   388     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;

   390     /**

   391      * Weak bidirectional character type "EN" in the Unicode specification.

   392      * @since 1.4

   393      */

   394     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;

   396     /**

   397      * Weak bidirectional character type "ES" in the Unicode specification.

   398      * @since 1.4

   399      */

   400     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;

   402     /**

   403      * Weak bidirectional character type "ET" in the Unicode specification.

   404      * @since 1.4

   405      */

   406     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;

   408     /**

   409      * Weak bidirectional character type "AN" in the Unicode specification.

   410      * @since 1.4

   411      */

   412     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;

   414     /**

   415      * Weak bidirectional character type "CS" in the Unicode specification.

   416      * @since 1.4

   417      */

   418     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;

   420     /**

   421      * Weak bidirectional character type "NSM" in the Unicode specification.

   422      * @since 1.4

   423      */

   424     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;

   426     /**

   427      * Weak bidirectional character type "BN" in the Unicode specification.

   428      * @since 1.4

   429      */

   430     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;

   432     /**

   433      * Neutral bidirectional character type "B" in the Unicode specification.

   434      * @since 1.4

   435      */

   436     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;

   438     /**

   439      * Neutral bidirectional character type "S" in the Unicode specification.

   440      * @since 1.4

   441      */

   442     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;

   444     /**

   445      * Neutral bidirectional character type "WS" in the Unicode specification.

   446      * @since 1.4

   447      */

   448     public static final byte DIRECTIONALITY_WHITESPACE = 12;

   450     /**

   451      * Neutral bidirectional character type "ON" in the Unicode specification.

   452      * @since 1.4

   453      */

   454     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;

   456     /**

   457      * Strong bidirectional character type "LRE" in the Unicode specification.

   458      * @since 1.4

   459      */

   460     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;

   462     /**

   463      * Strong bidirectional character type "LRO" in the Unicode specification.

   464      * @since 1.4

   465      */

   466     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;

   468     /**

   469      * Strong bidirectional character type "RLE" in the Unicode specification.

   470      * @since 1.4

   471      */

   472     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;

   474     /**

   475      * Strong bidirectional character type "RLO" in the Unicode specification.

   476      * @since 1.4

   477      */

   478     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;

   480     /**

   481      * Weak bidirectional character type "PDF" in the Unicode specification.

   482      * @since 1.4

   483      */

   484     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;

   486     /**

   487      * The minimum value of a

   488      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">

   489      * Unicode high-surrogate code unit</a>

   490      * in the UTF-16 encoding, constant {@code '\u005CuD800'}.

   491      * A high-surrogate is also known as a <i>leading-surrogate</i>.

   492      *

   493      * @since 1.5

   494      */

   495     public static final char MIN_HIGH_SURROGATE = '\uD800';

   497     /**

   498      * The maximum value of a

   499      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">

   500      * Unicode high-surrogate code unit</a>

   501      * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.

   502      * A high-surrogate is also known as a <i>leading-surrogate</i>.

   503      *

   504      * @since 1.5

   505      */

   506     public static final char MAX_HIGH_SURROGATE = '\uDBFF';

   508     /**

   509      * The minimum value of a

   510      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">

   511      * Unicode low-surrogate code unit</a>

   512      * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.

   513      * A low-surrogate is also known as a <i>trailing-surrogate</i>.

   514      *

   515      * @since 1.5

   516      */

   517     public static final char MIN_LOW_SURROGATE  = '\uDC00';

   519     /**

   520      * The maximum value of a

   521      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">

   522      * Unicode low-surrogate code unit</a>

   523      * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.

   524      * A low-surrogate is also known as a <i>trailing-surrogate</i>.

   525      *

   526      * @since 1.5

   527      */

   528     public static final char MAX_LOW_SURROGATE  = '\uDFFF';

   530     /**

   531      * The minimum value of a Unicode surrogate code unit in the

   532      * UTF-16 encoding, constant {@code '\u005CuD800'}.

   533      *

   534      * @since 1.5

   535      */

   536     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;

   538     /**

   539      * The maximum value of a Unicode surrogate code unit in the

   540      * UTF-16 encoding, constant {@code '\u005CuDFFF'}.

   541      *

   542      * @since 1.5

   543      */

   544     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;

   546     /**

   547      * The minimum value of a

   548      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">

   549      * Unicode supplementary code point</a>, constant {@code U+10000}.

   550      *

   551      * @since 1.5

   552      */

   553     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;

   555     /**

   556      * The minimum value of a

   557      * <a href="http://www.unicode.org/glossary/#code_point">

   558      * Unicode code point</a>, constant {@code U+0000}.

   559      *

   560      * @since 1.5

   561      */

   562     public static final int MIN_CODE_POINT = 0x000000;

   564     /**

   565      * The maximum value of a

   566      * <a href="http://www.unicode.org/glossary/#code_point">

   567      * Unicode code point</a>, constant {@code U+10FFFF}.

   568      *

   569      * @since 1.5

   570      */

   571     public static final int MAX_CODE_POINT = 0X10FFFF;

   574     /**

   575      * Instances of this class represent particular subsets of the Unicode

   576      * character set.  The only family of subsets defined in the

   577      * {@code Character} class is {@link Character.UnicodeBlock}.

   578      * Other portions of the Java API may define other subsets for their

   579      * own purposes.

   580      *

   581      * @since 1.2

   582      */

   583     public static class Subset  {

   585         private String name;

   587         /**

   588          * Constructs a new {@code Subset} instance.

   589          *

   590          * @param  name  The name of this subset

   591          * @exception NullPointerException if name is {@code null}

   592          */

   593         protected Subset(String name) {

   594             if (name == null) {

   595                 throw new NullPointerException("name");

   596             }

   597             this.name = name;

   598         }

   600         /**

   601          * Compares two {@code Subset} objects for equality.

   602          * This method returns {@code true} if and only if

   603          * {@code this} and the argument refer to the same

   604          * object; since this method is {@code final}, this

   605          * guarantee holds for all subclasses.

   606          */

   607         public final boolean equals(Object obj) {

   608             return (this == obj);

   609         }

   611         /**

   612          * Returns the standard hash code as defined by the

   613          * {@link Object#hashCode} method.  This method

   614          * is {@code final} in order to ensure that the

   615          * {@code equals} and {@code hashCode} methods will

   616          * be consistent in all subclasses.

   617          */

   618         public final int hashCode() {

   619             return super.hashCode();

   620         }

   622         /**

   623          * Returns the name of this subset.

   624          */

   625         public final String toString() {

   626             return name;

   627         }

   628     }

   630     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt

   631     // for the latest specification of Unicode Blocks.

   634     /**

   635      * The value of the {@code Character}.

   636      *

   637      * @serial

   638      */

   639     private final char value;

   641     /** use serialVersionUID from JDK 1.0.2 for interoperability */

   642     private static final long serialVersionUID = 3786198910865385080L;

   /**
    * Creates a new {@code Character} object wrapping the given
    * primitive {@code char}.
    *
    * @param  value  the {@code char} that the new object will hold
    */
   public Character(char value) {
       this.value = value;
   }

   655     private static class CharacterCache {

   656         private CharacterCache(){}

   658         static final Character cache[] = new Character[127 + 1];

   660         static {

   661             for (int i = 0; i < cache.length; i++)

   662                 cache[i] = new Character((char)i);

   663         }

   664     }

   666     /**

   667      * Returns a <tt>Character</tt> instance representing the specified

   668      * <tt>char</tt> value.

   669      * If a new <tt>Character</tt> instance is not required, this method

   670      * should generally be used in preference to the constructor

   671      * {@link #Character(char)}, as this method is likely to yield

   672      * significantly better space and time performance by caching

   673      * frequently requested values.

   674      *

   675      * This method will always cache values in the range {@code

   676      * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may

   677      * cache other values outside of this range.

   678      *

   679      * @param  c a char value.

   680      * @return a <tt>Character</tt> instance representing <tt>c</tt>.

   681      * @since  1.5

   682      */

   683     public static Character valueOf(char c) {

   684         if (c <= 127) { // must cache

   685             return CharacterCache.cache[(int)c];

   686         }

   687         return new Character(c);

   688     }

   690     /**

   691      * Returns the value of this {@code Character} object.

   692      * @return  the primitive {@code char} value represented by

   693      *          this object.

   694      */

   695     public char charValue() {

   696         return value;

   697     }

   699     /**

   700      * Returns a hash code for this {@code Character}; equal to the result

   701      * of invoking {@code charValue()}.

   702      *

   703      * @return a hash code value for this {@code Character}

   704      */

   705     public int hashCode() {

   706         return (int)value;

   707     }

   709     /**

   710      * Compares this object against the specified object.

   711      * The result is {@code true} if and only if the argument is not

   712      * {@code null} and is a {@code Character} object that

   713      * represents the same {@code char} value as this object.

   714      *

   715      * @param   obj   the object to compare with.

   716      * @return  {@code true} if the objects are the same;

   717      *          {@code false} otherwise.

   718      */

   719     public boolean equals(Object obj) {

   720         if (obj instanceof Character) {

   721             return value == ((Character)obj).charValue();

   722         }

   723         return false;

   724     }

   726     /**

   727      * Returns a {@code String} object representing this

   728      * {@code Character}'s value.  The result is a string of

   729      * length 1 whose sole component is the primitive

   730      * {@code char} value represented by this

   731      * {@code Character} object.

   732      *

   733      * @return  a string representation of this object.

   734      */

   735     public String toString() {

   736         char buf[] = {value};

   737         return String.valueOf(buf);

   738     }

   740     /**

   741      * Returns a {@code String} object representing the

   742      * specified {@code char}.  The result is a string of length

   743      * 1 consisting solely of the specified {@code char}.

   744      *

   745      * @param c the {@code char} to be converted

   746      * @return the string representation of the specified {@code char}

   747      * @since 1.4

   748      */

   749     public static String toString(char c) {

   750         return String.valueOf(c);

   751     }

   753     /**

   754      * Determines whether the specified code point is a valid

   755      * <a href="http://www.unicode.org/glossary/#code_point">

   756      * Unicode code point value</a>.

   757      *

   758      * @param  codePoint the Unicode code point to be tested

   759      * @return {@code true} if the specified code point value is between

   760      *         {@link #MIN_CODE_POINT} and

   761      *         {@link #MAX_CODE_POINT} inclusive;

   762      *         {@code false} otherwise.

   763      * @since  1.5

   764      */

   765     public static boolean isValidCodePoint(int codePoint) {

   766         // Optimized form of:

   767         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT

   768         int plane = codePoint >>> 16;

   769         return plane < ((MAX_CODE_POINT + 1) >>> 16);

   770     }

   772     /**

   773      * Determines whether the specified character (Unicode code point)

   774      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.

   775      * Such code points can be represented using a single {@code char}.

   776      *

   777      * @param  codePoint the character (Unicode code point) to be tested

   778      * @return {@code true} if the specified code point is between

   779      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;

   780      *         {@code false} otherwise.

   781      * @since  1.7

   782      */

   783     public static boolean isBmpCodePoint(int codePoint) {

   784         return codePoint >>> 16 == 0;

   785         // Optimized form of:

   786         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE

   787         // We consistently use logical shift (>>>) to facilitate

   788         // additional runtime optimizations.

   789     }

   791     /**

   792      * Determines whether the specified character (Unicode code point)

   793      * is in the <a href="#supplementary">supplementary character</a> range.

   794      *

   795      * @param  codePoint the character (Unicode code point) to be tested

   796      * @return {@code true} if the specified code point is between

   797      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and

   798      *         {@link #MAX_CODE_POINT} inclusive;

   799      *         {@code false} otherwise.

   800      * @since  1.5

   801      */

   802     public static boolean isSupplementaryCodePoint(int codePoint) {

   803         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT

   804             && codePoint <  MAX_CODE_POINT + 1;

   805     }

   807     /**

   808      * Determines if the given {@code char} value is a

   809      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">

   810      * Unicode high-surrogate code unit</a>

   811      * (also known as <i>leading-surrogate code unit</i>).

   812      *

   813      * <p>Such values do not represent characters by themselves,

   814      * but are used in the representation of

   815      * <a href="#supplementary">supplementary characters</a>

   816      * in the UTF-16 encoding.

   817      *

   818      * @param  ch the {@code char} value to be tested.

   819      * @return {@code true} if the {@code char} value is between

   820      *         {@link #MIN_HIGH_SURROGATE} and

   821      *         {@link #MAX_HIGH_SURROGATE} inclusive;

   822      *         {@code false} otherwise.

   823      * @see    Character#isLowSurrogate(char)

   824      * @see    Character.UnicodeBlock#of(int)

   825      * @since  1.5

   826      */

   827     public static boolean isHighSurrogate(char ch) {

   828         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE

   829         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);

   830     }

   832     /**

   833      * Determines if the given {@code char} value is a

   834      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">

   835      * Unicode low-surrogate code unit</a>

   836      * (also known as <i>trailing-surrogate code unit</i>).

   837      *

   838      * <p>Such values do not represent characters by themselves,

   839      * but are used in the representation of

   840      * <a href="#supplementary">supplementary characters</a>

   841      * in the UTF-16 encoding.

   842      *

   843      * @param  ch the {@code char} value to be tested.

   844      * @return {@code true} if the {@code char} value is between

   845      *         {@link #MIN_LOW_SURROGATE} and

   846      *         {@link #MAX_LOW_SURROGATE} inclusive;

   847      *         {@code false} otherwise.

   848      * @see    Character#isHighSurrogate(char)

   849      * @since  1.5

   850      */

   851     public static boolean isLowSurrogate(char ch) {

   852         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);

   853     }

   855     /**

   856      * Determines if the given {@code char} value is a Unicode

   857      * <i>surrogate code unit</i>.

   858      *

   859      * <p>Such values do not represent characters by themselves,

   860      * but are used in the representation of

   861      * <a href="#supplementary">supplementary characters</a>

   862      * in the UTF-16 encoding.

   863      *

   864      * <p>A char value is a surrogate code unit if and only if it is either

   865      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or

   866      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.

   867      *

   868      * @param  ch the {@code char} value to be tested.

   869      * @return {@code true} if the {@code char} value is between

   870      *         {@link #MIN_SURROGATE} and

   871      *         {@link #MAX_SURROGATE} inclusive;

   872      *         {@code false} otherwise.

   873      * @since  1.7

   874      */

   875     public static boolean isSurrogate(char ch) {

   876         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);

   877     }

   879     /**

   880      * Determines whether the specified pair of {@code char}

   881      * values is a valid

   882      * <a href="http://www.unicode.org/glossary/#surrogate_pair">

   883      * Unicode surrogate pair</a>.

   885      * <p>This method is equivalent to the expression:

   886      * <blockquote><pre>

   887      * isHighSurrogate(high) && isLowSurrogate(low)

   888      * </pre></blockquote>

   889      *

   890      * @param  high the high-surrogate code value to be tested

   891      * @param  low the low-surrogate code value to be tested

   892      * @return {@code true} if the specified high and

   893      * low-surrogate code values represent a valid surrogate pair;

   894      * {@code false} otherwise.

   895      * @since  1.5

   896      */

   897     public static boolean isSurrogatePair(char high, char low) {

   898         return isHighSurrogate(high) && isLowSurrogate(low);

   899     }

   901     /**

   902      * Determines the number of {@code char} values needed to

   903      * represent the specified character (Unicode code point). If the

   904      * specified character is equal to or greater than 0x10000, then

   905      * the method returns 2. Otherwise, the method returns 1.

   906      *

   907      * <p>This method doesn't validate the specified character to be a

   908      * valid Unicode code point. The caller must validate the

   909      * character value using {@link #isValidCodePoint(int) isValidCodePoint}

   910      * if necessary.

   911      *

   912      * @param   codePoint the character (Unicode code point) to be tested.

   913      * @return  2 if the character is a valid supplementary character; 1 otherwise.

   914      * @see     Character#isSupplementaryCodePoint(int)

   915      * @since   1.5

   916      */

   917     public static int charCount(int codePoint) {

   918         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;

   919     }

   921     /**

   922      * Converts the specified surrogate pair to its supplementary code

   923      * point value. This method does not validate the specified

   924      * surrogate pair. The caller must validate it using {@link

   925      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.

   926      *

   927      * @param  high the high-surrogate code unit

   928      * @param  low the low-surrogate code unit

   929      * @return the supplementary code point composed from the

   930      *         specified surrogate pair.

   931      * @since  1.5

   932      */

   933     public static int toCodePoint(char high, char low) {

   934         // Optimized form of:

   935         // return ((high - MIN_HIGH_SURROGATE) << 10)

   936         //         + (low - MIN_LOW_SURROGATE)

   937         //         + MIN_SUPPLEMENTARY_CODE_POINT;

   938         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT

   939                                        - (MIN_HIGH_SURROGATE << 10)

   940                                        - MIN_LOW_SURROGATE);

   941     }

   943     /**

   944      * Returns the code point at the given index of the

   945      * {@code CharSequence}. If the {@code char} value at

   946      * the given index in the {@code CharSequence} is in the

   947      * high-surrogate range, the following index is less than the

   948      * length of the {@code CharSequence}, and the

   949      * {@code char} value at the following index is in the

   950      * low-surrogate range, then the supplementary code point

   951      * corresponding to this surrogate pair is returned. Otherwise,

   952      * the {@code char} value at the given index is returned.

   953      *

   954      * @param seq a sequence of {@code char} values (Unicode code

   955      * units)

   956      * @param index the index to the {@code char} values (Unicode

   957      * code units) in {@code seq} to be converted

   958      * @return the Unicode code point at the given index

   959      * @exception NullPointerException if {@code seq} is null.

   960      * @exception IndexOutOfBoundsException if the value

   961      * {@code index} is negative or not less than

   962      * {@link CharSequence#length() seq.length()}.

   963      * @since  1.5

   964      */

   965     public static int codePointAt(CharSequence seq, int index) {

   966         char c1 = seq.charAt(index++);

   967         if (isHighSurrogate(c1)) {

   968             if (index < seq.length()) {

   969                 char c2 = seq.charAt(index);

   970                 if (isLowSurrogate(c2)) {

   971                     return toCodePoint(c1, c2);

   972                 }

   973             }

   974         }

   975         return c1;

   976     }

   978     /**

   979      * Returns the code point at the given index of the

   980      * {@code char} array. If the {@code char} value at

   981      * the given index in the {@code char} array is in the

   982      * high-surrogate range, the following index is less than the

   983      * length of the {@code char} array, and the

   984      * {@code char} value at the following index is in the

   985      * low-surrogate range, then the supplementary code point

   986      * corresponding to this surrogate pair is returned. Otherwise,

   987      * the {@code char} value at the given index is returned.

   988      *

   989      * @param a the {@code char} array

   990      * @param index the index to the {@code char} values (Unicode

   991      * code units) in the {@code char} array to be converted

   992      * @return the Unicode code point at the given index

   993      * @exception NullPointerException if {@code a} is null.

   994      * @exception IndexOutOfBoundsException if the value

   995      * {@code index} is negative or not less than

   996      * the length of the {@code char} array.

   997      * @since  1.5

   998      */

   999     public static int codePointAt(char[] a, int index) {

  1000         return codePointAtImpl(a, index, a.length);

  1001     }

  1003     /**

  1004      * Returns the code point at the given index of the

  1005      * {@code char} array, where only array elements with

  1006      * {@code index} less than {@code limit} can be used. If

  1007      * the {@code char} value at the given index in the

  1008      * {@code char} array is in the high-surrogate range, the

  1009      * following index is less than the {@code limit}, and the

  1010      * {@code char} value at the following index is in the

  1011      * low-surrogate range, then the supplementary code point

  1012      * corresponding to this surrogate pair is returned. Otherwise,

  1013      * the {@code char} value at the given index is returned.

  1014      *

  1015      * @param a the {@code char} array

  1016      * @param index the index to the {@code char} values (Unicode

  1017      * code units) in the {@code char} array to be converted

  1018      * @param limit the index after the last array element that

  1019      * can be used in the {@code char} array

  1020      * @return the Unicode code point at the given index

  1021      * @exception NullPointerException if {@code a} is null.

  1022      * @exception IndexOutOfBoundsException if the {@code index}

  1023      * argument is negative or not less than the {@code limit}

  1024      * argument, or if the {@code limit} argument is negative or

  1025      * greater than the length of the {@code char} array.

  1026      * @since  1.5

  1027      */

  1028     public static int codePointAt(char[] a, int index, int limit) {

  1029         if (index >= limit || limit < 0 || limit > a.length) {

  1030             throw new IndexOutOfBoundsException();

  1031         }

  1032         return codePointAtImpl(a, index, limit);

  1033     }

  1035     // throws ArrayIndexOutOfBoundsException if index out of bounds

  1036     static int codePointAtImpl(char[] a, int index, int limit) {

  1037         char c1 = a[index++];

  1038         if (isHighSurrogate(c1)) {

  1039             if (index < limit) {

  1040                 char c2 = a[index];

  1041                 if (isLowSurrogate(c2)) {

  1042                     return toCodePoint(c1, c2);

  1043                 }

  1044             }

  1045         }

  1046         return c1;

  1047     }

  1049     /**

  1050      * Returns the code point preceding the given index of the

  1051      * {@code CharSequence}. If the {@code char} value at

  1052      * {@code (index - 1)} in the {@code CharSequence} is in

  1053      * the low-surrogate range, {@code (index - 2)} is not

  1054      * negative, and the {@code char} value at {@code (index - 2)}

  1055      * in the {@code CharSequence} is in the

  1056      * high-surrogate range, then the supplementary code point

  1057      * corresponding to this surrogate pair is returned. Otherwise,

  1058      * the {@code char} value at {@code (index - 1)} is

  1059      * returned.

  1060      *

  1061      * @param seq the {@code CharSequence} instance

  1062      * @param index the index following the code point that should be returned

  1063      * @return the Unicode code point value before the given index.

  1064      * @exception NullPointerException if {@code seq} is null.

  1065      * @exception IndexOutOfBoundsException if the {@code index}

  1066      * argument is less than 1 or greater than {@link

  1067      * CharSequence#length() seq.length()}.

  1068      * @since  1.5

  1069      */

  1070     public static int codePointBefore(CharSequence seq, int index) {

  1071         char c2 = seq.charAt(--index);

  1072         if (isLowSurrogate(c2)) {

  1073             if (index > 0) {

  1074                 char c1 = seq.charAt(--index);

  1075                 if (isHighSurrogate(c1)) {

  1076                     return toCodePoint(c1, c2);

  1077                 }

  1078             }

  1079         }

  1080         return c2;

  1081     }

  1083     /**

  1084      * Returns the code point preceding the given index of the

  1085      * {@code char} array. If the {@code char} value at

  1086      * {@code (index - 1)} in the {@code char} array is in

  1087      * the low-surrogate range, {@code (index - 2)} is not

  1088      * negative, and the {@code char} value at {@code (index - 2)}

  1089      * in the {@code char} array is in the

  1090      * high-surrogate range, then the supplementary code point

  1091      * corresponding to this surrogate pair is returned. Otherwise,

  1092      * the {@code char} value at {@code (index - 1)} is

  1093      * returned.

  1094      *

  1095      * @param a the {@code char} array

  1096      * @param index the index following the code point that should be returned

  1097      * @return the Unicode code point value before the given index.

  1098      * @exception NullPointerException if {@code a} is null.

  1099      * @exception IndexOutOfBoundsException if the {@code index}

  1100      * argument is less than 1 or greater than the length of the

  1101      * {@code char} array

  1102      * @since  1.5

  1103      */

  1104     public static int codePointBefore(char[] a, int index) {

  1105         return codePointBeforeImpl(a, index, 0);

  1106     }

  1108     /**

  1109      * Returns the code point preceding the given index of the

  1110      * {@code char} array, where only array elements with

  1111      * {@code index} greater than or equal to {@code start}

  1112      * can be used. If the {@code char} value at {@code (index - 1)}

  1113      * in the {@code char} array is in the

  1114      * low-surrogate range, {@code (index - 2)} is not less than

  1115      * {@code start}, and the {@code char} value at

  1116      * {@code (index - 2)} in the {@code char} array is in

  1117      * the high-surrogate range, then the supplementary code point

  1118      * corresponding to this surrogate pair is returned. Otherwise,

  1119      * the {@code char} value at {@code (index - 1)} is

  1120      * returned.

  1121      *

  1122      * @param a the {@code char} array

  1123      * @param index the index following the code point that should be returned

  1124      * @param start the index of the first array element in the

  1125      * {@code char} array

  1126      * @return the Unicode code point value before the given index.

  1127      * @exception NullPointerException if {@code a} is null.

  1128      * @exception IndexOutOfBoundsException if the {@code index}

  1129      * argument is not greater than the {@code start} argument or

  1130      * is greater than the length of the {@code char} array, or

  1131      * if the {@code start} argument is negative or not less than

  1132      * the length of the {@code char} array.

  1133      * @since  1.5

  1134      */

  1135     public static int codePointBefore(char[] a, int index, int start) {

  1136         if (index <= start || start < 0 || start >= a.length) {

  1137             throw new IndexOutOfBoundsException();

  1138         }

  1139         return codePointBeforeImpl(a, index, start);

  1140     }

  1142     // throws ArrayIndexOutOfBoundsException if index-1 out of bounds

  1143     static int codePointBeforeImpl(char[] a, int index, int start) {

  1144         char c2 = a[--index];

  1145         if (isLowSurrogate(c2)) {

  1146             if (index > start) {

  1147                 char c1 = a[--index];

  1148                 if (isHighSurrogate(c1)) {

  1149                     return toCodePoint(c1, c2);

  1150                 }

  1151             }

  1152         }

  1153         return c2;

  1154     }

  1156     /**

  1157      * Returns the leading surrogate (a

  1158      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">

  1159      * high surrogate code unit</a>) of the

  1160      * <a href="http://www.unicode.org/glossary/#surrogate_pair">

  1161      * surrogate pair</a>

  1162      * representing the specified supplementary character (Unicode

  1163      * code point) in the UTF-16 encoding.  If the specified character

  1164      * is not a

  1165      * <a href="Character.html#supplementary">supplementary character</a>,

  1166      * an unspecified {@code char} is returned.

  1167      *

  1168      * <p>If

  1169      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}

  1170      * is {@code true}, then

  1171      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and

  1172      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}

  1173      * are also always {@code true}.

  1174      *

  1175      * @param   codePoint a supplementary character (Unicode code point)

  1176      * @return  the leading surrogate code unit used to represent the

  1177      *          character in the UTF-16 encoding

  1178      * @since   1.7

  1179      */

  1180     public static char highSurrogate(int codePoint) {

  1181         return (char) ((codePoint >>> 10)

  1182             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));

  1183     }

  1185     /**

  1186      * Returns the trailing surrogate (a

  1187      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">

  1188      * low surrogate code unit</a>) of the

  1189      * <a href="http://www.unicode.org/glossary/#surrogate_pair">

  1190      * surrogate pair</a>

  1191      * representing the specified supplementary character (Unicode

  1192      * code point) in the UTF-16 encoding.  If the specified character

  1193      * is not a

  1194      * <a href="Character.html#supplementary">supplementary character</a>,

  1195      * an unspecified {@code char} is returned.

  1196      *

  1197      * <p>If

  1198      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}

  1199      * is {@code true}, then

  1200      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and

  1201      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}

  1202      * are also always {@code true}.

  1203      *

  1204      * @param   codePoint a supplementary character (Unicode code point)

  1205      * @return  the trailing surrogate code unit used to represent the

  1206      *          character in the UTF-16 encoding

  1207      * @since   1.7

  1208      */

  1209     public static char lowSurrogate(int codePoint) {

  1210         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);

  1211     }

  1213     /**

  1214      * Converts the specified character (Unicode code point) to its

  1215      * UTF-16 representation. If the specified code point is a BMP

  1216      * (Basic Multilingual Plane or Plane 0) value, the same value is

  1217      * stored in {@code dst[dstIndex]}, and 1 is returned. If the

  1218      * specified code point is a supplementary character, its

  1219      * surrogate values are stored in {@code dst[dstIndex]}

  1220      * (high-surrogate) and {@code dst[dstIndex+1]}

  1221      * (low-surrogate), and 2 is returned.

  1222      *

  1223      * @param  codePoint the character (Unicode code point) to be converted.

  1224      * @param  dst an array of {@code char} in which the

  1225      * {@code codePoint}'s UTF-16 value is stored.

  1226      * @param dstIndex the start index into the {@code dst}

  1227      * array where the converted value is stored.

  1228      * @return 1 if the code point is a BMP code point, 2 if the

  1229      * code point is a supplementary code point.

  1230      * @exception IllegalArgumentException if the specified

  1231      * {@code codePoint} is not a valid Unicode code point.

  1232      * @exception NullPointerException if the specified {@code dst} is null.

  1233      * @exception IndexOutOfBoundsException if {@code dstIndex}

  1234      * is negative or not less than {@code dst.length}, or if

  1235      * {@code dst} at {@code dstIndex} doesn't have enough

  1236      * array element(s) to store the resulting {@code char}

  1237      * value(s). (If {@code dstIndex} is equal to

  1238      * {@code dst.length-1} and the specified

  1239      * {@code codePoint} is a supplementary character, the

  1240      * high-surrogate value is not stored in

  1241      * {@code dst[dstIndex]}.)

  1242      * @since  1.5

  1243      */

  1244     public static int toChars(int codePoint, char[] dst, int dstIndex) {

  1245         if (isBmpCodePoint(codePoint)) {

  1246             dst[dstIndex] = (char) codePoint;

  1247             return 1;

  1248         } else if (isValidCodePoint(codePoint)) {

  1249             toSurrogates(codePoint, dst, dstIndex);

  1250             return 2;

  1251         } else {

  1252             throw new IllegalArgumentException();

  1253         }

  1254     }

  1256     /**

  1257      * Converts the specified character (Unicode code point) to its

  1258      * UTF-16 representation stored in a {@code char} array. If

  1259      * the specified code point is a BMP (Basic Multilingual Plane or

  1260      * Plane 0) value, the resulting {@code char} array has

  1261      * the same value as {@code codePoint}. If the specified code

  1262      * point is a supplementary code point, the resulting

  1263      * {@code char} array has the corresponding surrogate pair.

  1264      *

  1265      * @param  codePoint a Unicode code point

  1266      * @return a {@code char} array having

  1267      *         {@code codePoint}'s UTF-16 representation.

  1268      * @exception IllegalArgumentException if the specified

  1269      * {@code codePoint} is not a valid Unicode code point.

  1270      * @since  1.5

  1271      */

  1272     public static char[] toChars(int codePoint) {

  1273         if (isBmpCodePoint(codePoint)) {

  1274             return new char[] { (char) codePoint };

  1275         } else if (isValidCodePoint(codePoint)) {

  1276             char[] result = new char[2];

  1277             toSurrogates(codePoint, result, 0);

  1278             return result;

  1279         } else {

  1280             throw new IllegalArgumentException();

  1281         }

  1282     }

  1284     static void toSurrogates(int codePoint, char[] dst, int index) {

  1285         // We write elements "backwards" to guarantee all-or-nothing

  1286         dst[index+1] = lowSurrogate(codePoint);

  1287         dst[index] = highSurrogate(codePoint);

  1288     }

  1290     /**

  1291      * Returns the number of Unicode code points in the text range of

  1292      * the specified char sequence. The text range begins at the

  1293      * specified {@code beginIndex} and extends to the

  1294      * {@code char} at index {@code endIndex - 1}. Thus the

  1295      * length (in {@code char}s) of the text range is

  1296      * {@code endIndex-beginIndex}. Unpaired surrogates within

  1297      * the text range count as one code point each.

  1298      *

  1299      * @param seq the char sequence

  1300      * @param beginIndex the index to the first {@code char} of

  1301      * the text range.

  1302      * @param endIndex the index after the last {@code char} of

  1303      * the text range.

  1304      * @return the number of Unicode code points in the specified text

  1305      * range

  1306      * @exception NullPointerException if {@code seq} is null.

  1307      * @exception IndexOutOfBoundsException if the

  1308      * {@code beginIndex} is negative, or {@code endIndex}

  1309      * is larger than the length of the given sequence, or

  1310      * {@code beginIndex} is larger than {@code endIndex}.

  1311      * @since  1.5

  1312      */

  1313     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {

  1314         int length = seq.length();

  1315         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {

  1316             throw new IndexOutOfBoundsException();

  1317         }

  1318         int n = endIndex - beginIndex;

  1319         for (int i = beginIndex; i < endIndex; ) {

  1320             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&

  1321                 isLowSurrogate(seq.charAt(i))) {

  1322                 n--;

  1323                 i++;

  1324             }

  1325         }

  1326         return n;

  1327     }

  1329     /**

  1330      * Returns the number of Unicode code points in a subarray of the

  1331      * {@code char} array argument. The {@code offset}

  1332      * argument is the index of the first {@code char} of the

  1333      * subarray and the {@code count} argument specifies the

  1334      * length of the subarray in {@code char}s. Unpaired

  1335      * surrogates within the subarray count as one code point each.

  1336      *

  1337      * @param a the {@code char} array

  1338      * @param offset the index of the first {@code char} in the

  1339      * given {@code char} array

  1340      * @param count the length of the subarray in {@code char}s

  1341      * @return the number of Unicode code points in the specified subarray

  1342      * @exception NullPointerException if {@code a} is null.

  1343      * @exception IndexOutOfBoundsException if {@code offset} or

  1344      * {@code count} is negative, or if {@code offset +

  1345      * count} is larger than the length of the given array.

  1346      * @since  1.5

  1347      */

  1348     public static int codePointCount(char[] a, int offset, int count) {

  1349         if (count > a.length - offset || offset < 0 || count < 0) {

  1350             throw new IndexOutOfBoundsException();

  1351         }

  1352         return codePointCountImpl(a, offset, count);

  1353     }

  1355     static int codePointCountImpl(char[] a, int offset, int count) {

  1356         int endIndex = offset + count;

  1357         int n = count;

  1358         for (int i = offset; i < endIndex; ) {

  1359             if (isHighSurrogate(a[i++]) && i < endIndex &&

  1360                 isLowSurrogate(a[i])) {

  1361                 n--;

  1362                 i++;

  1363             }

  1364         }

  1365         return n;

  1366     }

  1368     /**

  1369      * Returns the index within the given char sequence that is offset

  1370      * from the given {@code index} by {@code codePointOffset}

  1371      * code points. Unpaired surrogates within the text range given by

  1372      * {@code index} and {@code codePointOffset} count as

  1373      * one code point each.

  1374      *

  1375      * @param seq the char sequence

  1376      * @param index the index to be offset

  1377      * @param codePointOffset the offset in code points

  1378      * @return the index within the char sequence

  1379      * @exception NullPointerException if {@code seq} is null.

  1380      * @exception IndexOutOfBoundsException if {@code index}

  1381      *   is negative or larger than the length of the char sequence,

  1382      *   or if {@code codePointOffset} is positive and the

  1383      *   subsequence starting with {@code index} has fewer than

  1384      *   {@code codePointOffset} code points, or if

  1385      *   {@code codePointOffset} is negative and the subsequence

  1386      *   before {@code index} has fewer than the absolute value

  1387      *   of {@code codePointOffset} code points.

  1388      * @since 1.5

  1389      */

  1390     public static int offsetByCodePoints(CharSequence seq, int index,

  1391                                          int codePointOffset) {

  1392         int length = seq.length();

  1393         if (index < 0 || index > length) {

  1394             throw new IndexOutOfBoundsException();

  1395         }

  1397         int x = index;

  1398         if (codePointOffset >= 0) {

  1399             int i;

  1400             for (i = 0; x < length && i < codePointOffset; i++) {

  1401                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&

  1402                     isLowSurrogate(seq.charAt(x))) {

  1403                     x++;

  1404                 }

  1405             }

  1406             if (i < codePointOffset) {

  1407                 throw new IndexOutOfBoundsException();

  1408             }

  1409         } else {

  1410             int i;

  1411             for (i = codePointOffset; x > 0 && i < 0; i++) {

  1412                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&

  1413                     isHighSurrogate(seq.charAt(x-1))) {

  1414                     x--;

  1415                 }

  1416             }

  1417             if (i < 0) {

  1418                 throw new IndexOutOfBoundsException();

  1419             }

  1420         }

  1421         return x;

  1422     }

  1424     /**

  1425      * Returns the index within the given {@code char} subarray

  1426      * that is offset from the given {@code index} by

  1427      * {@code codePointOffset} code points. The

  1428      * {@code start} and {@code count} arguments specify a

  1429      * subarray of the {@code char} array. Unpaired surrogates

  1430      * within the text range given by {@code index} and

  1431      * {@code codePointOffset} count as one code point each.

  1432      *

  1433      * @param a the {@code char} array

  1434      * @param start the index of the first {@code char} of the

  1435      * subarray

  1436      * @param count the length of the subarray in {@code char}s

  1437      * @param index the index to be offset

  1438      * @param codePointOffset the offset in code points

  1439      * @return the index within the subarray

  1440      * @exception NullPointerException if {@code a} is null.

  1441      * @exception IndexOutOfBoundsException

  1442      *   if {@code start} or {@code count} is negative,

  1443      *   or if {@code start + count} is larger than the length of

  1444      *   the given array,

  1445      *   or if {@code index} is less than {@code start} or

  1446      *   larger than {@code start + count},

  1447      *   or if {@code codePointOffset} is positive and the text range

  1448      *   starting with {@code index} and ending with {@code start + count - 1}

  1449      *   has fewer than {@code codePointOffset} code

  1450      *   points,

  1451      *   or if {@code codePointOffset} is negative and the text range

  1452      *   starting with {@code start} and ending with {@code index - 1}

  1453      *   has fewer than the absolute value of

  1454      *   {@code codePointOffset} code points.

  1455      * @since 1.5

  1456      */

  1457     public static int offsetByCodePoints(char[] a, int start, int count,

  1458                                          int index, int codePointOffset) {

  1459         if (count > a.length-start || start < 0 || count < 0

  1460             || index < start || index > start+count) {

  1461             throw new IndexOutOfBoundsException();

  1462         }

  1463         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);

  1464     }

  1466     static int offsetByCodePointsImpl(char[]a, int start, int count,

  1467                                       int index, int codePointOffset) {

  1468         int x = index;

  1469         if (codePointOffset >= 0) {

  1470             int limit = start + count;

  1471             int i;

  1472             for (i = 0; x < limit && i < codePointOffset; i++) {

  1473                 if (isHighSurrogate(a[x++]) && x < limit &&

  1474                     isLowSurrogate(a[x])) {

  1475                     x++;

  1476                 }

  1477             }

  1478             if (i < codePointOffset) {

  1479                 throw new IndexOutOfBoundsException();

  1480             }

  1481         } else {

  1482             int i;

  1483             for (i = codePointOffset; x > start && i < 0; i++) {

  1484                 if (isLowSurrogate(a[--x]) && x > start &&

  1485                     isHighSurrogate(a[x-1])) {

  1486                     x--;

  1487                 }

  1488             }

  1489             if (i < 0) {

  1490                 throw new IndexOutOfBoundsException();

  1491             }

  1492         }

  1493         return x;

  1494     }

  1496     /**

  1497      * Determines if the specified character is a lowercase character.

  1498      * <p>

  1499      * A character is lowercase if its general category type, provided

  1500      * by {@code Character.getType(ch)}, is

  1501      * {@code LOWERCASE_LETTER}, or it has contributory property

  1502      * Other_Lowercase as defined by the Unicode Standard.

  1503      * <p>

  1504      * The following are examples of lowercase characters:

  1505      * <p><blockquote><pre>

  1506      * a b c d e f g h i j k l m n o p q r s t u v w x y z

  1507      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'

  1508      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'

  1509      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'

  1510      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'

  1511      * </pre></blockquote>

  1512      * <p> Many other Unicode characters are lowercase too.

  1513      *

  1514      * <p><b>Note:</b> This method cannot handle <a

  1515      * href="#supplementary"> supplementary characters</a>. To support

  1516      * all Unicode characters, including supplementary characters, use

  1517      * the {@link #isLowerCase(int)} method.

  1518      *

  1519      * @param   ch   the character to be tested.

  1520      * @return  {@code true} if the character is lowercase;

  1521      *          {@code false} otherwise.

  1522      * @see     Character#isUpperCase(char)

  1523      * @see     Character#isTitleCase(char)

  1524      * @see     Character#toLowerCase(char)

  1525      * @see     Character#getType(char)

  1526      */

  1527     public static boolean isLowerCase(char ch) {

  1528         throw new UnsupportedOperationException();

  1529     }

  1531     /**

  1532      * Determines if the specified character is an uppercase character.

  1533      * <p>

  1534      * A character is uppercase if its general category type, provided by

  1535      * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},

  1536      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.

  1537      * <p>

  1538      * The following are examples of uppercase characters:

  1539      * <p><blockquote><pre>

  1540      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z

  1541      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'

  1542      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'

  1543      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'

  1544      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'

  1545      * </pre></blockquote>

  1546      * <p> Many other Unicode characters are uppercase too.

  1547      *

  1548      * <p><b>Note:</b> This method cannot handle <a

  1549      * href="#supplementary"> supplementary characters</a>. To support

  1550      * all Unicode characters, including supplementary characters, use

  1551      * the {@link #isUpperCase(int)} method.

  1552      *

  1553      * @param   ch   the character to be tested.

  1554      * @return  {@code true} if the character is uppercase;

  1555      *          {@code false} otherwise.

  1556      * @see     Character#isLowerCase(char)

  1557      * @see     Character#isTitleCase(char)

  1558      * @see     Character#toUpperCase(char)

  1559      * @see     Character#getType(char)

  1560      * @since   1.0

  1561      */

  1562     public static boolean isUpperCase(char ch) {

  1563         throw new UnsupportedOperationException();

  1564     }

  1566     /**

  1567      * Determines if the specified character is a titlecase character.

  1568      * <p>

  1569      * A character is a titlecase character if its general

  1570      * category type, provided by {@code Character.getType(ch)},

  1571      * is {@code TITLECASE_LETTER}.

  1572      * <p>

  1573      * Some characters look like pairs of Latin letters. For example, there

  1574      * is an uppercase letter that looks like "LJ" and has a corresponding

  1575      * lowercase letter that looks like "lj". A third form, which looks like "Lj",

  1576      * is the appropriate form to use when rendering a word in lowercase

  1577      * with initial capitals, as for a book title.

  1578      * <p>

  1579      * These are some of the Unicode characters for which this method returns

  1580      * {@code true}:

  1581      * <ul>

  1582      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}

  1583      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}

  1584      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}

  1585      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}

  1586      * </ul>

  1587      * <p> Many other Unicode characters are titlecase too.

  1588      *

  1589      * <p><b>Note:</b> This method cannot handle <a

  1590      * href="#supplementary"> supplementary characters</a>. To support

  1591      * all Unicode characters, including supplementary characters, use

  1592      * the {@link #isTitleCase(int)} method.

  1593      *

  1594      * @param   ch   the character to be tested.

  1595      * @return  {@code true} if the character is titlecase;

  1596      *          {@code false} otherwise.

  1597      * @see     Character#isLowerCase(char)

  1598      * @see     Character#isUpperCase(char)

  1599      * @see     Character#toTitleCase(char)

  1600      * @see     Character#getType(char)

  1601      * @since   1.0.2

  1602      */

  1603     public static boolean isTitleCase(char ch) {

  1604         return isTitleCase((int)ch);

  1605     }

  1607     /**

  1608      * Determines if the specified character (Unicode code point) is a titlecase character.

  1609      * <p>

  1610      * A character is a titlecase character if its general

  1611      * category type, provided by {@link Character#getType(int) getType(codePoint)},

  1612      * is {@code TITLECASE_LETTER}.

  1613      * <p>

  1614      * Some characters look like pairs of Latin letters. For example, there

  1615      * is an uppercase letter that looks like "LJ" and has a corresponding

  1616      * lowercase letter that looks like "lj". A third form, which looks like "Lj",

  1617      * is the appropriate form to use when rendering a word in lowercase

  1618      * with initial capitals, as for a book title.

  1619      * <p>

  1620      * These are some of the Unicode characters for which this method returns

  1621      * {@code true}:

  1622      * <ul>

  1623      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}

  1624      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}

  1625      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}

  1626      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}

  1627      * </ul>

  1628      * <p> Many other Unicode characters are titlecase too.

  1629      *

  1630      * @param   codePoint the character (Unicode code point) to be tested.

  1631      * @return  {@code true} if the character is titlecase;

  1632      *          {@code false} otherwise.

  1633      * @see     Character#isLowerCase(int)

  1634      * @see     Character#isUpperCase(int)

  1635      * @see     Character#toTitleCase(int)

  1636      * @see     Character#getType(int)

  1637      * @since   1.5

  1638      */

  1639     public static boolean isTitleCase(int codePoint) {

  1640         return getType(codePoint) == Character.TITLECASE_LETTER;

  1641     }

  1643     /**

  1644      * Determines if the specified character is a digit.

  1645      * <p>

  1646      * A character is a digit if its general category type, provided

  1647      * by {@code Character.getType(ch)}, is

  1648      * {@code DECIMAL_DIGIT_NUMBER}.

  1649      * <p>

  1650      * Some Unicode character ranges that contain digits:

  1651      * <ul>

  1652      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},

  1653      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})

  1654      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},

  1655      *     Arabic-Indic digits

  1656      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},

  1657      *     Extended Arabic-Indic digits

  1658      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},

  1659      *     Devanagari digits

  1660      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},

  1661      *     Fullwidth digits

  1662      * </ul>

  1663      *

  1664      * Many other character ranges contain digits as well.

  1665      *

  1666      * <p><b>Note:</b> This method cannot handle <a

  1667      * href="#supplementary"> supplementary characters</a>. To support

  1668      * all Unicode characters, including supplementary characters, use

  1669      * the {@link #isDigit(int)} method.

  1670      *

  1671      * @param   ch   the character to be tested.

  1672      * @return  {@code true} if the character is a digit;

  1673      *          {@code false} otherwise.

  1674      * @see     Character#digit(char, int)

  1675      * @see     Character#forDigit(int, int)

  1676      * @see     Character#getType(char)

  1677      */

  1678     public static boolean isDigit(char ch) {

  1679         return isDigit((int)ch);

  1680     }

  1682     /**

  1683      * Determines if the specified character (Unicode code point) is a digit.

  1684      * <p>

  1685      * A character is a digit if its general category type, provided

  1686      * by {@link Character#getType(int) getType(codePoint)}, is

  1687      * {@code DECIMAL_DIGIT_NUMBER}.

  1688      * <p>

  1689      * Some Unicode character ranges that contain digits:

  1690      * <ul>

  1691      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},

  1692      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})

  1693      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},

  1694      *     Arabic-Indic digits

  1695      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},

  1696      *     Extended Arabic-Indic digits

  1697      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},

  1698      *     Devanagari digits

  1699      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},

  1700      *     Fullwidth digits

  1701      * </ul>

  1702      *

  1703      * Many other character ranges contain digits as well.

  1704      *

  1705      * @param   codePoint the character (Unicode code point) to be tested.

  1706      * @return  {@code true} if the character is a digit;

  1707      *          {@code false} otherwise.

  1708      * @see     Character#forDigit(int, int)

  1709      * @see     Character#getType(int)

  1710      * @since   1.5

  1711      */

  1712     public static boolean isDigit(int codePoint) {

  1713         return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;

  1714     }

  1716     /**

  1717      * Determines if a character is defined in Unicode.

  1718      * <p>

  1719      * A character is defined if at least one of the following is true:

  1720      * <ul>

  1721      * <li>It has an entry in the UnicodeData file.

  1722      * <li>It has a value in a range defined by the UnicodeData file.

  1723      * </ul>

  1724      *

  1725      * <p><b>Note:</b> This method cannot handle <a

  1726      * href="#supplementary"> supplementary characters</a>. To support

  1727      * all Unicode characters, including supplementary characters, use

  1728      * the {@link #isDefined(int)} method.

  1729      *

  1730      * @param   ch   the character to be tested

  1731      * @return  {@code true} if the character has a defined meaning

  1732      *          in Unicode; {@code false} otherwise.

  1733      * @see     Character#isDigit(char)

  1734      * @see     Character#isLetter(char)

  1735      * @see     Character#isLetterOrDigit(char)

  1736      * @see     Character#isLowerCase(char)

  1737      * @see     Character#isTitleCase(char)

  1738      * @see     Character#isUpperCase(char)

  1739      * @since   1.0.2

  1740      */

  1741     public static boolean isDefined(char ch) {

  1742         return isDefined((int)ch);

  1743     }

  1745     /**

  1746      * Determines if a character (Unicode code point) is defined in Unicode.

  1747      * <p>

  1748      * A character is defined if at least one of the following is true:

  1749      * <ul>

  1750      * <li>It has an entry in the UnicodeData file.

  1751      * <li>It has a value in a range defined by the UnicodeData file.

  1752      * </ul>

  1753      *

  1754      * @param   codePoint the character (Unicode code point) to be tested.

  1755      * @return  {@code true} if the character has a defined meaning

  1756      *          in Unicode; {@code false} otherwise.

  1757      * @see     Character#isDigit(int)

  1758      * @see     Character#isLetter(int)

  1759      * @see     Character#isLetterOrDigit(int)

  1760      * @see     Character#isLowerCase(int)

  1761      * @see     Character#isTitleCase(int)

  1762      * @see     Character#isUpperCase(int)

  1763      * @since   1.5

  1764      */

  1765     public static boolean isDefined(int codePoint) {

  1766         return getType(codePoint) != Character.UNASSIGNED;

  1767     }

  1769     /**

  1770      * Determines if the specified character is a letter.

  1771      * <p>

  1772      * A character is considered to be a letter if its general

  1773      * category type, provided by {@code Character.getType(ch)},

  1774      * is any of the following:

  1775      * <ul>

  1776      * <li> {@code UPPERCASE_LETTER}

  1777      * <li> {@code LOWERCASE_LETTER}

  1778      * <li> {@code TITLECASE_LETTER}

  1779      * <li> {@code MODIFIER_LETTER}

  1780      * <li> {@code OTHER_LETTER}

  1781      * </ul>

  1782      *

  1783      * Not all letters have case. Many characters are

  1784      * letters but are neither uppercase nor lowercase nor titlecase.

  1785      *

  1786      * <p><b>Note:</b> This method cannot handle <a

  1787      * href="#supplementary"> supplementary characters</a>. To support

  1788      * all Unicode characters, including supplementary characters, use

  1789      * the {@link #isLetter(int)} method.

  1790      *

  1791      * @param   ch   the character to be tested.

  1792      * @return  {@code true} if the character is a letter;

  1793      *          {@code false} otherwise.

  1794      * @see     Character#isDigit(char)

  1795      * @see     Character#isJavaIdentifierStart(char)

  1796      * @see     Character#isJavaLetter(char)

  1797      * @see     Character#isJavaLetterOrDigit(char)

  1798      * @see     Character#isLetterOrDigit(char)

  1799      * @see     Character#isLowerCase(char)

  1800      * @see     Character#isTitleCase(char)

  1801      * @see     Character#isUnicodeIdentifierStart(char)

  1802      * @see     Character#isUpperCase(char)

  1803      */

  1804     public static boolean isLetter(char ch) {

  1805         return isLetter((int)ch);

  1806     }

  1808     /**

  1809      * Determines if the specified character (Unicode code point) is a letter.

  1810      * <p>

  1811      * A character is considered to be a letter if its general

  1812      * category type, provided by {@link Character#getType(int) getType(codePoint)},

  1813      * is any of the following:

  1814      * <ul>

  1815      * <li> {@code UPPERCASE_LETTER}

  1816      * <li> {@code LOWERCASE_LETTER}

  1817      * <li> {@code TITLECASE_LETTER}

  1818      * <li> {@code MODIFIER_LETTER}

  1819      * <li> {@code OTHER_LETTER}

  1820      * </ul>

  1821      *

  1822      * Not all letters have case. Many characters are

  1823      * letters but are neither uppercase nor lowercase nor titlecase.

  1824      *

  1825      * @param   codePoint the character (Unicode code point) to be tested.

  1826      * @return  {@code true} if the character is a letter;

  1827      *          {@code false} otherwise.

  1828      * @see     Character#isDigit(int)

  1829      * @see     Character#isJavaIdentifierStart(int)

  1830      * @see     Character#isLetterOrDigit(int)

  1831      * @see     Character#isLowerCase(int)

  1832      * @see     Character#isTitleCase(int)

  1833      * @see     Character#isUnicodeIdentifierStart(int)

  1834      * @see     Character#isUpperCase(int)

  1835      * @since   1.5

  1836      */

  1837     public static boolean isLetter(int codePoint) {

  1838         return ((((1 << Character.UPPERCASE_LETTER) |

  1839             (1 << Character.LOWERCASE_LETTER) |

  1840             (1 << Character.TITLECASE_LETTER) |

  1841             (1 << Character.MODIFIER_LETTER) |

  1842             (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)

  1843             != 0;

  1844     }

  1846     /**

  1847      * Determines if the specified character is a letter or digit.

  1848      * <p>

  1849      * A character is considered to be a letter or digit if either

  1850      * {@code Character.isLetter(char ch)} or

  1851      * {@code Character.isDigit(char ch)} returns

  1852      * {@code true} for the character.

  1853      *

  1854      * <p><b>Note:</b> This method cannot handle <a

  1855      * href="#supplementary"> supplementary characters</a>. To support

  1856      * all Unicode characters, including supplementary characters, use

  1857      * the {@link #isLetterOrDigit(int)} method.

  1858      *

  1859      * @param   ch   the character to be tested.

  1860      * @return  {@code true} if the character is a letter or digit;

  1861      *          {@code false} otherwise.

  1862      * @see     Character#isDigit(char)

  1863      * @see     Character#isJavaIdentifierPart(char)

  1864      * @see     Character#isJavaLetter(char)

  1865      * @see     Character#isJavaLetterOrDigit(char)

  1866      * @see     Character#isLetter(char)

  1867      * @see     Character#isUnicodeIdentifierPart(char)

  1868      * @since   1.0.2

  1869      */

  1870     public static boolean isLetterOrDigit(char ch) {

  1871         return isLetterOrDigit((int)ch);

  1872     }

  1874     /**

  1875      * Determines if the specified character (Unicode code point) is a letter or digit.

  1876      * <p>

  1877      * A character is considered to be a letter or digit if either

  1878      * {@link #isLetter(int) isLetter(codePoint)} or

  1879      * {@link #isDigit(int) isDigit(codePoint)} returns

  1880      * {@code true} for the character.

  1881      *

  1882      * @param   codePoint the character (Unicode code point) to be tested.

  1883      * @return  {@code true} if the character is a letter or digit;

  1884      *          {@code false} otherwise.

  1885      * @see     Character#isDigit(int)

  1886      * @see     Character#isJavaIdentifierPart(int)

  1887      * @see     Character#isLetter(int)

  1888      * @see     Character#isUnicodeIdentifierPart(int)

  1889      * @since   1.5

  1890      */

  1891     public static boolean isLetterOrDigit(int codePoint) {

  1892         return ((((1 << Character.UPPERCASE_LETTER) |

  1893             (1 << Character.LOWERCASE_LETTER) |

  1894             (1 << Character.TITLECASE_LETTER) |

  1895             (1 << Character.MODIFIER_LETTER) |

  1896             (1 << Character.OTHER_LETTER) |

  1897             (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)

  1898             != 0;

  1899     }

  1901     static int getType(int x) {

  1902         throw new UnsupportedOperationException();

  1903     }

  1905     /**

  1906      * Converts the character argument to lowercase using case

  1907      * mapping information from the UnicodeData file.

  1908      * <p>

  1909      * Note that

  1910      * {@code Character.isLowerCase(Character.toLowerCase(ch))}

  1911      * does not always return {@code true} for some ranges of

  1912      * characters, particularly those that are symbols or ideographs.

  1913      *

  1914      * <p>In general, {@link String#toLowerCase()} should be used to map

  1915      * characters to lowercase. {@code String} case mapping methods

  1916      * have several benefits over {@code Character} case mapping methods.

  1917      * {@code String} case mapping methods can perform locale-sensitive

  1918      * mappings, context-sensitive mappings, and 1:M character mappings, whereas

  1919      * the {@code Character} case mapping methods cannot.

  1920      *

  1921      * <p><b>Note:</b> This method cannot handle <a

  1922      * href="#supplementary"> supplementary characters</a>. To support

  1923      * all Unicode characters, including supplementary characters, use

  1924      * the {@link #toLowerCase(int)} method.

  1925      *

  1926      * @param   ch   the character to be converted.

  1927      * @return  the lowercase equivalent of the character, if any;

  1928      *          otherwise, the character itself.

  1929      * @see     Character#isLowerCase(char)

  1930      * @see     String#toLowerCase()

  1931      */

  1932     public static char toLowerCase(char ch) {

  1933         throw new UnsupportedOperationException();

  1934     }

  1936     /**

  1937      * Converts the character argument to uppercase using case mapping

  1938      * information from the UnicodeData file.

  1939      * <p>

  1940      * Note that

  1941      * {@code Character.isUpperCase(Character.toUpperCase(ch))}

  1942      * does not always return {@code true} for some ranges of

  1943      * characters, particularly those that are symbols or ideographs.

  1944      *

  1945      * <p>In general, {@link String#toUpperCase()} should be used to map

  1946      * characters to uppercase. {@code String} case mapping methods

  1947      * have several benefits over {@code Character} case mapping methods.

  1948      * {@code String} case mapping methods can perform locale-sensitive

  1949      * mappings, context-sensitive mappings, and 1:M character mappings, whereas

  1950      * the {@code Character} case mapping methods cannot.

  1951      *

  1952      * <p><b>Note:</b> This method cannot handle <a

  1953      * href="#supplementary"> supplementary characters</a>. To support

  1954      * all Unicode characters, including supplementary characters, use

  1955      * the {@link #toUpperCase(int)} method.

  1956      *

  1957      * @param   ch   the character to be converted.

  1958      * @return  the uppercase equivalent of the character, if any;

  1959      *          otherwise, the character itself.

  1960      * @see     Character#isUpperCase(char)

  1961      * @see     String#toUpperCase()

  1962      */

  1963     public static char toUpperCase(char ch) {

  1964         throw new UnsupportedOperationException();

  1965     }

  1967     /**

  1968      * Returns the numeric value of the character {@code ch} in the

  1969      * specified radix.

  1970      * <p>

  1971      * If the radix is not in the range {@code MIN_RADIX} &le;

  1972      * {@code radix} &le; {@code MAX_RADIX} or if the

  1973      * value of {@code ch} is not a valid digit in the specified

  1974      * radix, {@code -1} is returned. A character is a valid digit

  1975      * if at least one of the following is true:

  1976      * <ul>

  1977      * <li>The method {@code isDigit} is {@code true} of the character

  1978      *     and the Unicode decimal digit value of the character (or its

  1979      *     single-character decomposition) is less than the specified radix.

  1980      *     In this case the decimal digit value is returned.

  1981      * <li>The character is one of the uppercase Latin letters

  1982      *     {@code 'A'} through {@code 'Z'} and its code is less than

  1983      *     {@code radix + 'A' - 10}.

  1984      *     In this case, {@code ch - 'A' + 10}

  1985      *     is returned.

  1986      * <li>The character is one of the lowercase Latin letters

  1987      *     {@code 'a'} through {@code 'z'} and its code is less than

  1988      *     {@code radix + 'a' - 10}.

  1989      *     In this case, {@code ch - 'a' + 10}

  1990      *     is returned.

  1991      * <li>The character is one of the fullwidth uppercase Latin letters A

  1992      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})

  1993      *     and its code is less than

  1994      *     {@code radix + '\u005CuFF21' - 10}.

  1995      *     In this case, {@code ch - '\u005CuFF21' + 10}

  1996      *     is returned.

  1997      * <li>The character is one of the fullwidth lowercase Latin letters a

  1998      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})

  1999      *     and its code is less than

  2000      *     {@code radix + '\u005CuFF41' - 10}.

  2001      *     In this case, {@code ch - '\u005CuFF41' + 10}

  2002      *     is returned.

  2003      * </ul>

  2004      *

  2005      * <p><b>Note:</b> This method cannot handle <a

  2006      * href="#supplementary"> supplementary characters</a>. To support

  2007      * all Unicode characters, including supplementary characters, use

  2008      * the {@link #digit(int, int)} method.

  2009      *

  2010      * @param   ch      the character to be converted.

  2011      * @param   radix   the radix.

  2012      * @return  the numeric value represented by the character in the

  2013      *          specified radix.

  2014      * @see     Character#forDigit(int, int)

  2015      * @see     Character#isDigit(char)

  2016      */

  2017     public static int digit(char ch, int radix) {

  2018         return digit((int)ch, radix);

  2019     }

  2021     /**

  2022      * Returns the numeric value of the specified character (Unicode

  2023      * code point) in the specified radix.

  2024      *

  2025      * <p>If the radix is not in the range {@code MIN_RADIX} &le;

  2026      * {@code radix} &le; {@code MAX_RADIX} or if the

  2027      * character is not a valid digit in the specified

  2028      * radix, {@code -1} is returned. A character is a valid digit

  2029      * if at least one of the following is true:

  2030      * <ul>

  2031      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character

  2032      *     and the Unicode decimal digit value of the character (or its

  2033      *     single-character decomposition) is less than the specified radix.

  2034      *     In this case the decimal digit value is returned.

  2035      * <li>The character is one of the uppercase Latin letters

  2036      *     {@code 'A'} through {@code 'Z'} and its code is less than

  2037      *     {@code radix + 'A' - 10}.

  2038      *     In this case, {@code codePoint - 'A' + 10}

  2039      *     is returned.

  2040      * <li>The character is one of the lowercase Latin letters

  2041      *     {@code 'a'} through {@code 'z'} and its code is less than

  2042      *     {@code radix + 'a' - 10}.

  2043      *     In this case, {@code codePoint - 'a' + 10}

  2044      *     is returned.

  2045      * <li>The character is one of the fullwidth uppercase Latin letters A

  2046      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})

  2047      *     and its code is less than

  2048      *     {@code radix + '\u005CuFF21' - 10}.

  2049      *     In this case,

  2050      *     {@code codePoint - '\u005CuFF21' + 10}

  2051      *     is returned.

  2052      * <li>The character is one of the fullwidth lowercase Latin letters a

  2053      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})

  2054      *     and its code is less than

  2055      *     {@code radix + '\u005CuFF41' - 10}.

  2056      *     In this case,

  2057      *     {@code codePoint - '\u005CuFF41' + 10}

  2058      *     is returned.

  2059      * </ul>

  2060      *

  2061      * @param   codePoint the character (Unicode code point) to be converted.

  2062      * @param   radix   the radix.

  2063      * @return  the numeric value represented by the character in the

  2064      *          specified radix.

  2065      * @see     Character#forDigit(int, int)

  2066      * @see     Character#isDigit(int)

  2067      * @since   1.5

  2068      */

  2069     public static int digit(int codePoint, int radix) {

  2070         throw new UnsupportedOperationException();

  2071     }

  2073     /**

  2074      * Returns the {@code int} value that the specified Unicode

  2075      * character represents. For example, the character

  2076      * {@code '\u005Cu216C'} (the roman numeral fifty) will return

  2077      * an int with a value of 50.

  2078      * <p>

  2079      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through

  2080      * {@code '\u005Cu005A'}), lowercase

  2081      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and

  2082      * full width variant ({@code '\u005CuFF21'} through

  2083      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through

  2084      * {@code '\u005CuFF5A'}) forms have numeric values from 10

  2085      * through 35. This is independent of the Unicode specification,

  2086      * which does not assign numeric values to these {@code char}

  2087      * values.

  2088      * <p>

  2089      * If the character does not have a numeric value, then -1 is returned.

  2090      * If the character has a numeric value that cannot be represented as a

  2091      * nonnegative integer (for example, a fractional value), then -2

  2092      * is returned.

  2093      *

  2094      * <p><b>Note:</b> This method cannot handle <a

  2095      * href="#supplementary"> supplementary characters</a>. To support

  2096      * all Unicode characters, including supplementary characters, use

  2097      * the {@link #getNumericValue(int)} method.

  2098      *

  2099      * @param   ch      the character to be converted.

  2100      * @return  the numeric value of the character, as a nonnegative {@code int}

  2101      *           value; -2 if the character has a numeric value that is not a

  2102      *          nonnegative integer; -1 if the character has no numeric value.

  2103      * @see     Character#forDigit(int, int)

  2104      * @see     Character#isDigit(char)

  2105      * @since   1.1

  2106      */

  2107     public static int getNumericValue(char ch) {

  2108         return getNumericValue((int)ch);

  2109     }

  2111     /**

  2112      * Returns the {@code int} value that the specified

  2113      * character (Unicode code point) represents. For example, the character

  2114      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return

  2115      * an {@code int} with a value of 50.

  2116      * <p>

  2117      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through

  2118      * {@code '\u005Cu005A'}), lowercase

  2119      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and

  2120      * full width variant ({@code '\u005CuFF21'} through

  2121      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through

  2122      * {@code '\u005CuFF5A'}) forms have numeric values from 10

  2123      * through 35. This is independent of the Unicode specification,

  2124      * which does not assign numeric values to these {@code char}

  2125      * values.

  2126      * <p>

  2127      * If the character does not have a numeric value, then -1 is returned.

  2128      * If the character has a numeric value that cannot be represented as a

  2129      * nonnegative integer (for example, a fractional value), then -2

  2130      * is returned.

  2131      *

  2132      * @param   codePoint the character (Unicode code point) to be converted.

  2133      * @return  the numeric value of the character, as a nonnegative {@code int}

  2134      *          value; -2 if the character has a numeric value that is not a

  2135      *          nonnegative integer; -1 if the character has no numeric value.

  2136      * @see     Character#forDigit(int, int)

  2137      * @see     Character#isDigit(int)

  2138      * @since   1.5

  2139      */

  2140     public static int getNumericValue(int codePoint) {

  2141         throw new UnsupportedOperationException();

  2142     }

  2144     /**

  2145      * Determines if the specified character is ISO-LATIN-1 white space.

  2146      * This method returns {@code true} for the following five

  2147      * characters only:

  2148      * <table>

  2149      * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>

  2150      *     <td>{@code HORIZONTAL TABULATION}</td></tr>

  2151      * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>

  2152      *     <td>{@code NEW LINE}</td></tr>

  2153      * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>

  2154      *     <td>{@code FORM FEED}</td></tr>

  2155      * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>

  2156      *     <td>{@code CARRIAGE RETURN}</td></tr>

  2157      * <tr><td>{@code '&nbsp;'}</td>  <td>{@code U+0020}</td>

  2158      *     <td>{@code SPACE}</td></tr>

  2159      * </table>

  2160      *

  2161      * @param      ch   the character to be tested.

  2162      * @return     {@code true} if the character is ISO-LATIN-1 white

  2163      *             space; {@code false} otherwise.

  2164      * @see        Character#isSpaceChar(char)

  2165      * @see        Character#isWhitespace(char)

  2166      * @deprecated Replaced by isWhitespace(char).

  2167      */

  2168     @Deprecated

  2169     public static boolean isSpace(char ch) {

  2170         return (ch <= 0x0020) &&

  2171             (((((1L << 0x0009) |

  2172             (1L << 0x000A) |

  2173             (1L << 0x000C) |

  2174             (1L << 0x000D) |

  2175             (1L << 0x0020)) >> ch) & 1L) != 0);

  2176     }

  2180     /**

  2181      * Determines if the specified character is white space according to Java.

  2182      * A character is a Java whitespace character if and only if it satisfies

  2183      * one of the following criteria:

  2184      * <ul>

  2185      * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},

  2186      *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})

  2187      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},

  2188      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).

  2189      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.

  2190      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.

  2191      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.

  2192      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.

  2193      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.

  2194      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.

  2195      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.

  2196      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.

  2197      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.

  2198      * </ul>

  2199      *

  2200      * <p><b>Note:</b> This method cannot handle <a

  2201      * href="#supplementary"> supplementary characters</a>. To support

  2202      * all Unicode characters, including supplementary characters, use

  2203      * the {@link #isWhitespace(int)} method.

  2204      *

  2205      * @param   ch the character to be tested.

  2206      * @return  {@code true} if the character is a Java whitespace

  2207      *          character; {@code false} otherwise.

  2208      * @see     Character#isSpaceChar(char)

  2209      * @since   1.1

  2210      */

  2211     public static boolean isWhitespace(char ch) {

  2212         return isWhitespace((int)ch);

  2213     }

  2215     /**

  2216      * Determines if the specified character (Unicode code point) is

  2217      * white space according to Java.  A character is a Java

  2218      * whitespace character if and only if it satisfies one of the

  2219      * following criteria:

  2220      * <ul>

  2221      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},

  2222      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})

  2223      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},

  2224      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).

  2225      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.

  2226      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.

  2227      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.

  2228      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.

  2229      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.

  2230      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.

  2231      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.

  2232      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.

  2233      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.

  2234      * </ul>

  2235      * <p>

  2236      *

  2237      * @param   codePoint the character (Unicode code point) to be tested.

  2238      * @return  {@code true} if the character is a Java whitespace

  2239      *          character; {@code false} otherwise.

  2240      * @see     Character#isSpaceChar(int)

  2241      * @since   1.5

  2242      */

  2243     public static boolean isWhitespace(int codePoint) {

  2244         throw new UnsupportedOperationException();

  2245     }

  2247     /**

  2248      * Determines if the specified character is an ISO control

  2249      * character.  A character is considered to be an ISO control

  2250      * character if its code is in the range {@code '\u005Cu0000'}

  2251      * through {@code '\u005Cu001F'} or in the range

  2252      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.

  2253      *

  2254      * <p><b>Note:</b> This method cannot handle <a

  2255      * href="#supplementary"> supplementary characters</a>. To support

  2256      * all Unicode characters, including supplementary characters, use

  2257      * the {@link #isISOControl(int)} method.

  2258      *

  2259      * @param   ch      the character to be tested.

  2260      * @return  {@code true} if the character is an ISO control character;

  2261      *          {@code false} otherwise.

  2262      *

  2263      * @see     Character#isSpaceChar(char)

  2264      * @see     Character#isWhitespace(char)

  2265      * @since   1.1

  2266      */

  2267     public static boolean isISOControl(char ch) {

  2268         return isISOControl((int)ch);

  2269     }

  2271     /**

  2272      * Determines if the referenced character (Unicode code point) is an ISO control

  2273      * character.  A character is considered to be an ISO control

  2274      * character if its code is in the range {@code '\u005Cu0000'}

  2275      * through {@code '\u005Cu001F'} or in the range

  2276      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.

  2277      *

  2278      * @param   codePoint the character (Unicode code point) to be tested.

  2279      * @return  {@code true} if the character is an ISO control character;

  2280      *          {@code false} otherwise.

  2281      * @see     Character#isSpaceChar(int)

  2282      * @see     Character#isWhitespace(int)

  2283      * @since   1.5

  2284      */

  2285     public static boolean isISOControl(int codePoint) {

  2286         // Optimized form of:

  2287         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||

  2288         //     (codePoint >= 0x7F && codePoint <= 0x9F);

  2289         return codePoint <= 0x9F &&

  2290             (codePoint >= 0x7F || (codePoint >>> 5 == 0));

  2291     }

  2293     /**

  2294      * Determines the character representation for a specific digit in

  2295      * the specified radix. If the value of {@code radix} is not a

  2296      * valid radix, or the value of {@code digit} is not a valid

  2297      * digit in the specified radix, the null character

  2298      * ({@code '\u005Cu0000'}) is returned.

  2299      * <p>

  2300      * The {@code radix} argument is valid if it is greater than or

  2301      * equal to {@code MIN_RADIX} and less than or equal to

  2302      * {@code MAX_RADIX}. The {@code digit} argument is valid if

  2303      * {@code 0 <= digit < radix}.

  2304      * <p>

  2305      * If the digit is less than 10, then

  2306      * {@code '0' + digit} is returned. Otherwise, the value

  2307      * {@code 'a' + digit - 10} is returned.

  2308      *

  2309      * @param   digit   the number to convert to a character.

  2310      * @param   radix   the radix.

  2311      * @return  the {@code char} representation of the specified digit

  2312      *          in the specified radix.

  2313      * @see     Character#MIN_RADIX

  2314      * @see     Character#MAX_RADIX

  2315      * @see     Character#digit(char, int)

  2316      */

  2317     public static char forDigit(int digit, int radix) {

  2318         if ((digit >= radix) || (digit < 0)) {

  2319             return '\0';

  2320         }

  2321         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {

  2322             return '\0';

  2323         }

  2324         if (digit < 10) {

  2325             return (char)('0' + digit);

  2326         }

  2327         return (char)('a' - 10 + digit);

  2328     }

  2330     /**

  2331      * Compares two {@code Character} objects numerically.

  2332      *

  2333      * @param   anotherCharacter   the {@code Character} to be compared.

  2335      * @return  the value {@code 0} if the argument {@code Character}

  2336      *          is equal to this {@code Character}; a value less than

  2337      *          {@code 0} if this {@code Character} is numerically less

  2338      *          than the {@code Character} argument; and a value greater than

  2339      *          {@code 0} if this {@code Character} is numerically greater

  2340      *          than the {@code Character} argument (unsigned comparison).

  2341      *          Note that this is strictly a numerical comparison; it is not

  2342      *          locale-dependent.

  2343      * @since   1.2

  2344      */

  2345     public int compareTo(Character anotherCharacter) {

  2346         return compare(this.value, anotherCharacter.value);

  2347     }

  2349     /**

  2350      * Compares two {@code char} values numerically.

  2351      * The value returned is identical to what would be returned by:

  2352      * <pre>

  2353      *    Character.valueOf(x).compareTo(Character.valueOf(y))

  2354      * </pre>

  2355      *

  2356      * @param  x the first {@code char} to compare

  2357      * @param  y the second {@code char} to compare

  2358      * @return the value {@code 0} if {@code x == y};

  2359      *         a value less than {@code 0} if {@code x < y}; and

  2360      *         a value greater than {@code 0} if {@code x > y}

  2361      * @since 1.7

  2362      */

  2363     public static int compare(char x, char y) {

  2364         return x - y;

  2365     }

  2368     /**

  2369      * The number of bits used to represent a <tt>char</tt> value in unsigned

  2370      * binary form, constant {@code 16}.

  2371      *

  2372      * @since 1.5

  2373      */

  2374     public static final int SIZE = 16;

  2376     /**

  2377      * Returns the value obtained by reversing the order of the bytes in the

  2378      * specified <tt>char</tt> value.

  2379      *

  2380      * @return the value obtained by reversing (or, equivalently, swapping)

  2381      *     the bytes in the specified <tt>char</tt> value.

  2382      * @since 1.5

  2383      */

  2384     public static char reverseBytes(char ch) {

  2385         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));

  2386     }

  2388 }

author	Jaroslav Tulach <jaroslav.tulach@apidesign.org>
	Sat, 15 Dec 2012 08:17:45 +0100
changeset 322	3884815c0629
parent 68	a2924470187b
child 326	23b4a344fe02
permissions	-rw-r--r--