emul/src/main/java/java/lang/Character.java
author Jaroslav Tulach <jaroslav.tulach@apidesign.org>
Sun, 30 Sep 2012 18:40:47 -0700
branch emul
changeset 85 9f3c454e74d4
parent 68 a2924470187b
child 326 23b4a344fe02
permissions -rw-r--r--
Removing methods from Character
     1 /*
     2  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Oracle in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    22  * or visit www.oracle.com if you need additional information or have any
    23  * questions.
    24  */
    25 
    26 package java.lang;
    27 
    28 /**
    29  * The {@code Character} class wraps a value of the primitive
    30  * type {@code char} in an object. An object of type
    31  * {@code Character} contains a single field whose type is
    32  * {@code char}.
    33  * <p>
    34  * In addition, this class provides several methods for determining
    35  * a character's category (lowercase letter, digit, etc.) and for converting
    36  * characters from uppercase to lowercase and vice versa.
    37  * <p>
    38  * Character information is based on the Unicode Standard, version 6.0.0.
    39  * <p>
    40  * The methods and data of class {@code Character} are defined by
    41  * the information in the <i>UnicodeData</i> file that is part of the
    42  * Unicode Character Database maintained by the Unicode
    43  * Consortium. This file specifies various properties including name
    44  * and general category for every defined Unicode code point or
    45  * character range.
    46  * <p>
    47  * The file and its description are available from the Unicode Consortium at:
    48  * <ul>
    49  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
    50  * </ul>
    51  *
    52  * <h4><a name="unicode">Unicode Character Representations</a></h4>
    53  *
    54  * <p>The {@code char} data type (and therefore the value that a
    55  * {@code Character} object encapsulates) are based on the
    56  * original Unicode specification, which defined characters as
    57  * fixed-width 16-bit entities. The Unicode Standard has since been
    58  * changed to allow for characters whose representation requires more
    59  * than 16 bits.  The range of legal <em>code point</em>s is now
    60  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
    61  * (Refer to the <a
    62  * href="http://www.unicode.org/reports/tr27/#notation"><i>
    63  * definition</i></a> of the U+<i>n</i> notation in the Unicode
    64  * Standard.)
    65  *
    66  * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
    67  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
    68  * <a name="supplementary">Characters</a> whose code points are greater
    69  * than U+FFFF are called <em>supplementary character</em>s.  The Java
    70  * platform uses the UTF-16 representation in {@code char} arrays and
    71  * in the {@code String} and {@code StringBuffer} classes. In
    72  * this representation, supplementary characters are represented as a pair
    73  * of {@code char} values, the first from the <em>high-surrogates</em>
    74  * range, (&#92;uD800-&#92;uDBFF), the second from the
    75  * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
    76  *
    77  * <p>A {@code char} value, therefore, represents Basic
    78  * Multilingual Plane (BMP) code points, including the surrogate
    79  * code points, or code units of the UTF-16 encoding. An
    80  * {@code int} value represents all Unicode code points,
    81  * including supplementary code points. The lower (least significant)
    82  * 21 bits of {@code int} are used to represent Unicode code
    83  * points and the upper (most significant) 11 bits must be zero.
    84  * Unless otherwise specified, the behavior with respect to
    85  * supplementary characters and surrogate {@code char} values is
    86  * as follows:
    87  *
    88  * <ul>
    89  * <li>The methods that only accept a {@code char} value cannot support
    90  * supplementary characters. They treat {@code char} values from the
    91  * surrogate ranges as undefined characters. For example,
    92  * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
    93  * this specific value if followed by any low-surrogate value in a string
    94  * would represent a letter.
    95  *
    96  * <li>The methods that accept an {@code int} value support all
    97  * Unicode characters, including supplementary characters. For
    98  * example, {@code Character.isLetter(0x2F81A)} returns
    99  * {@code true} because the code point value represents a letter
   100  * (a CJK ideograph).
   101  * </ul>
   102  *
   103  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
   104  * used for character values in the range between U+0000 and U+10FFFF,
   105  * and <em>Unicode code unit</em> is used for 16-bit
   106  * {@code char} values that are code units of the <em>UTF-16</em>
   107  * encoding. For more information on Unicode terminology, refer to the
   108  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
   109  *
   110  * @author  Lee Boynton
   111  * @author  Guy Steele
   112  * @author  Akira Tanaka
   113  * @author  Martin Buchholz
   114  * @author  Ulf Zibis
   115  * @since   1.0
   116  */
   117 public final
   118 class Character implements java.io.Serializable, Comparable<Character> {
   119     /**
   120      * The minimum radix available for conversion to and from strings.
   121      * The constant value of this field is the smallest value permitted
   122      * for the radix argument in radix-conversion methods such as the
   123      * {@code digit} method, the {@code forDigit} method, and the
   124      * {@code toString} method of class {@code Integer}.
   125      *
   126      * @see     Character#digit(char, int)
   127      * @see     Character#forDigit(int, int)
   128      * @see     Integer#toString(int, int)
   129      * @see     Integer#valueOf(String)
   130      */
   131     public static final int MIN_RADIX = 2;
   132 
   133     /**
   134      * The maximum radix available for conversion to and from strings.
   135      * The constant value of this field is the largest value permitted
   136      * for the radix argument in radix-conversion methods such as the
   137      * {@code digit} method, the {@code forDigit} method, and the
   138      * {@code toString} method of class {@code Integer}.
   139      *
   140      * @see     Character#digit(char, int)
   141      * @see     Character#forDigit(int, int)
   142      * @see     Integer#toString(int, int)
   143      * @see     Integer#valueOf(String)
   144      */
   145     public static final int MAX_RADIX = 36;
   146 
   147     /**
   148      * The constant value of this field is the smallest value of type
   149      * {@code char}, {@code '\u005Cu0000'}.
   150      *
   151      * @since   1.0.2
   152      */
   153     public static final char MIN_VALUE = '\u0000';
   154 
   155     /**
   156      * The constant value of this field is the largest value of type
   157      * {@code char}, {@code '\u005CuFFFF'}.
   158      *
   159      * @since   1.0.2
   160      */
   161     public static final char MAX_VALUE = '\uFFFF';
   162 
   163     /**
   164      * The {@code Class} instance representing the primitive type
   165      * {@code char}.
   166      *
   167      * @since   1.1
   168      */
   169     public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
   170 
   171     /*
   172      * Normative general types
   173      */
   174 
   175     /*
   176      * General character types
   177      */
   178 
   179     /**
   180      * General category "Cn" in the Unicode specification.
   181      * @since   1.1
   182      */
   183     public static final byte UNASSIGNED = 0;
   184 
   185     /**
   186      * General category "Lu" in the Unicode specification.
   187      * @since   1.1
   188      */
   189     public static final byte UPPERCASE_LETTER = 1;
   190 
   191     /**
   192      * General category "Ll" in the Unicode specification.
   193      * @since   1.1
   194      */
   195     public static final byte LOWERCASE_LETTER = 2;
   196 
   197     /**
   198      * General category "Lt" in the Unicode specification.
   199      * @since   1.1
   200      */
   201     public static final byte TITLECASE_LETTER = 3;
   202 
   203     /**
   204      * General category "Lm" in the Unicode specification.
   205      * @since   1.1
   206      */
   207     public static final byte MODIFIER_LETTER = 4;
   208 
   209     /**
   210      * General category "Lo" in the Unicode specification.
   211      * @since   1.1
   212      */
   213     public static final byte OTHER_LETTER = 5;
   214 
   215     /**
   216      * General category "Mn" in the Unicode specification.
   217      * @since   1.1
   218      */
   219     public static final byte NON_SPACING_MARK = 6;
   220 
   221     /**
   222      * General category "Me" in the Unicode specification.
   223      * @since   1.1
   224      */
   225     public static final byte ENCLOSING_MARK = 7;
   226 
   227     /**
   228      * General category "Mc" in the Unicode specification.
   229      * @since   1.1
   230      */
   231     public static final byte COMBINING_SPACING_MARK = 8;
   232 
   233     /**
   234      * General category "Nd" in the Unicode specification.
   235      * @since   1.1
   236      */
   237     public static final byte DECIMAL_DIGIT_NUMBER        = 9;
   238 
   239     /**
   240      * General category "Nl" in the Unicode specification.
   241      * @since   1.1
   242      */
   243     public static final byte LETTER_NUMBER = 10;
   244 
   245     /**
   246      * General category "No" in the Unicode specification.
   247      * @since   1.1
   248      */
   249     public static final byte OTHER_NUMBER = 11;
   250 
   251     /**
   252      * General category "Zs" in the Unicode specification.
   253      * @since   1.1
   254      */
   255     public static final byte SPACE_SEPARATOR = 12;
   256 
   257     /**
   258      * General category "Zl" in the Unicode specification.
   259      * @since   1.1
   260      */
   261     public static final byte LINE_SEPARATOR = 13;
   262 
   263     /**
   264      * General category "Zp" in the Unicode specification.
   265      * @since   1.1
   266      */
   267     public static final byte PARAGRAPH_SEPARATOR = 14;
   268 
   269     /**
   270      * General category "Cc" in the Unicode specification.
   271      * @since   1.1
   272      */
   273     public static final byte CONTROL = 15;
   274 
   275     /**
   276      * General category "Cf" in the Unicode specification.
   277      * @since   1.1
   278      */
   279     public static final byte FORMAT = 16;
   280 
   281     /**
   282      * General category "Co" in the Unicode specification.
   283      * @since   1.1
   284      */
   285     public static final byte PRIVATE_USE = 18;
   286 
   287     /**
   288      * General category "Cs" in the Unicode specification.
   289      * @since   1.1
   290      */
   291     public static final byte SURROGATE = 19;
   292 
   293     /**
   294      * General category "Pd" in the Unicode specification.
   295      * @since   1.1
   296      */
   297     public static final byte DASH_PUNCTUATION = 20;
   298 
   299     /**
   300      * General category "Ps" in the Unicode specification.
   301      * @since   1.1
   302      */
   303     public static final byte START_PUNCTUATION = 21;
   304 
   305     /**
   306      * General category "Pe" in the Unicode specification.
   307      * @since   1.1
   308      */
   309     public static final byte END_PUNCTUATION = 22;
   310 
   311     /**
   312      * General category "Pc" in the Unicode specification.
   313      * @since   1.1
   314      */
   315     public static final byte CONNECTOR_PUNCTUATION = 23;
   316 
   317     /**
   318      * General category "Po" in the Unicode specification.
   319      * @since   1.1
   320      */
   321     public static final byte OTHER_PUNCTUATION = 24;
   322 
   323     /**
   324      * General category "Sm" in the Unicode specification.
   325      * @since   1.1
   326      */
   327     public static final byte MATH_SYMBOL = 25;
   328 
   329     /**
   330      * General category "Sc" in the Unicode specification.
   331      * @since   1.1
   332      */
   333     public static final byte CURRENCY_SYMBOL = 26;
   334 
   335     /**
   336      * General category "Sk" in the Unicode specification.
   337      * @since   1.1
   338      */
   339     public static final byte MODIFIER_SYMBOL = 27;
   340 
   341     /**
   342      * General category "So" in the Unicode specification.
   343      * @since   1.1
   344      */
   345     public static final byte OTHER_SYMBOL = 28;
   346 
   347     /**
   348      * General category "Pi" in the Unicode specification.
   349      * @since   1.4
   350      */
   351     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
   352 
   353     /**
   354      * General category "Pf" in the Unicode specification.
   355      * @since   1.4
   356      */
   357     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
   358 
   359     /**
   360      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
   361      */
   362     static final int ERROR = 0xFFFFFFFF;
   363 
   364 
   365     /**
   366      * Undefined bidirectional character type. Undefined {@code char}
   367      * values have undefined directionality in the Unicode specification.
   368      * @since 1.4
   369      */
   370     public static final byte DIRECTIONALITY_UNDEFINED = -1;
   371 
   372     /**
   373      * Strong bidirectional character type "L" in the Unicode specification.
   374      * @since 1.4
   375      */
   376     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
   377 
   378     /**
   379      * Strong bidirectional character type "R" in the Unicode specification.
   380      * @since 1.4
   381      */
   382     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
   383 
   384     /**
   385     * Strong bidirectional character type "AL" in the Unicode specification.
   386      * @since 1.4
   387      */
   388     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
   389 
   390     /**
   391      * Weak bidirectional character type "EN" in the Unicode specification.
   392      * @since 1.4
   393      */
   394     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
   395 
   396     /**
   397      * Weak bidirectional character type "ES" in the Unicode specification.
   398      * @since 1.4
   399      */
   400     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
   401 
   402     /**
   403      * Weak bidirectional character type "ET" in the Unicode specification.
   404      * @since 1.4
   405      */
   406     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
   407 
   408     /**
   409      * Weak bidirectional character type "AN" in the Unicode specification.
   410      * @since 1.4
   411      */
   412     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
   413 
   414     /**
   415      * Weak bidirectional character type "CS" in the Unicode specification.
   416      * @since 1.4
   417      */
   418     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
   419 
   420     /**
   421      * Weak bidirectional character type "NSM" in the Unicode specification.
   422      * @since 1.4
   423      */
   424     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
   425 
   426     /**
   427      * Weak bidirectional character type "BN" in the Unicode specification.
   428      * @since 1.4
   429      */
   430     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
   431 
   432     /**
   433      * Neutral bidirectional character type "B" in the Unicode specification.
   434      * @since 1.4
   435      */
   436     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
   437 
   438     /**
   439      * Neutral bidirectional character type "S" in the Unicode specification.
   440      * @since 1.4
   441      */
   442     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
   443 
   444     /**
   445      * Neutral bidirectional character type "WS" in the Unicode specification.
   446      * @since 1.4
   447      */
   448     public static final byte DIRECTIONALITY_WHITESPACE = 12;
   449 
   450     /**
   451      * Neutral bidirectional character type "ON" in the Unicode specification.
   452      * @since 1.4
   453      */
   454     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
   455 
   456     /**
   457      * Strong bidirectional character type "LRE" in the Unicode specification.
   458      * @since 1.4
   459      */
   460     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
   461 
   462     /**
   463      * Strong bidirectional character type "LRO" in the Unicode specification.
   464      * @since 1.4
   465      */
   466     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
   467 
   468     /**
   469      * Strong bidirectional character type "RLE" in the Unicode specification.
   470      * @since 1.4
   471      */
   472     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
   473 
   474     /**
   475      * Strong bidirectional character type "RLO" in the Unicode specification.
   476      * @since 1.4
   477      */
   478     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
   479 
   480     /**
   481      * Weak bidirectional character type "PDF" in the Unicode specification.
   482      * @since 1.4
   483      */
   484     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
   485 
   486     /**
   487      * The minimum value of a
   488      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
   489      * Unicode high-surrogate code unit</a>
   490      * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
   491      * A high-surrogate is also known as a <i>leading-surrogate</i>.
   492      *
   493      * @since 1.5
   494      */
   495     public static final char MIN_HIGH_SURROGATE = '\uD800';
   496 
   497     /**
   498      * The maximum value of a
   499      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
   500      * Unicode high-surrogate code unit</a>
   501      * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
   502      * A high-surrogate is also known as a <i>leading-surrogate</i>.
   503      *
   504      * @since 1.5
   505      */
   506     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
   507 
   508     /**
   509      * The minimum value of a
   510      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
   511      * Unicode low-surrogate code unit</a>
   512      * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
   513      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
   514      *
   515      * @since 1.5
   516      */
   517     public static final char MIN_LOW_SURROGATE  = '\uDC00';
   518 
   519     /**
   520      * The maximum value of a
   521      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
   522      * Unicode low-surrogate code unit</a>
   523      * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
   524      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
   525      *
   526      * @since 1.5
   527      */
   528     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
   529 
   530     /**
   531      * The minimum value of a Unicode surrogate code unit in the
   532      * UTF-16 encoding, constant {@code '\u005CuD800'}.
   533      *
   534      * @since 1.5
   535      */
   536     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
   537 
   538     /**
   539      * The maximum value of a Unicode surrogate code unit in the
   540      * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
   541      *
   542      * @since 1.5
   543      */
   544     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
   545 
   546     /**
   547      * The minimum value of a
   548      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
   549      * Unicode supplementary code point</a>, constant {@code U+10000}.
   550      *
   551      * @since 1.5
   552      */
   553     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
   554 
   555     /**
   556      * The minimum value of a
   557      * <a href="http://www.unicode.org/glossary/#code_point">
   558      * Unicode code point</a>, constant {@code U+0000}.
   559      *
   560      * @since 1.5
   561      */
   562     public static final int MIN_CODE_POINT = 0x000000;
   563 
   564     /**
   565      * The maximum value of a
   566      * <a href="http://www.unicode.org/glossary/#code_point">
   567      * Unicode code point</a>, constant {@code U+10FFFF}.
   568      *
   569      * @since 1.5
   570      */
   571     public static final int MAX_CODE_POINT = 0X10FFFF;
   572 
   573 
   574     /**
   575      * Instances of this class represent particular subsets of the Unicode
   576      * character set.  The only family of subsets defined in the
   577      * {@code Character} class is {@link Character.UnicodeBlock}.
   578      * Other portions of the Java API may define other subsets for their
   579      * own purposes.
   580      *
   581      * @since 1.2
   582      */
   583     public static class Subset  {
   584 
   585         private String name;
   586 
   587         /**
   588          * Constructs a new {@code Subset} instance.
   589          *
   590          * @param  name  The name of this subset
   591          * @exception NullPointerException if name is {@code null}
   592          */
   593         protected Subset(String name) {
   594             if (name == null) {
   595                 throw new NullPointerException("name");
   596             }
   597             this.name = name;
   598         }
   599 
   600         /**
   601          * Compares two {@code Subset} objects for equality.
   602          * This method returns {@code true} if and only if
   603          * {@code this} and the argument refer to the same
   604          * object; since this method is {@code final}, this
   605          * guarantee holds for all subclasses.
   606          */
   607         public final boolean equals(Object obj) {
   608             return (this == obj);
   609         }
   610 
   611         /**
   612          * Returns the standard hash code as defined by the
   613          * {@link Object#hashCode} method.  This method
   614          * is {@code final} in order to ensure that the
   615          * {@code equals} and {@code hashCode} methods will
   616          * be consistent in all subclasses.
   617          */
   618         public final int hashCode() {
   619             return super.hashCode();
   620         }
   621 
   622         /**
   623          * Returns the name of this subset.
   624          */
   625         public final String toString() {
   626             return name;
   627         }
   628     }
   629 
   630     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
   631     // for the latest specification of Unicode Blocks.
   632 
   633 
   634     /**
   635      * The value of the {@code Character}.
   636      *
   637      * @serial
   638      */
   639     private final char value;
   640 
   641     /** use serialVersionUID from JDK 1.0.2 for interoperability */
   642     private static final long serialVersionUID = 3786198910865385080L;
   643 
   644     /**
   645      * Constructs a newly allocated {@code Character} object that
   646      * represents the specified {@code char} value.
   647      *
   648      * @param  value   the value to be represented by the
   649      *                  {@code Character} object.
   650      */
   651     public Character(char value) {
   652         this.value = value;
   653     }
   654 
   655     private static class CharacterCache {
   656         private CharacterCache(){}
   657 
   658         static final Character cache[] = new Character[127 + 1];
   659 
   660         static {
   661             for (int i = 0; i < cache.length; i++)
   662                 cache[i] = new Character((char)i);
   663         }
   664     }
   665 
   666     /**
   667      * Returns a <tt>Character</tt> instance representing the specified
   668      * <tt>char</tt> value.
   669      * If a new <tt>Character</tt> instance is not required, this method
   670      * should generally be used in preference to the constructor
   671      * {@link #Character(char)}, as this method is likely to yield
   672      * significantly better space and time performance by caching
   673      * frequently requested values.
   674      *
   675      * This method will always cache values in the range {@code
   676      * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
   677      * cache other values outside of this range.
   678      *
   679      * @param  c a char value.
   680      * @return a <tt>Character</tt> instance representing <tt>c</tt>.
   681      * @since  1.5
   682      */
   683     public static Character valueOf(char c) {
   684         if (c <= 127) { // must cache
   685             return CharacterCache.cache[(int)c];
   686         }
   687         return new Character(c);
   688     }
   689 
   690     /**
   691      * Returns the value of this {@code Character} object.
   692      * @return  the primitive {@code char} value represented by
   693      *          this object.
   694      */
   695     public char charValue() {
   696         return value;
   697     }
   698 
   699     /**
   700      * Returns a hash code for this {@code Character}; equal to the result
   701      * of invoking {@code charValue()}.
   702      *
   703      * @return a hash code value for this {@code Character}
   704      */
   705     public int hashCode() {
   706         return (int)value;
   707     }
   708 
   709     /**
   710      * Compares this object against the specified object.
   711      * The result is {@code true} if and only if the argument is not
   712      * {@code null} and is a {@code Character} object that
   713      * represents the same {@code char} value as this object.
   714      *
   715      * @param   obj   the object to compare with.
   716      * @return  {@code true} if the objects are the same;
   717      *          {@code false} otherwise.
   718      */
   719     public boolean equals(Object obj) {
   720         if (obj instanceof Character) {
   721             return value == ((Character)obj).charValue();
   722         }
   723         return false;
   724     }
   725 
   726     /**
   727      * Returns a {@code String} object representing this
   728      * {@code Character}'s value.  The result is a string of
   729      * length 1 whose sole component is the primitive
   730      * {@code char} value represented by this
   731      * {@code Character} object.
   732      *
   733      * @return  a string representation of this object.
   734      */
   735     public String toString() {
   736         char buf[] = {value};
   737         return String.valueOf(buf);
   738     }
   739 
   740     /**
   741      * Returns a {@code String} object representing the
   742      * specified {@code char}.  The result is a string of length
   743      * 1 consisting solely of the specified {@code char}.
   744      *
   745      * @param c the {@code char} to be converted
   746      * @return the string representation of the specified {@code char}
   747      * @since 1.4
   748      */
   749     public static String toString(char c) {
   750         return String.valueOf(c);
   751     }
   752 
   753     /**
   754      * Determines whether the specified code point is a valid
   755      * <a href="http://www.unicode.org/glossary/#code_point">
   756      * Unicode code point value</a>.
   757      *
   758      * @param  codePoint the Unicode code point to be tested
   759      * @return {@code true} if the specified code point value is between
   760      *         {@link #MIN_CODE_POINT} and
   761      *         {@link #MAX_CODE_POINT} inclusive;
   762      *         {@code false} otherwise.
   763      * @since  1.5
   764      */
   765     public static boolean isValidCodePoint(int codePoint) {
   766         // Optimized form of:
   767         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
   768         int plane = codePoint >>> 16;
   769         return plane < ((MAX_CODE_POINT + 1) >>> 16);
   770     }
   771 
   772     /**
   773      * Determines whether the specified character (Unicode code point)
   774      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
   775      * Such code points can be represented using a single {@code char}.
   776      *
   777      * @param  codePoint the character (Unicode code point) to be tested
   778      * @return {@code true} if the specified code point is between
   779      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
   780      *         {@code false} otherwise.
   781      * @since  1.7
   782      */
   783     public static boolean isBmpCodePoint(int codePoint) {
   784         return codePoint >>> 16 == 0;
   785         // Optimized form of:
   786         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
   787         // We consistently use logical shift (>>>) to facilitate
   788         // additional runtime optimizations.
   789     }
   790 
   791     /**
   792      * Determines whether the specified character (Unicode code point)
   793      * is in the <a href="#supplementary">supplementary character</a> range.
   794      *
   795      * @param  codePoint the character (Unicode code point) to be tested
   796      * @return {@code true} if the specified code point is between
   797      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
   798      *         {@link #MAX_CODE_POINT} inclusive;
   799      *         {@code false} otherwise.
   800      * @since  1.5
   801      */
   802     public static boolean isSupplementaryCodePoint(int codePoint) {
   803         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
   804             && codePoint <  MAX_CODE_POINT + 1;
   805     }
   806 
   807     /**
   808      * Determines if the given {@code char} value is a
   809      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
   810      * Unicode high-surrogate code unit</a>
   811      * (also known as <i>leading-surrogate code unit</i>).
   812      *
   813      * <p>Such values do not represent characters by themselves,
   814      * but are used in the representation of
   815      * <a href="#supplementary">supplementary characters</a>
   816      * in the UTF-16 encoding.
   817      *
   818      * @param  ch the {@code char} value to be tested.
   819      * @return {@code true} if the {@code char} value is between
   820      *         {@link #MIN_HIGH_SURROGATE} and
   821      *         {@link #MAX_HIGH_SURROGATE} inclusive;
   822      *         {@code false} otherwise.
   823      * @see    Character#isLowSurrogate(char)
   824      * @see    Character.UnicodeBlock#of(int)
   825      * @since  1.5
   826      */
   827     public static boolean isHighSurrogate(char ch) {
   828         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
   829         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
   830     }
   831 
   832     /**
   833      * Determines if the given {@code char} value is a
   834      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
   835      * Unicode low-surrogate code unit</a>
   836      * (also known as <i>trailing-surrogate code unit</i>).
   837      *
   838      * <p>Such values do not represent characters by themselves,
   839      * but are used in the representation of
   840      * <a href="#supplementary">supplementary characters</a>
   841      * in the UTF-16 encoding.
   842      *
   843      * @param  ch the {@code char} value to be tested.
   844      * @return {@code true} if the {@code char} value is between
   845      *         {@link #MIN_LOW_SURROGATE} and
   846      *         {@link #MAX_LOW_SURROGATE} inclusive;
   847      *         {@code false} otherwise.
   848      * @see    Character#isHighSurrogate(char)
   849      * @since  1.5
   850      */
   851     public static boolean isLowSurrogate(char ch) {
   852         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
   853     }
   854 
   855     /**
   856      * Determines if the given {@code char} value is a Unicode
   857      * <i>surrogate code unit</i>.
   858      *
   859      * <p>Such values do not represent characters by themselves,
   860      * but are used in the representation of
   861      * <a href="#supplementary">supplementary characters</a>
   862      * in the UTF-16 encoding.
   863      *
   864      * <p>A char value is a surrogate code unit if and only if it is either
   865      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
   866      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
   867      *
   868      * @param  ch the {@code char} value to be tested.
   869      * @return {@code true} if the {@code char} value is between
   870      *         {@link #MIN_SURROGATE} and
   871      *         {@link #MAX_SURROGATE} inclusive;
   872      *         {@code false} otherwise.
   873      * @since  1.7
   874      */
   875     public static boolean isSurrogate(char ch) {
   876         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
   877     }
   878 
   879     /**
   880      * Determines whether the specified pair of {@code char}
   881      * values is a valid
   882      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
   883      * Unicode surrogate pair</a>.
   884      *
   885      * <p>This method is equivalent to the expression:
   886      * <blockquote><pre>
   887      * isHighSurrogate(high) &amp;&amp; isLowSurrogate(low)
   888      * </pre></blockquote>
   889      *
   890      * @param  high the high-surrogate code value to be tested
   891      * @param  low the low-surrogate code value to be tested
   892      * @return {@code true} if the specified high and
   893      * low-surrogate code values represent a valid surrogate pair;
   894      * {@code false} otherwise.
   895      * @since  1.5
   896      */
   897     public static boolean isSurrogatePair(char high, char low) {
   898         return isHighSurrogate(high) && isLowSurrogate(low);
   899     }
   900 
   901     /**
   902      * Determines the number of {@code char} values needed to
   903      * represent the specified character (Unicode code point). If the
   904      * specified character is equal to or greater than 0x10000, then
   905      * the method returns 2. Otherwise, the method returns 1.
   906      *
   907      * <p>This method doesn't validate the specified character to be a
   908      * valid Unicode code point. The caller must validate the
   909      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
   910      * if necessary.
   911      *
   912      * @param   codePoint the character (Unicode code point) to be tested.
   913      * @return  2 if the specified character is equal to or greater than 0x10000; 1 otherwise.
   914      * @see     Character#isSupplementaryCodePoint(int)
   915      * @since   1.5
   916      */
   917     public static int charCount(int codePoint) {
   918         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
   919     }
   920 
   921     /**
   922      * Converts the specified surrogate pair to its supplementary code
   923      * point value. This method does not validate the specified
   924      * surrogate pair. The caller must validate it using {@link
   925      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
   926      *
   927      * @param  high the high-surrogate code unit
   928      * @param  low the low-surrogate code unit
   929      * @return the supplementary code point composed from the
   930      *         specified surrogate pair.
   931      * @since  1.5
   932      */
   933     public static int toCodePoint(char high, char low) {
   934         // Optimized form of:
   935         // return ((high - MIN_HIGH_SURROGATE) << 10)
   936         //         + (low - MIN_LOW_SURROGATE)
   937         //         + MIN_SUPPLEMENTARY_CODE_POINT;
   938         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
   939                                        - (MIN_HIGH_SURROGATE << 10)
   940                                        - MIN_LOW_SURROGATE);
   941     }
   942 
   943     /**
   944      * Returns the code point at the given index of the
   945      * {@code CharSequence}. If the {@code char} value at
   946      * the given index in the {@code CharSequence} is in the
   947      * high-surrogate range, the following index is less than the
   948      * length of the {@code CharSequence}, and the
   949      * {@code char} value at the following index is in the
   950      * low-surrogate range, then the supplementary code point
   951      * corresponding to this surrogate pair is returned. Otherwise,
   952      * the {@code char} value at the given index is returned.
   953      *
   954      * @param seq a sequence of {@code char} values (Unicode code
   955      * units)
   956      * @param index the index to the {@code char} values (Unicode
   957      * code units) in {@code seq} to be converted
   958      * @return the Unicode code point at the given index
   959      * @exception NullPointerException if {@code seq} is null.
   960      * @exception IndexOutOfBoundsException if the value
   961      * {@code index} is negative or not less than
   962      * {@link CharSequence#length() seq.length()}.
   963      * @since  1.5
   964      */
   965     public static int codePointAt(CharSequence seq, int index) {
   966         char c1 = seq.charAt(index++);
   967         if (isHighSurrogate(c1)) {
   968             if (index < seq.length()) {
   969                 char c2 = seq.charAt(index);
   970                 if (isLowSurrogate(c2)) {
   971                     return toCodePoint(c1, c2);
   972                 }
   973             }
   974         }
   975         return c1;
   976     }
   977 
   978     /**
   979      * Returns the code point at the given index of the
   980      * {@code char} array. If the {@code char} value at
   981      * the given index in the {@code char} array is in the
   982      * high-surrogate range, the following index is less than the
   983      * length of the {@code char} array, and the
   984      * {@code char} value at the following index is in the
   985      * low-surrogate range, then the supplementary code point
   986      * corresponding to this surrogate pair is returned. Otherwise,
   987      * the {@code char} value at the given index is returned.
   988      *
   989      * @param a the {@code char} array
   990      * @param index the index to the {@code char} values (Unicode
   991      * code units) in the {@code char} array to be converted
   992      * @return the Unicode code point at the given index
   993      * @exception NullPointerException if {@code a} is null.
   994      * @exception IndexOutOfBoundsException if the value
   995      * {@code index} is negative or not less than
   996      * the length of the {@code char} array.
   997      * @since  1.5
   998      */
   999     public static int codePointAt(char[] a, int index) {
  1000         return codePointAtImpl(a, index, a.length);
  1001     }
  1002 
  1003     /**
  1004      * Returns the code point at the given index of the
  1005      * {@code char} array, where only array elements with
  1006      * {@code index} less than {@code limit} can be used. If
  1007      * the {@code char} value at the given index in the
  1008      * {@code char} array is in the high-surrogate range, the
  1009      * following index is less than the {@code limit}, and the
  1010      * {@code char} value at the following index is in the
  1011      * low-surrogate range, then the supplementary code point
  1012      * corresponding to this surrogate pair is returned. Otherwise,
  1013      * the {@code char} value at the given index is returned.
  1014      *
  1015      * @param a the {@code char} array
  1016      * @param index the index to the {@code char} values (Unicode
  1017      * code units) in the {@code char} array to be converted
  1018      * @param limit the index after the last array element that
  1019      * can be used in the {@code char} array
  1020      * @return the Unicode code point at the given index
  1021      * @exception NullPointerException if {@code a} is null.
  1022      * @exception IndexOutOfBoundsException if the {@code index}
  1023      * argument is negative or not less than the {@code limit}
  1024      * argument, or if the {@code limit} argument is negative or
  1025      * greater than the length of the {@code char} array.
  1026      * @since  1.5
  1027      */
  1028     public static int codePointAt(char[] a, int index, int limit) {
  1029         if (index >= limit || limit < 0 || limit > a.length) {
  1030             throw new IndexOutOfBoundsException();
  1031         }
  1032         return codePointAtImpl(a, index, limit);
  1033     }
  1034 
  1035     // throws ArrayIndexOutOfBoundsException if index out of bounds
  1036     static int codePointAtImpl(char[] a, int index, int limit) {
  1037         char c1 = a[index++];
  1038         if (isHighSurrogate(c1)) {
  1039             if (index < limit) {
  1040                 char c2 = a[index];
  1041                 if (isLowSurrogate(c2)) {
  1042                     return toCodePoint(c1, c2);
  1043                 }
  1044             }
  1045         }
  1046         return c1;
  1047     }
  1048 
  1049     /**
  1050      * Returns the code point preceding the given index of the
  1051      * {@code CharSequence}. If the {@code char} value at
  1052      * {@code (index - 1)} in the {@code CharSequence} is in
  1053      * the low-surrogate range, {@code (index - 2)} is not
  1054      * negative, and the {@code char} value at {@code (index - 2)}
  1055      * in the {@code CharSequence} is in the
  1056      * high-surrogate range, then the supplementary code point
  1057      * corresponding to this surrogate pair is returned. Otherwise,
  1058      * the {@code char} value at {@code (index - 1)} is
  1059      * returned.
  1060      *
  1061      * @param seq the {@code CharSequence} instance
  1062      * @param index the index following the code point that should be returned
  1063      * @return the Unicode code point value before the given index.
  1064      * @exception NullPointerException if {@code seq} is null.
  1065      * @exception IndexOutOfBoundsException if the {@code index}
  1066      * argument is less than 1 or greater than {@link
  1067      * CharSequence#length() seq.length()}.
  1068      * @since  1.5
  1069      */
  1070     public static int codePointBefore(CharSequence seq, int index) {
  1071         char c2 = seq.charAt(--index);
  1072         if (isLowSurrogate(c2)) {
  1073             if (index > 0) {
  1074                 char c1 = seq.charAt(--index);
  1075                 if (isHighSurrogate(c1)) {
  1076                     return toCodePoint(c1, c2);
  1077                 }
  1078             }
  1079         }
  1080         return c2;
  1081     }
  1082 
  1083     /**
  1084      * Returns the code point preceding the given index of the
  1085      * {@code char} array. If the {@code char} value at
  1086      * {@code (index - 1)} in the {@code char} array is in
  1087      * the low-surrogate range, {@code (index - 2)} is not
  1088      * negative, and the {@code char} value at {@code (index - 2)}
  1089      * in the {@code char} array is in the
  1090      * high-surrogate range, then the supplementary code point
  1091      * corresponding to this surrogate pair is returned. Otherwise,
  1092      * the {@code char} value at {@code (index - 1)} is
  1093      * returned.
  1094      *
  1095      * @param a the {@code char} array
  1096      * @param index the index following the code point that should be returned
  1097      * @return the Unicode code point value before the given index.
  1098      * @exception NullPointerException if {@code a} is null.
  1099      * @exception IndexOutOfBoundsException if the {@code index}
  1100      * argument is less than 1 or greater than the length of the
  1101      * {@code char} array
  1102      * @since  1.5
  1103      */
  1104     public static int codePointBefore(char[] a, int index) {
  1105         return codePointBeforeImpl(a, index, 0);
  1106     }
  1107 
  1108     /**
  1109      * Returns the code point preceding the given index of the
  1110      * {@code char} array, where only array elements with
  1111      * {@code index} greater than or equal to {@code start}
  1112      * can be used. If the {@code char} value at {@code (index - 1)}
  1113      * in the {@code char} array is in the
  1114      * low-surrogate range, {@code (index - 2)} is not less than
  1115      * {@code start}, and the {@code char} value at
  1116      * {@code (index - 2)} in the {@code char} array is in
  1117      * the high-surrogate range, then the supplementary code point
  1118      * corresponding to this surrogate pair is returned. Otherwise,
  1119      * the {@code char} value at {@code (index - 1)} is
  1120      * returned.
  1121      *
  1122      * @param a the {@code char} array
  1123      * @param index the index following the code point that should be returned
  1124      * @param start the index of the first array element in the
  1125      * {@code char} array
  1126      * @return the Unicode code point value before the given index.
  1127      * @exception NullPointerException if {@code a} is null.
  1128      * @exception IndexOutOfBoundsException if the {@code index}
  1129      * argument is not greater than the {@code start} argument or
  1130      * is greater than the length of the {@code char} array, or
  1131      * if the {@code start} argument is negative or not less than
  1132      * the length of the {@code char} array.
  1133      * @since  1.5
  1134      */
  1135     public static int codePointBefore(char[] a, int index, int start) {
  1136         if (index <= start || start < 0 || start >= a.length) {
  1137             throw new IndexOutOfBoundsException();
  1138         }
  1139         return codePointBeforeImpl(a, index, start);
  1140     }
  1141 
  1142     // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
  1143     static int codePointBeforeImpl(char[] a, int index, int start) {
  1144         char c2 = a[--index];
  1145         if (isLowSurrogate(c2)) {
  1146             if (index > start) {
  1147                 char c1 = a[--index];
  1148                 if (isHighSurrogate(c1)) {
  1149                     return toCodePoint(c1, c2);
  1150                 }
  1151             }
  1152         }
  1153         return c2;
  1154     }
  1155 
  1156     /**
  1157      * Returns the leading surrogate (a
  1158      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
  1159      * high surrogate code unit</a>) of the
  1160      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
  1161      * surrogate pair</a>
  1162      * representing the specified supplementary character (Unicode
  1163      * code point) in the UTF-16 encoding.  If the specified character
  1164      * is not a
  1165      * <a href="Character.html#supplementary">supplementary character</a>,
  1166      * an unspecified {@code char} is returned.
  1167      *
  1168      * <p>If
  1169      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
  1170      * is {@code true}, then
  1171      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
  1172      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
  1173      * are also always {@code true}.
  1174      *
  1175      * @param   codePoint a supplementary character (Unicode code point)
  1176      * @return  the leading surrogate code unit used to represent the
  1177      *          character in the UTF-16 encoding
  1178      * @since   1.7
  1179      */
  1180     public static char highSurrogate(int codePoint) {
  1181         return (char) ((codePoint >>> 10)
  1182             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
  1183     }
  1184 
  1185     /**
  1186      * Returns the trailing surrogate (a
  1187      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
  1188      * low surrogate code unit</a>) of the
  1189      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
  1190      * surrogate pair</a>
  1191      * representing the specified supplementary character (Unicode
  1192      * code point) in the UTF-16 encoding.  If the specified character
  1193      * is not a
  1194      * <a href="Character.html#supplementary">supplementary character</a>,
  1195      * an unspecified {@code char} is returned.
  1196      *
  1197      * <p>If
  1198      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
  1199      * is {@code true}, then
  1200      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
  1201      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
  1202      * are also always {@code true}.
  1203      *
  1204      * @param   codePoint a supplementary character (Unicode code point)
  1205      * @return  the trailing surrogate code unit used to represent the
  1206      *          character in the UTF-16 encoding
  1207      * @since   1.7
  1208      */
  1209     public static char lowSurrogate(int codePoint) {
  1210         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
  1211     }
  1212 
  1213     /**
  1214      * Converts the specified character (Unicode code point) to its
  1215      * UTF-16 representation. If the specified code point is a BMP
  1216      * (Basic Multilingual Plane or Plane 0) value, the same value is
  1217      * stored in {@code dst[dstIndex]}, and 1 is returned. If the
  1218      * specified code point is a supplementary character, its
  1219      * surrogate values are stored in {@code dst[dstIndex]}
  1220      * (high-surrogate) and {@code dst[dstIndex+1]}
  1221      * (low-surrogate), and 2 is returned.
  1222      *
  1223      * @param  codePoint the character (Unicode code point) to be converted.
  1224      * @param  dst an array of {@code char} in which the
  1225      * {@code codePoint}'s UTF-16 value is stored.
  1226      * @param dstIndex the start index into the {@code dst}
  1227      * array where the converted value is stored.
  1228      * @return 1 if the code point is a BMP code point, 2 if the
  1229      * code point is a supplementary code point.
  1230      * @exception IllegalArgumentException if the specified
  1231      * {@code codePoint} is not a valid Unicode code point.
  1232      * @exception NullPointerException if the specified {@code dst} is null.
  1233      * @exception IndexOutOfBoundsException if {@code dstIndex}
  1234      * is negative or not less than {@code dst.length}, or if
  1235      * {@code dst} at {@code dstIndex} doesn't have enough
  1236      * array element(s) to store the resulting {@code char}
  1237      * value(s). (If {@code dstIndex} is equal to
  1238      * {@code dst.length-1} and the specified
  1239      * {@code codePoint} is a supplementary character, the
  1240      * high-surrogate value is not stored in
  1241      * {@code dst[dstIndex]}.)
  1242      * @since  1.5
  1243      */
  1244     public static int toChars(int codePoint, char[] dst, int dstIndex) {
  1245         if (isBmpCodePoint(codePoint)) {
  1246             dst[dstIndex] = (char) codePoint;
  1247             return 1;
  1248         } else if (isValidCodePoint(codePoint)) {
  1249             toSurrogates(codePoint, dst, dstIndex);
  1250             return 2;
  1251         } else {
  1252             throw new IllegalArgumentException();
  1253         }
  1254     }
  1255 
  1256     /**
  1257      * Converts the specified character (Unicode code point) to its
  1258      * UTF-16 representation stored in a {@code char} array. If
  1259      * the specified code point is a BMP (Basic Multilingual Plane or
  1260      * Plane 0) value, the resulting {@code char} array has
  1261      * the same value as {@code codePoint}. If the specified code
  1262      * point is a supplementary code point, the resulting
  1263      * {@code char} array has the corresponding surrogate pair.
  1264      *
  1265      * @param  codePoint a Unicode code point
  1266      * @return a {@code char} array having
  1267      *         {@code codePoint}'s UTF-16 representation.
  1268      * @exception IllegalArgumentException if the specified
  1269      * {@code codePoint} is not a valid Unicode code point.
  1270      * @since  1.5
  1271      */
  1272     public static char[] toChars(int codePoint) {
  1273         if (isBmpCodePoint(codePoint)) {
  1274             return new char[] { (char) codePoint };
  1275         } else if (isValidCodePoint(codePoint)) {
  1276             char[] result = new char[2];
  1277             toSurrogates(codePoint, result, 0);
  1278             return result;
  1279         } else {
  1280             throw new IllegalArgumentException();
  1281         }
  1282     }
  1283 
  1284     static void toSurrogates(int codePoint, char[] dst, int index) {
  1285         // We write elements "backwards" to guarantee all-or-nothing
  1286         dst[index+1] = lowSurrogate(codePoint);
  1287         dst[index] = highSurrogate(codePoint);
  1288     }
  1289 
  1290     /**
  1291      * Returns the number of Unicode code points in the text range of
  1292      * the specified char sequence. The text range begins at the
  1293      * specified {@code beginIndex} and extends to the
  1294      * {@code char} at index {@code endIndex - 1}. Thus the
  1295      * length (in {@code char}s) of the text range is
  1296      * {@code endIndex-beginIndex}. Unpaired surrogates within
  1297      * the text range count as one code point each.
  1298      *
  1299      * @param seq the char sequence
  1300      * @param beginIndex the index to the first {@code char} of
  1301      * the text range.
  1302      * @param endIndex the index after the last {@code char} of
  1303      * the text range.
  1304      * @return the number of Unicode code points in the specified text
  1305      * range
  1306      * @exception NullPointerException if {@code seq} is null.
  1307      * @exception IndexOutOfBoundsException if the
  1308      * {@code beginIndex} is negative, or {@code endIndex}
  1309      * is larger than the length of the given sequence, or
  1310      * {@code beginIndex} is larger than {@code endIndex}.
  1311      * @since  1.5
  1312      */
  1313     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
  1314         int length = seq.length();
  1315         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
  1316             throw new IndexOutOfBoundsException();
  1317         }
  1318         int n = endIndex - beginIndex;
  1319         for (int i = beginIndex; i < endIndex; ) {
  1320             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
  1321                 isLowSurrogate(seq.charAt(i))) {
  1322                 n--;
  1323                 i++;
  1324             }
  1325         }
  1326         return n;
  1327     }
  1328 
  1329     /**
  1330      * Returns the number of Unicode code points in a subarray of the
  1331      * {@code char} array argument. The {@code offset}
  1332      * argument is the index of the first {@code char} of the
  1333      * subarray and the {@code count} argument specifies the
  1334      * length of the subarray in {@code char}s. Unpaired
  1335      * surrogates within the subarray count as one code point each.
  1336      *
  1337      * @param a the {@code char} array
  1338      * @param offset the index of the first {@code char} in the
  1339      * given {@code char} array
  1340      * @param count the length of the subarray in {@code char}s
  1341      * @return the number of Unicode code points in the specified subarray
  1342      * @exception NullPointerException if {@code a} is null.
  1343      * @exception IndexOutOfBoundsException if {@code offset} or
  1344      * {@code count} is negative, or if {@code offset +
  1345      * count} is larger than the length of the given array.
  1346      * @since  1.5
  1347      */
  1348     public static int codePointCount(char[] a, int offset, int count) {
  1349         if (count > a.length - offset || offset < 0 || count < 0) {
  1350             throw new IndexOutOfBoundsException();
  1351         }
  1352         return codePointCountImpl(a, offset, count);
  1353     }
  1354 
  1355     static int codePointCountImpl(char[] a, int offset, int count) {
  1356         int endIndex = offset + count;
  1357         int n = count;
  1358         for (int i = offset; i < endIndex; ) {
  1359             if (isHighSurrogate(a[i++]) && i < endIndex &&
  1360                 isLowSurrogate(a[i])) {
  1361                 n--;
  1362                 i++;
  1363             }
  1364         }
  1365         return n;
  1366     }
  1367 
  1368     /**
  1369      * Returns the index within the given char sequence that is offset
  1370      * from the given {@code index} by {@code codePointOffset}
  1371      * code points. Unpaired surrogates within the text range given by
  1372      * {@code index} and {@code codePointOffset} count as
  1373      * one code point each.
  1374      *
  1375      * @param seq the char sequence
  1376      * @param index the index to be offset
  1377      * @param codePointOffset the offset in code points
  1378      * @return the index within the char sequence
  1379      * @exception NullPointerException if {@code seq} is null.
  1380      * @exception IndexOutOfBoundsException if {@code index}
  1381      *   is negative or larger than the length of the char sequence,
  1382      *   or if {@code codePointOffset} is positive and the
  1383      *   subsequence starting with {@code index} has fewer than
  1384      *   {@code codePointOffset} code points, or if
  1385      *   {@code codePointOffset} is negative and the subsequence
  1386      *   before {@code index} has fewer than the absolute value
  1387      *   of {@code codePointOffset} code points.
  1388      * @since 1.5
  1389      */
  1390     public static int offsetByCodePoints(CharSequence seq, int index,
  1391                                          int codePointOffset) {
  1392         int length = seq.length();
  1393         if (index < 0 || index > length) {
  1394             throw new IndexOutOfBoundsException();
  1395         }
  1396 
  1397         int x = index;
  1398         if (codePointOffset >= 0) {
  1399             int i;
  1400             for (i = 0; x < length && i < codePointOffset; i++) {
  1401                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
  1402                     isLowSurrogate(seq.charAt(x))) {
  1403                     x++;
  1404                 }
  1405             }
  1406             if (i < codePointOffset) {
  1407                 throw new IndexOutOfBoundsException();
  1408             }
  1409         } else {
  1410             int i;
  1411             for (i = codePointOffset; x > 0 && i < 0; i++) {
  1412                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
  1413                     isHighSurrogate(seq.charAt(x-1))) {
  1414                     x--;
  1415                 }
  1416             }
  1417             if (i < 0) {
  1418                 throw new IndexOutOfBoundsException();
  1419             }
  1420         }
  1421         return x;
  1422     }
  1423 
  1424     /**
  1425      * Returns the index within the given {@code char} subarray
  1426      * that is offset from the given {@code index} by
  1427      * {@code codePointOffset} code points. The
  1428      * {@code start} and {@code count} arguments specify a
  1429      * subarray of the {@code char} array. Unpaired surrogates
  1430      * within the text range given by {@code index} and
  1431      * {@code codePointOffset} count as one code point each.
  1432      *
  1433      * @param a the {@code char} array
  1434      * @param start the index of the first {@code char} of the
  1435      * subarray
  1436      * @param count the length of the subarray in {@code char}s
  1437      * @param index the index to be offset
  1438      * @param codePointOffset the offset in code points
  1439      * @return the index within the subarray
  1440      * @exception NullPointerException if {@code a} is null.
  1441      * @exception IndexOutOfBoundsException
  1442      *   if {@code start} or {@code count} is negative,
  1443      *   or if {@code start + count} is larger than the length of
  1444      *   the given array,
  1445      *   or if {@code index} is less than {@code start} or
  1446      *   larger than {@code start + count},
  1447      *   or if {@code codePointOffset} is positive and the text range
  1448      *   starting with {@code index} and ending with {@code start + count - 1}
  1449      *   has fewer than {@code codePointOffset} code
  1450      *   points,
  1451      *   or if {@code codePointOffset} is negative and the text range
  1452      *   starting with {@code start} and ending with {@code index - 1}
  1453      *   has fewer than the absolute value of
  1454      *   {@code codePointOffset} code points.
  1455      * @since 1.5
  1456      */
  1457     public static int offsetByCodePoints(char[] a, int start, int count,
  1458                                          int index, int codePointOffset) {
  1459         if (count > a.length-start || start < 0 || count < 0
  1460             || index < start || index > start+count) {
  1461             throw new IndexOutOfBoundsException();
  1462         }
  1463         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
  1464     }
  1465 
  1466     static int offsetByCodePointsImpl(char[]a, int start, int count,
  1467                                       int index, int codePointOffset) {
  1468         int x = index;
  1469         if (codePointOffset >= 0) {
  1470             int limit = start + count;
  1471             int i;
  1472             for (i = 0; x < limit && i < codePointOffset; i++) {
  1473                 if (isHighSurrogate(a[x++]) && x < limit &&
  1474                     isLowSurrogate(a[x])) {
  1475                     x++;
  1476                 }
  1477             }
  1478             if (i < codePointOffset) {
  1479                 throw new IndexOutOfBoundsException();
  1480             }
  1481         } else {
  1482             int i;
  1483             for (i = codePointOffset; x > start && i < 0; i++) {
  1484                 if (isLowSurrogate(a[--x]) && x > start &&
  1485                     isHighSurrogate(a[x-1])) {
  1486                     x--;
  1487                 }
  1488             }
  1489             if (i < 0) {
  1490                 throw new IndexOutOfBoundsException();
  1491             }
  1492         }
  1493         return x;
  1494     }
  1495 
  1496     /**
  1497      * Determines if the specified character is a lowercase character.
  1498      * <p>
  1499      * A character is lowercase if its general category type, provided
  1500      * by {@code Character.getType(ch)}, is
  1501      * {@code LOWERCASE_LETTER}, or it has contributory property
  1502      * Other_Lowercase as defined by the Unicode Standard.
  1503      * <p>
  1504      * The following are examples of lowercase characters:
  1505      * <p><blockquote><pre>
  1506      * a b c d e f g h i j k l m n o p q r s t u v w x y z
  1507      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
  1508      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
  1509      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
  1510      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
  1511      * </pre></blockquote>
  1512      * <p> Many other Unicode characters are lowercase too.
  1513      *
  1514      * <p><b>Note:</b> This method cannot handle <a
  1515      * href="#supplementary"> supplementary characters</a>. To support
  1516      * all Unicode characters, including supplementary characters, use
  1517      * the {@link #isLowerCase(int)} method.
  1518      *
  1519      * @param   ch   the character to be tested.
  1520      * @return  {@code true} if the character is lowercase;
  1521      *          {@code false} otherwise.
  1522      * @see     Character#isUpperCase(char)
  1523      * @see     Character#isTitleCase(char)
  1524      * @see     Character#toLowerCase(char)
  1525      * @see     Character#getType(char)
  1526      */
  1527     public static boolean isLowerCase(char ch) {
  1528         throw new UnsupportedOperationException();
  1529     }
  1530 
  1531     /**
  1532      * Determines if the specified character is an uppercase character.
  1533      * <p>
  1534      * A character is uppercase if its general category type, provided by
  1535      * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
  1536      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
  1537      * <p>
  1538      * The following are examples of uppercase characters:
  1539      * <p><blockquote><pre>
  1540      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
  1541      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
  1542      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
  1543      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
  1544      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
  1545      * </pre></blockquote>
  1546      * <p> Many other Unicode characters are uppercase too.
  1547      *
  1548      * <p><b>Note:</b> This method cannot handle <a
  1549      * href="#supplementary"> supplementary characters</a>. To support
  1550      * all Unicode characters, including supplementary characters, use
  1551      * the {@link #isUpperCase(int)} method.
  1552      *
  1553      * @param   ch   the character to be tested.
  1554      * @return  {@code true} if the character is uppercase;
  1555      *          {@code false} otherwise.
  1556      * @see     Character#isLowerCase(char)
  1557      * @see     Character#isTitleCase(char)
  1558      * @see     Character#toUpperCase(char)
  1559      * @see     Character#getType(char)
  1560      * @since   1.0
  1561      */
  1562     public static boolean isUpperCase(char ch) {
  1563         throw new UnsupportedOperationException();
  1564     }
  1565 
  1566     /**
  1567      * Determines if the specified character is a titlecase character.
  1568      * <p>
  1569      * A character is a titlecase character if its general
  1570      * category type, provided by {@code Character.getType(ch)},
  1571      * is {@code TITLECASE_LETTER}.
  1572      * <p>
  1573      * Some characters look like pairs of Latin letters. For example, there
  1574      * is an uppercase letter that looks like "LJ" and has a corresponding
  1575      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
  1576      * is the appropriate form to use when rendering a word in lowercase
  1577      * with initial capitals, as for a book title.
  1578      * <p>
  1579      * These are some of the Unicode characters for which this method returns
  1580      * {@code true}:
  1581      * <ul>
  1582      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
  1583      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
  1584      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
  1585      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
  1586      * </ul>
  1587      * <p> Many other Unicode characters are titlecase too.
  1588      *
  1589      * <p><b>Note:</b> This method cannot handle <a
  1590      * href="#supplementary"> supplementary characters</a>. To support
  1591      * all Unicode characters, including supplementary characters, use
  1592      * the {@link #isTitleCase(int)} method.
  1593      *
  1594      * @param   ch   the character to be tested.
  1595      * @return  {@code true} if the character is titlecase;
  1596      *          {@code false} otherwise.
  1597      * @see     Character#isLowerCase(char)
  1598      * @see     Character#isUpperCase(char)
  1599      * @see     Character#toTitleCase(char)
  1600      * @see     Character#getType(char)
  1601      * @since   1.0.2
  1602      */
  1603     public static boolean isTitleCase(char ch) {
  1604         return isTitleCase((int)ch);
  1605     }
  1606 
  1607     /**
  1608      * Determines if the specified character (Unicode code point) is a titlecase character.
  1609      * <p>
  1610      * A character is a titlecase character if its general
  1611      * category type, provided by {@link Character#getType(int) getType(codePoint)},
  1612      * is {@code TITLECASE_LETTER}.
  1613      * <p>
  1614      * Some characters look like pairs of Latin letters. For example, there
  1615      * is an uppercase letter that looks like "LJ" and has a corresponding
  1616      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
  1617      * is the appropriate form to use when rendering a word in lowercase
  1618      * with initial capitals, as for a book title.
  1619      * <p>
  1620      * These are some of the Unicode characters for which this method returns
  1621      * {@code true}:
  1622      * <ul>
  1623      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
  1624      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
  1625      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
  1626      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
  1627      * </ul>
  1628      * <p> Many other Unicode characters are titlecase too.
  1629      *
  1630      * @param   codePoint the character (Unicode code point) to be tested.
  1631      * @return  {@code true} if the character is titlecase;
  1632      *          {@code false} otherwise.
  1633      * @see     Character#isLowerCase(int)
  1634      * @see     Character#isUpperCase(int)
  1635      * @see     Character#toTitleCase(int)
  1636      * @see     Character#getType(int)
  1637      * @since   1.5
  1638      */
  1639     public static boolean isTitleCase(int codePoint) {
  1640         return getType(codePoint) == Character.TITLECASE_LETTER;
  1641     }
  1642 
  1643     /**
  1644      * Determines if the specified character is a digit.
  1645      * <p>
  1646      * A character is a digit if its general category type, provided
  1647      * by {@code Character.getType(ch)}, is
  1648      * {@code DECIMAL_DIGIT_NUMBER}.
  1649      * <p>
  1650      * Some Unicode character ranges that contain digits:
  1651      * <ul>
  1652      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
  1653      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
  1654      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
  1655      *     Arabic-Indic digits
  1656      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
  1657      *     Extended Arabic-Indic digits
  1658      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
  1659      *     Devanagari digits
  1660      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
  1661      *     Fullwidth digits
  1662      * </ul>
  1663      *
  1664      * Many other character ranges contain digits as well.
  1665      *
  1666      * <p><b>Note:</b> This method cannot handle <a
  1667      * href="#supplementary"> supplementary characters</a>. To support
  1668      * all Unicode characters, including supplementary characters, use
  1669      * the {@link #isDigit(int)} method.
  1670      *
  1671      * @param   ch   the character to be tested.
  1672      * @return  {@code true} if the character is a digit;
  1673      *          {@code false} otherwise.
  1674      * @see     Character#digit(char, int)
  1675      * @see     Character#forDigit(int, int)
  1676      * @see     Character#getType(char)
  1677      */
  1678     public static boolean isDigit(char ch) {
  1679         return isDigit((int)ch);
  1680     }
  1681 
  1682     /**
  1683      * Determines if the specified character (Unicode code point) is a digit.
  1684      * <p>
  1685      * A character is a digit if its general category type, provided
  1686      * by {@link Character#getType(int) getType(codePoint)}, is
  1687      * {@code DECIMAL_DIGIT_NUMBER}.
  1688      * <p>
  1689      * Some Unicode character ranges that contain digits:
  1690      * <ul>
  1691      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
  1692      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
  1693      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
  1694      *     Arabic-Indic digits
  1695      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
  1696      *     Extended Arabic-Indic digits
  1697      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
  1698      *     Devanagari digits
  1699      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
  1700      *     Fullwidth digits
  1701      * </ul>
  1702      *
  1703      * Many other character ranges contain digits as well.
  1704      *
  1705      * @param   codePoint the character (Unicode code point) to be tested.
  1706      * @return  {@code true} if the character is a digit;
  1707      *          {@code false} otherwise.
  1708      * @see     Character#forDigit(int, int)
  1709      * @see     Character#getType(int)
  1710      * @since   1.5
  1711      */
  1712     public static boolean isDigit(int codePoint) {
  1713         return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
  1714     }
  1715 
  1716     /**
  1717      * Determines if a character is defined in Unicode.
  1718      * <p>
  1719      * A character is defined if at least one of the following is true:
  1720      * <ul>
  1721      * <li>It has an entry in the UnicodeData file.
  1722      * <li>It has a value in a range defined by the UnicodeData file.
  1723      * </ul>
  1724      *
  1725      * <p><b>Note:</b> This method cannot handle <a
  1726      * href="#supplementary"> supplementary characters</a>. To support
  1727      * all Unicode characters, including supplementary characters, use
  1728      * the {@link #isDefined(int)} method.
  1729      *
  1730      * @param   ch   the character to be tested
  1731      * @return  {@code true} if the character has a defined meaning
  1732      *          in Unicode; {@code false} otherwise.
  1733      * @see     Character#isDigit(char)
  1734      * @see     Character#isLetter(char)
  1735      * @see     Character#isLetterOrDigit(char)
  1736      * @see     Character#isLowerCase(char)
  1737      * @see     Character#isTitleCase(char)
  1738      * @see     Character#isUpperCase(char)
  1739      * @since   1.0.2
  1740      */
  1741     public static boolean isDefined(char ch) {
  1742         return isDefined((int)ch);
  1743     }
  1744 
  1745     /**
  1746      * Determines if a character (Unicode code point) is defined in Unicode.
  1747      * <p>
  1748      * A character is defined if at least one of the following is true:
  1749      * <ul>
  1750      * <li>It has an entry in the UnicodeData file.
  1751      * <li>It has a value in a range defined by the UnicodeData file.
  1752      * </ul>
  1753      *
  1754      * @param   codePoint the character (Unicode code point) to be tested.
  1755      * @return  {@code true} if the character has a defined meaning
  1756      *          in Unicode; {@code false} otherwise.
  1757      * @see     Character#isDigit(int)
  1758      * @see     Character#isLetter(int)
  1759      * @see     Character#isLetterOrDigit(int)
  1760      * @see     Character#isLowerCase(int)
  1761      * @see     Character#isTitleCase(int)
  1762      * @see     Character#isUpperCase(int)
  1763      * @since   1.5
  1764      */
  1765     public static boolean isDefined(int codePoint) {
  1766         return getType(codePoint) != Character.UNASSIGNED;
  1767     }
  1768 
  1769     /**
  1770      * Determines if the specified character is a letter.
  1771      * <p>
  1772      * A character is considered to be a letter if its general
  1773      * category type, provided by {@code Character.getType(ch)},
  1774      * is any of the following:
  1775      * <ul>
  1776      * <li> {@code UPPERCASE_LETTER}
  1777      * <li> {@code LOWERCASE_LETTER}
  1778      * <li> {@code TITLECASE_LETTER}
  1779      * <li> {@code MODIFIER_LETTER}
  1780      * <li> {@code OTHER_LETTER}
  1781      * </ul>
  1782      *
  1783      * Not all letters have case. Many characters are
  1784      * letters but are neither uppercase nor lowercase nor titlecase.
  1785      *
  1786      * <p><b>Note:</b> This method cannot handle <a
  1787      * href="#supplementary"> supplementary characters</a>. To support
  1788      * all Unicode characters, including supplementary characters, use
  1789      * the {@link #isLetter(int)} method.
  1790      *
  1791      * @param   ch   the character to be tested.
  1792      * @return  {@code true} if the character is a letter;
  1793      *          {@code false} otherwise.
  1794      * @see     Character#isDigit(char)
  1795      * @see     Character#isJavaIdentifierStart(char)
  1796      * @see     Character#isJavaLetter(char)
  1797      * @see     Character#isJavaLetterOrDigit(char)
  1798      * @see     Character#isLetterOrDigit(char)
  1799      * @see     Character#isLowerCase(char)
  1800      * @see     Character#isTitleCase(char)
  1801      * @see     Character#isUnicodeIdentifierStart(char)
  1802      * @see     Character#isUpperCase(char)
  1803      */
  1804     public static boolean isLetter(char ch) {
  1805         return isLetter((int)ch);
  1806     }
  1807 
  1808     /**
  1809      * Determines if the specified character (Unicode code point) is a letter.
  1810      * <p>
  1811      * A character is considered to be a letter if its general
  1812      * category type, provided by {@link Character#getType(int) getType(codePoint)},
  1813      * is any of the following:
  1814      * <ul>
  1815      * <li> {@code UPPERCASE_LETTER}
  1816      * <li> {@code LOWERCASE_LETTER}
  1817      * <li> {@code TITLECASE_LETTER}
  1818      * <li> {@code MODIFIER_LETTER}
  1819      * <li> {@code OTHER_LETTER}
  1820      * </ul>
  1821      *
  1822      * Not all letters have case. Many characters are
  1823      * letters but are neither uppercase nor lowercase nor titlecase.
  1824      *
  1825      * @param   codePoint the character (Unicode code point) to be tested.
  1826      * @return  {@code true} if the character is a letter;
  1827      *          {@code false} otherwise.
  1828      * @see     Character#isDigit(int)
  1829      * @see     Character#isJavaIdentifierStart(int)
  1830      * @see     Character#isLetterOrDigit(int)
  1831      * @see     Character#isLowerCase(int)
  1832      * @see     Character#isTitleCase(int)
  1833      * @see     Character#isUnicodeIdentifierStart(int)
  1834      * @see     Character#isUpperCase(int)
  1835      * @since   1.5
  1836      */
  1837     public static boolean isLetter(int codePoint) {
  1838         return ((((1 << Character.UPPERCASE_LETTER) |
  1839             (1 << Character.LOWERCASE_LETTER) |
  1840             (1 << Character.TITLECASE_LETTER) |
  1841             (1 << Character.MODIFIER_LETTER) |
  1842             (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
  1843             != 0;
  1844     }
  1845 
  1846     /**
  1847      * Determines if the specified character is a letter or digit.
  1848      * <p>
  1849      * A character is considered to be a letter or digit if either
  1850      * {@code Character.isLetter(char ch)} or
  1851      * {@code Character.isDigit(char ch)} returns
  1852      * {@code true} for the character.
  1853      *
  1854      * <p><b>Note:</b> This method cannot handle <a
  1855      * href="#supplementary"> supplementary characters</a>. To support
  1856      * all Unicode characters, including supplementary characters, use
  1857      * the {@link #isLetterOrDigit(int)} method.
  1858      *
  1859      * @param   ch   the character to be tested.
  1860      * @return  {@code true} if the character is a letter or digit;
  1861      *          {@code false} otherwise.
  1862      * @see     Character#isDigit(char)
  1863      * @see     Character#isJavaIdentifierPart(char)
  1864      * @see     Character#isJavaLetter(char)
  1865      * @see     Character#isJavaLetterOrDigit(char)
  1866      * @see     Character#isLetter(char)
  1867      * @see     Character#isUnicodeIdentifierPart(char)
  1868      * @since   1.0.2
  1869      */
  1870     public static boolean isLetterOrDigit(char ch) {
  1871         return isLetterOrDigit((int)ch);
  1872     }
  1873 
  1874     /**
  1875      * Determines if the specified character (Unicode code point) is a letter or digit.
  1876      * <p>
  1877      * A character is considered to be a letter or digit if either
  1878      * {@link #isLetter(int) isLetter(codePoint)} or
  1879      * {@link #isDigit(int) isDigit(codePoint)} returns
  1880      * {@code true} for the character.
  1881      *
  1882      * @param   codePoint the character (Unicode code point) to be tested.
  1883      * @return  {@code true} if the character is a letter or digit;
  1884      *          {@code false} otherwise.
  1885      * @see     Character#isDigit(int)
  1886      * @see     Character#isJavaIdentifierPart(int)
  1887      * @see     Character#isLetter(int)
  1888      * @see     Character#isUnicodeIdentifierPart(int)
  1889      * @since   1.5
  1890      */
  1891     public static boolean isLetterOrDigit(int codePoint) {
  1892         return ((((1 << Character.UPPERCASE_LETTER) |
  1893             (1 << Character.LOWERCASE_LETTER) |
  1894             (1 << Character.TITLECASE_LETTER) |
  1895             (1 << Character.MODIFIER_LETTER) |
  1896             (1 << Character.OTHER_LETTER) |
  1897             (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
  1898             != 0;
  1899     }
  1900     
  1901     static int getType(int x) {
  1902         throw new UnsupportedOperationException();
  1903     }
  1904     
  1905     /**
  1906      * Converts the character argument to lowercase using case
  1907      * mapping information from the UnicodeData file.
  1908      * <p>
  1909      * Note that
  1910      * {@code Character.isLowerCase(Character.toLowerCase(ch))}
  1911      * does not always return {@code true} for some ranges of
  1912      * characters, particularly those that are symbols or ideographs.
  1913      *
  1914      * <p>In general, {@link String#toLowerCase()} should be used to map
  1915      * characters to lowercase. {@code String} case mapping methods
  1916      * have several benefits over {@code Character} case mapping methods.
  1917      * {@code String} case mapping methods can perform locale-sensitive
  1918      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
  1919      * the {@code Character} case mapping methods cannot.
  1920      *
  1921      * <p><b>Note:</b> This method cannot handle <a
  1922      * href="#supplementary"> supplementary characters</a>. To support
  1923      * all Unicode characters, including supplementary characters, use
  1924      * the {@link #toLowerCase(int)} method.
  1925      *
  1926      * @param   ch   the character to be converted.
  1927      * @return  the lowercase equivalent of the character, if any;
  1928      *          otherwise, the character itself.
  1929      * @see     Character#isLowerCase(char)
  1930      * @see     String#toLowerCase()
  1931      */
  1932     public static char toLowerCase(char ch) {
  1933         throw new UnsupportedOperationException();
  1934     }
  1935 
  1936     /**
  1937      * Converts the character argument to uppercase using case mapping
  1938      * information from the UnicodeData file.
  1939      * <p>
  1940      * Note that
  1941      * {@code Character.isUpperCase(Character.toUpperCase(ch))}
  1942      * does not always return {@code true} for some ranges of
  1943      * characters, particularly those that are symbols or ideographs.
  1944      *
  1945      * <p>In general, {@link String#toUpperCase()} should be used to map
  1946      * characters to uppercase. {@code String} case mapping methods
  1947      * have several benefits over {@code Character} case mapping methods.
  1948      * {@code String} case mapping methods can perform locale-sensitive
  1949      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
  1950      * the {@code Character} case mapping methods cannot.
  1951      *
  1952      * <p><b>Note:</b> This method cannot handle <a
  1953      * href="#supplementary"> supplementary characters</a>. To support
  1954      * all Unicode characters, including supplementary characters, use
  1955      * the {@link #toUpperCase(int)} method.
  1956      *
  1957      * @param   ch   the character to be converted.
  1958      * @return  the uppercase equivalent of the character, if any;
  1959      *          otherwise, the character itself.
  1960      * @see     Character#isUpperCase(char)
  1961      * @see     String#toUpperCase()
  1962      */
  1963     public static char toUpperCase(char ch) {
  1964         throw new UnsupportedOperationException();
  1965     }
  1966 
  1967     /**
  1968      * Returns the numeric value of the character {@code ch} in the
  1969      * specified radix.
  1970      * <p>
  1971      * If the radix is not in the range {@code MIN_RADIX} &le;
  1972      * {@code radix} &le; {@code MAX_RADIX} or if the
  1973      * value of {@code ch} is not a valid digit in the specified
  1974      * radix, {@code -1} is returned. A character is a valid digit
  1975      * if at least one of the following is true:
  1976      * <ul>
  1977      * <li>The method {@code isDigit} is {@code true} of the character
  1978      *     and the Unicode decimal digit value of the character (or its
  1979      *     single-character decomposition) is less than the specified radix.
  1980      *     In this case the decimal digit value is returned.
  1981      * <li>The character is one of the uppercase Latin letters
  1982      *     {@code 'A'} through {@code 'Z'} and its code is less than
  1983      *     {@code radix + 'A' - 10}.
  1984      *     In this case, {@code ch - 'A' + 10}
  1985      *     is returned.
  1986      * <li>The character is one of the lowercase Latin letters
  1987      *     {@code 'a'} through {@code 'z'} and its code is less than
  1988      *     {@code radix + 'a' - 10}.
  1989      *     In this case, {@code ch - 'a' + 10}
  1990      *     is returned.
  1991      * <li>The character is one of the fullwidth uppercase Latin letters A
  1992      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
  1993      *     and its code is less than
  1994      *     {@code radix + '\u005CuFF21' - 10}.
  1995      *     In this case, {@code ch - '\u005CuFF21' + 10}
  1996      *     is returned.
  1997      * <li>The character is one of the fullwidth lowercase Latin letters a
  1998      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
  1999      *     and its code is less than
  2000      *     {@code radix + '\u005CuFF41' - 10}.
  2001      *     In this case, {@code ch - '\u005CuFF41' + 10}
  2002      *     is returned.
  2003      * </ul>
  2004      *
  2005      * <p><b>Note:</b> This method cannot handle <a
  2006      * href="#supplementary"> supplementary characters</a>. To support
  2007      * all Unicode characters, including supplementary characters, use
  2008      * the {@link #digit(int, int)} method.
  2009      *
  2010      * @param   ch      the character to be converted.
  2011      * @param   radix   the radix.
  2012      * @return  the numeric value represented by the character in the
  2013      *          specified radix.
  2014      * @see     Character#forDigit(int, int)
  2015      * @see     Character#isDigit(char)
  2016      */
  2017     public static int digit(char ch, int radix) {
  2018         return digit((int)ch, radix);
  2019     }
  2020 
  2021     /**
  2022      * Returns the numeric value of the specified character (Unicode
  2023      * code point) in the specified radix.
  2024      *
  2025      * <p>If the radix is not in the range {@code MIN_RADIX} &le;
  2026      * {@code radix} &le; {@code MAX_RADIX} or if the
  2027      * character is not a valid digit in the specified
  2028      * radix, {@code -1} is returned. A character is a valid digit
  2029      * if at least one of the following is true:
  2030      * <ul>
  2031      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
  2032      *     and the Unicode decimal digit value of the character (or its
  2033      *     single-character decomposition) is less than the specified radix.
  2034      *     In this case the decimal digit value is returned.
  2035      * <li>The character is one of the uppercase Latin letters
  2036      *     {@code 'A'} through {@code 'Z'} and its code is less than
  2037      *     {@code radix + 'A' - 10}.
  2038      *     In this case, {@code codePoint - 'A' + 10}
  2039      *     is returned.
  2040      * <li>The character is one of the lowercase Latin letters
  2041      *     {@code 'a'} through {@code 'z'} and its code is less than
  2042      *     {@code radix + 'a' - 10}.
  2043      *     In this case, {@code codePoint - 'a' + 10}
  2044      *     is returned.
  2045      * <li>The character is one of the fullwidth uppercase Latin letters A
  2046      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
  2047      *     and its code is less than
  2048      *     {@code radix + '\u005CuFF21' - 10}.
  2049      *     In this case,
  2050      *     {@code codePoint - '\u005CuFF21' + 10}
  2051      *     is returned.
  2052      * <li>The character is one of the fullwidth lowercase Latin letters a
  2053      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
  2054      *     and its code is less than
  2055      *     {@code radix + '\u005CuFF41'- 10}.
  2056      *     In this case,
  2057      *     {@code codePoint - '\u005CuFF41' + 10}
  2058      *     is returned.
  2059      * </ul>
  2060      *
  2061      * @param   codePoint the character (Unicode code point) to be converted.
  2062      * @param   radix   the radix.
  2063      * @return  the numeric value represented by the character in the
  2064      *          specified radix.
  2065      * @see     Character#forDigit(int, int)
  2066      * @see     Character#isDigit(int)
  2067      * @since   1.5
  2068      */
  2069     public static int digit(int codePoint, int radix) {
  2070         throw new UnsupportedOperationException();
  2071     }
  2072 
  2073     /**
  2074      * Returns the {@code int} value that the specified Unicode
  2075      * character represents. For example, the character
  2076      * {@code '\u005Cu216C'} (the roman numeral fifty) will return
  2077      * an int with a value of 50.
  2078      * <p>
  2079      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
  2080      * {@code '\u005Cu005A'}), lowercase
  2081      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
  2082      * full width variant ({@code '\u005CuFF21'} through
  2083      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
  2084      * {@code '\u005CuFF5A'}) forms have numeric values from 10
  2085      * through 35. This is independent of the Unicode specification,
  2086      * which does not assign numeric values to these {@code char}
  2087      * values.
  2088      * <p>
  2089      * If the character does not have a numeric value, then -1 is returned.
  2090      * If the character has a numeric value that cannot be represented as a
  2091      * nonnegative integer (for example, a fractional value), then -2
  2092      * is returned.
  2093      *
  2094      * <p><b>Note:</b> This method cannot handle <a
  2095      * href="#supplementary"> supplementary characters</a>. To support
  2096      * all Unicode characters, including supplementary characters, use
  2097      * the {@link #getNumericValue(int)} method.
  2098      *
  2099      * @param   ch      the character to be converted.
  2100      * @return  the numeric value of the character, as a nonnegative {@code int}
  2101      *           value; -2 if the character has a numeric value that is not a
  2102      *          nonnegative integer; -1 if the character has no numeric value.
  2103      * @see     Character#forDigit(int, int)
  2104      * @see     Character#isDigit(char)
  2105      * @since   1.1
  2106      */
  2107     public static int getNumericValue(char ch) {
  2108         return getNumericValue((int)ch);
  2109     }
  2110 
  2111     /**
  2112      * Returns the {@code int} value that the specified
  2113      * character (Unicode code point) represents. For example, the character
  2114      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
  2115      * an {@code int} with a value of 50.
  2116      * <p>
  2117      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
  2118      * {@code '\u005Cu005A'}), lowercase
  2119      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
  2120      * full width variant ({@code '\u005CuFF21'} through
  2121      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
  2122      * {@code '\u005CuFF5A'}) forms have numeric values from 10
  2123      * through 35. This is independent of the Unicode specification,
  2124      * which does not assign numeric values to these {@code char}
  2125      * values.
  2126      * <p>
  2127      * If the character does not have a numeric value, then -1 is returned.
  2128      * If the character has a numeric value that cannot be represented as a
  2129      * nonnegative integer (for example, a fractional value), then -2
  2130      * is returned.
  2131      *
  2132      * @param   codePoint the character (Unicode code point) to be converted.
  2133      * @return  the numeric value of the character, as a nonnegative {@code int}
  2134      *          value; -2 if the character has a numeric value that is not a
  2135      *          nonnegative integer; -1 if the character has no numeric value.
  2136      * @see     Character#forDigit(int, int)
  2137      * @see     Character#isDigit(int)
  2138      * @since   1.5
  2139      */
  2140     public static int getNumericValue(int codePoint) {
  2141         throw new UnsupportedOperationException();
  2142     }
  2143 
  2144     /**
  2145      * Determines if the specified character is ISO-LATIN-1 white space.
  2146      * This method returns {@code true} for the following five
  2147      * characters only:
  2148      * <table>
  2149      * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>
  2150      *     <td>{@code HORIZONTAL TABULATION}</td></tr>
  2151      * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>
  2152      *     <td>{@code NEW LINE}</td></tr>
  2153      * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>
  2154      *     <td>{@code FORM FEED}</td></tr>
  2155      * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>
  2156      *     <td>{@code CARRIAGE RETURN}</td></tr>
     * <tr><td>{@code ' '}</td>            <td>{@code U+0020}</td>
  2158      *     <td>{@code SPACE}</td></tr>
  2159      * </table>
  2160      *
  2161      * @param      ch   the character to be tested.
  2162      * @return     {@code true} if the character is ISO-LATIN-1 white
  2163      *             space; {@code false} otherwise.
  2164      * @see        Character#isSpaceChar(char)
  2165      * @see        Character#isWhitespace(char)
  2166      * @deprecated Replaced by isWhitespace(char).
  2167      */
  2168     @Deprecated
  2169     public static boolean isSpace(char ch) {
  2170         return (ch <= 0x0020) &&
  2171             (((((1L << 0x0009) |
  2172             (1L << 0x000A) |
  2173             (1L << 0x000C) |
  2174             (1L << 0x000D) |
  2175             (1L << 0x0020)) >> ch) & 1L) != 0);
  2176     }
  2177 
  2178 
  2179 
  2180     /**
  2181      * Determines if the specified character is white space according to Java.
  2182      * A character is a Java whitespace character if and only if it satisfies
  2183      * one of the following criteria:
  2184      * <ul>
  2185      * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
  2186      *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
  2187      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
  2188      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
  2189      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
  2190      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
  2191      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
  2192      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
  2193      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
  2194      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
  2195      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
  2196      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
  2197      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
  2198      * </ul>
  2199      *
  2200      * <p><b>Note:</b> This method cannot handle <a
  2201      * href="#supplementary"> supplementary characters</a>. To support
  2202      * all Unicode characters, including supplementary characters, use
  2203      * the {@link #isWhitespace(int)} method.
  2204      *
  2205      * @param   ch the character to be tested.
  2206      * @return  {@code true} if the character is a Java whitespace
  2207      *          character; {@code false} otherwise.
  2208      * @see     Character#isSpaceChar(char)
  2209      * @since   1.1
  2210      */
  2211     public static boolean isWhitespace(char ch) {
  2212         return isWhitespace((int)ch);
  2213     }
  2214 
  2215     /**
  2216      * Determines if the specified character (Unicode code point) is
  2217      * white space according to Java.  A character is a Java
  2218      * whitespace character if and only if it satisfies one of the
  2219      * following criteria:
  2220      * <ul>
  2221      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
  2222      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
  2223      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
  2224      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
  2225      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
  2226      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
  2227      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
  2228      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
  2229      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
  2230      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
  2231      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
  2232      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
  2233      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
  2234      * </ul>
     *
  2237      * @param   codePoint the character (Unicode code point) to be tested.
  2238      * @return  {@code true} if the character is a Java whitespace
  2239      *          character; {@code false} otherwise.
  2240      * @see     Character#isSpaceChar(int)
  2241      * @since   1.5
  2242      */
  2243     public static boolean isWhitespace(int codePoint) {
  2244         throw new UnsupportedOperationException();
  2245     }
  2246 
  2247     /**
  2248      * Determines if the specified character is an ISO control
  2249      * character.  A character is considered to be an ISO control
  2250      * character if its code is in the range {@code '\u005Cu0000'}
  2251      * through {@code '\u005Cu001F'} or in the range
  2252      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
  2253      *
  2254      * <p><b>Note:</b> This method cannot handle <a
  2255      * href="#supplementary"> supplementary characters</a>. To support
  2256      * all Unicode characters, including supplementary characters, use
  2257      * the {@link #isISOControl(int)} method.
  2258      *
  2259      * @param   ch      the character to be tested.
  2260      * @return  {@code true} if the character is an ISO control character;
  2261      *          {@code false} otherwise.
  2262      *
  2263      * @see     Character#isSpaceChar(char)
  2264      * @see     Character#isWhitespace(char)
  2265      * @since   1.1
  2266      */
  2267     public static boolean isISOControl(char ch) {
  2268         return isISOControl((int)ch);
  2269     }
  2270 
  2271     /**
  2272      * Determines if the referenced character (Unicode code point) is an ISO control
  2273      * character.  A character is considered to be an ISO control
  2274      * character if its code is in the range {@code '\u005Cu0000'}
  2275      * through {@code '\u005Cu001F'} or in the range
  2276      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
  2277      *
  2278      * @param   codePoint the character (Unicode code point) to be tested.
  2279      * @return  {@code true} if the character is an ISO control character;
  2280      *          {@code false} otherwise.
  2281      * @see     Character#isSpaceChar(int)
  2282      * @see     Character#isWhitespace(int)
  2283      * @since   1.5
  2284      */
  2285     public static boolean isISOControl(int codePoint) {
  2286         // Optimized form of:
  2287         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
  2288         //     (codePoint >= 0x7F && codePoint <= 0x9F);
  2289         return codePoint <= 0x9F &&
  2290             (codePoint >= 0x7F || (codePoint >>> 5 == 0));
  2291     }
  2292 
  2293     /**
  2294      * Determines the character representation for a specific digit in
  2295      * the specified radix. If the value of {@code radix} is not a
  2296      * valid radix, or the value of {@code digit} is not a valid
  2297      * digit in the specified radix, the null character
  2298      * ({@code '\u005Cu0000'}) is returned.
  2299      * <p>
  2300      * The {@code radix} argument is valid if it is greater than or
  2301      * equal to {@code MIN_RADIX} and less than or equal to
  2302      * {@code MAX_RADIX}. The {@code digit} argument is valid if
  2303      * {@code 0 <= digit < radix}.
  2304      * <p>
  2305      * If the digit is less than 10, then
  2306      * {@code '0' + digit} is returned. Otherwise, the value
  2307      * {@code 'a' + digit - 10} is returned.
  2308      *
  2309      * @param   digit   the number to convert to a character.
  2310      * @param   radix   the radix.
  2311      * @return  the {@code char} representation of the specified digit
  2312      *          in the specified radix.
  2313      * @see     Character#MIN_RADIX
  2314      * @see     Character#MAX_RADIX
  2315      * @see     Character#digit(char, int)
  2316      */
  2317     public static char forDigit(int digit, int radix) {
  2318         if ((digit >= radix) || (digit < 0)) {
  2319             return '\0';
  2320         }
  2321         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
  2322             return '\0';
  2323         }
  2324         if (digit < 10) {
  2325             return (char)('0' + digit);
  2326         }
  2327         return (char)('a' - 10 + digit);
  2328     }
  2329 
  2330     /**
  2331      * Compares two {@code Character} objects numerically.
  2332      *
  2333      * @param   anotherCharacter   the {@code Character} to be compared.
     *
  2335      * @return  the value {@code 0} if the argument {@code Character}
  2336      *          is equal to this {@code Character}; a value less than
  2337      *          {@code 0} if this {@code Character} is numerically less
  2338      *          than the {@code Character} argument; and a value greater than
  2339      *          {@code 0} if this {@code Character} is numerically greater
  2340      *          than the {@code Character} argument (unsigned comparison).
  2341      *          Note that this is strictly a numerical comparison; it is not
  2342      *          locale-dependent.
  2343      * @since   1.2
  2344      */
  2345     public int compareTo(Character anotherCharacter) {
  2346         return compare(this.value, anotherCharacter.value);
  2347     }
  2348 
  2349     /**
  2350      * Compares two {@code char} values numerically.
  2351      * The value returned is identical to what would be returned by:
  2352      * <pre>
  2353      *    Character.valueOf(x).compareTo(Character.valueOf(y))
  2354      * </pre>
  2355      *
  2356      * @param  x the first {@code char} to compare
  2357      * @param  y the second {@code char} to compare
  2358      * @return the value {@code 0} if {@code x == y};
  2359      *         a value less than {@code 0} if {@code x < y}; and
  2360      *         a value greater than {@code 0} if {@code x > y}
  2361      * @since 1.7
  2362      */
  2363     public static int compare(char x, char y) {
  2364         return x - y;
  2365     }
  2366 
  2367 
    /**
     * The number of bits used to represent a {@code char} value in unsigned
     * binary form, constant {@code 16}.
     *
     * @since 1.5
     */
    public static final int SIZE = 16;
  2375 
    /**
     * Returns the value obtained by reversing the order of the bytes in the
     * specified {@code char} value.
     *
     * @param ch the {@code char} whose byte order is to be reversed.
     * @return the value obtained by reversing (or, equivalently, swapping)
     *     the bytes in the specified {@code char} value.
     * @since 1.5
     */
  2384     public static char reverseBytes(char ch) {
  2385         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
  2386     }
  2387 
  2388 }