rt/emul/mini/src/main/java/java/lang/Character.java
author Jaroslav Tulach <jaroslav.tulach@apidesign.org>
Tue, 26 Feb 2013 16:54:16 +0100
changeset 772 d382dacfd73f
parent 594 emul/mini/src/main/java/java/lang/Character.java@035fcbd7a33c
child 791 af4001c85438
permissions -rw-r--r--
Moving modules around so the runtime is under one master pom and can be built without building other modules that are in the repository
     1 /*
     2  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Oracle in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    22  * or visit www.oracle.com if you need additional information or have any
    23  * questions.
    24  */
    25 
    26 package java.lang;
    27 
    28 import org.apidesign.bck2brwsr.core.JavaScriptBody;
    29 
    30 /**
    31  * The {@code Character} class wraps a value of the primitive
    32  * type {@code char} in an object. An object of type
    33  * {@code Character} contains a single field whose type is
    34  * {@code char}.
    35  * <p>
    36  * In addition, this class provides several methods for determining
    37  * a character's category (lowercase letter, digit, etc.) and for converting
    38  * characters from uppercase to lowercase and vice versa.
    39  * <p>
    40  * Character information is based on the Unicode Standard, version 6.0.0.
    41  * <p>
    42  * The methods and data of class {@code Character} are defined by
    43  * the information in the <i>UnicodeData</i> file that is part of the
    44  * Unicode Character Database maintained by the Unicode
    45  * Consortium. This file specifies various properties including name
    46  * and general category for every defined Unicode code point or
    47  * character range.
    48  * <p>
    49  * The file and its description are available from the Unicode Consortium at:
    50  * <ul>
    51  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
    52  * </ul>
    53  *
    54  * <h4><a name="unicode">Unicode Character Representations</a></h4>
    55  *
    56  * <p>The {@code char} data type (and therefore the value that a
    57  * {@code Character} object encapsulates) are based on the
    58  * original Unicode specification, which defined characters as
    59  * fixed-width 16-bit entities. The Unicode Standard has since been
    60  * changed to allow for characters whose representation requires more
    61  * than 16 bits.  The range of legal <em>code point</em>s is now
    62  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
    63  * (Refer to the <a
    64  * href="http://www.unicode.org/reports/tr27/#notation"><i>
    65  * definition</i></a> of the U+<i>n</i> notation in the Unicode
    66  * Standard.)
    67  *
    68  * <p><a name="BMP">The set of characters from U+0000 to U+FFFF is
    69  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
    70  * <a name="supplementary">Characters</a> whose code points are greater
    71  * than U+FFFF are called <em>supplementary character</em>s.  The Java
    72  * platform uses the UTF-16 representation in {@code char} arrays and
    73  * in the {@code String} and {@code StringBuffer} classes. In
    74  * this representation, supplementary characters are represented as a pair
    75  * of {@code char} values, the first from the <em>high-surrogates</em>
    76  * range, (&#92;uD800-&#92;uDBFF), the second from the
    77  * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
    78  *
    79  * <p>A {@code char} value, therefore, represents Basic
    80  * Multilingual Plane (BMP) code points, including the surrogate
    81  * code points, or code units of the UTF-16 encoding. An
    82  * {@code int} value represents all Unicode code points,
    83  * including supplementary code points. The lower (least significant)
    84  * 21 bits of {@code int} are used to represent Unicode code
    85  * points and the upper (most significant) 11 bits must be zero.
    86  * Unless otherwise specified, the behavior with respect to
    87  * supplementary characters and surrogate {@code char} values is
    88  * as follows:
    89  *
    90  * <ul>
    91  * <li>The methods that only accept a {@code char} value cannot support
    92  * supplementary characters. They treat {@code char} values from the
    93  * surrogate ranges as undefined characters. For example,
    94  * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
    95  * this specific value if followed by any low-surrogate value in a string
    96  * would represent a letter.
    97  *
    98  * <li>The methods that accept an {@code int} value support all
    99  * Unicode characters, including supplementary characters. For
   100  * example, {@code Character.isLetter(0x2F81A)} returns
   101  * {@code true} because the code point value represents a letter
   102  * (a CJK ideograph).
   103  * </ul>
   104  *
   105  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
   106  * used for character values in the range between U+0000 and U+10FFFF,
   107  * and <em>Unicode code unit</em> is used for 16-bit
   108  * {@code char} values that are code units of the <em>UTF-16</em>
   109  * encoding. For more information on Unicode terminology, refer to the
   110  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
   111  *
   112  * @author  Lee Boynton
   113  * @author  Guy Steele
   114  * @author  Akira Tanaka
   115  * @author  Martin Buchholz
   116  * @author  Ulf Zibis
   117  * @since   1.0
   118  */
   119 public final
   120 class Character implements java.io.Serializable, Comparable<Character> {
   121     /**
   122      * The minimum radix available for conversion to and from strings.
   123      * The constant value of this field is the smallest value permitted
   124      * for the radix argument in radix-conversion methods such as the
   125      * {@code digit} method, the {@code forDigit} method, and the
   126      * {@code toString} method of class {@code Integer}.
   127      *
   128      * @see     Character#digit(char, int)
   129      * @see     Character#forDigit(int, int)
   130      * @see     Integer#toString(int, int)
   131      * @see     Integer#valueOf(String)
   132      */
   133     public static final int MIN_RADIX = 2;
   134 
   135     /**
   136      * The maximum radix available for conversion to and from strings.
   137      * The constant value of this field is the largest value permitted
   138      * for the radix argument in radix-conversion methods such as the
   139      * {@code digit} method, the {@code forDigit} method, and the
   140      * {@code toString} method of class {@code Integer}.
   141      *
   142      * @see     Character#digit(char, int)
   143      * @see     Character#forDigit(int, int)
   144      * @see     Integer#toString(int, int)
   145      * @see     Integer#valueOf(String)
   146      */
   147     public static final int MAX_RADIX = 36;
   148 
   149     /**
   150      * The constant value of this field is the smallest value of type
   151      * {@code char}, {@code '\u005Cu0000'}.
   152      *
   153      * @since   1.0.2
   154      */
   155     public static final char MIN_VALUE = '\u0000';
   156 
   157     /**
   158      * The constant value of this field is the largest value of type
   159      * {@code char}, {@code '\u005CuFFFF'}.
   160      *
   161      * @since   1.0.2
   162      */
   163     public static final char MAX_VALUE = '\uFFFF';
   164 
   165     /**
   166      * The {@code Class} instance representing the primitive type
   167      * {@code char}.
   168      *
   169      * @since   1.1
   170      */
   171     public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
   172 
   173     /*
   174      * Normative general types
   175      */
   176 
   177     /*
   178      * General character types
   179      */
   180 
   181     /**
   182      * General category "Cn" in the Unicode specification.
   183      * @since   1.1
   184      */
   185     public static final byte UNASSIGNED = 0;
   186 
   187     /**
   188      * General category "Lu" in the Unicode specification.
   189      * @since   1.1
   190      */
   191     public static final byte UPPERCASE_LETTER = 1;
   192 
   193     /**
   194      * General category "Ll" in the Unicode specification.
   195      * @since   1.1
   196      */
   197     public static final byte LOWERCASE_LETTER = 2;
   198 
   199     /**
   200      * General category "Lt" in the Unicode specification.
   201      * @since   1.1
   202      */
   203     public static final byte TITLECASE_LETTER = 3;
   204 
   205     /**
   206      * General category "Lm" in the Unicode specification.
   207      * @since   1.1
   208      */
   209     public static final byte MODIFIER_LETTER = 4;
   210 
   211     /**
   212      * General category "Lo" in the Unicode specification.
   213      * @since   1.1
   214      */
   215     public static final byte OTHER_LETTER = 5;
   216 
   217     /**
   218      * General category "Mn" in the Unicode specification.
   219      * @since   1.1
   220      */
   221     public static final byte NON_SPACING_MARK = 6;
   222 
   223     /**
   224      * General category "Me" in the Unicode specification.
   225      * @since   1.1
   226      */
   227     public static final byte ENCLOSING_MARK = 7;
   228 
   229     /**
   230      * General category "Mc" in the Unicode specification.
   231      * @since   1.1
   232      */
   233     public static final byte COMBINING_SPACING_MARK = 8;
   234 
   235     /**
   236      * General category "Nd" in the Unicode specification.
   237      * @since   1.1
   238      */
   239     public static final byte DECIMAL_DIGIT_NUMBER        = 9;
   240 
   241     /**
   242      * General category "Nl" in the Unicode specification.
   243      * @since   1.1
   244      */
   245     public static final byte LETTER_NUMBER = 10;
   246 
   247     /**
   248      * General category "No" in the Unicode specification.
   249      * @since   1.1
   250      */
   251     public static final byte OTHER_NUMBER = 11;
   252 
   253     /**
   254      * General category "Zs" in the Unicode specification.
   255      * @since   1.1
   256      */
   257     public static final byte SPACE_SEPARATOR = 12;
   258 
   259     /**
   260      * General category "Zl" in the Unicode specification.
   261      * @since   1.1
   262      */
   263     public static final byte LINE_SEPARATOR = 13;
   264 
   265     /**
   266      * General category "Zp" in the Unicode specification.
   267      * @since   1.1
   268      */
   269     public static final byte PARAGRAPH_SEPARATOR = 14;
   270 
   271     /**
   272      * General category "Cc" in the Unicode specification.
   273      * @since   1.1
   274      */
   275     public static final byte CONTROL = 15;
   276 
   277     /**
   278      * General category "Cf" in the Unicode specification.
   279      * @since   1.1
   280      */
   281     public static final byte FORMAT = 16;
   282 
   283     /**
   284      * General category "Co" in the Unicode specification.
   285      * @since   1.1
   286      */
   287     public static final byte PRIVATE_USE = 18;
   288 
   289     /**
   290      * General category "Cs" in the Unicode specification.
   291      * @since   1.1
   292      */
   293     public static final byte SURROGATE = 19;
   294 
   295     /**
   296      * General category "Pd" in the Unicode specification.
   297      * @since   1.1
   298      */
   299     public static final byte DASH_PUNCTUATION = 20;
   300 
   301     /**
   302      * General category "Ps" in the Unicode specification.
   303      * @since   1.1
   304      */
   305     public static final byte START_PUNCTUATION = 21;
   306 
   307     /**
   308      * General category "Pe" in the Unicode specification.
   309      * @since   1.1
   310      */
   311     public static final byte END_PUNCTUATION = 22;
   312 
   313     /**
   314      * General category "Pc" in the Unicode specification.
   315      * @since   1.1
   316      */
   317     public static final byte CONNECTOR_PUNCTUATION = 23;
   318 
   319     /**
   320      * General category "Po" in the Unicode specification.
   321      * @since   1.1
   322      */
   323     public static final byte OTHER_PUNCTUATION = 24;
   324 
   325     /**
   326      * General category "Sm" in the Unicode specification.
   327      * @since   1.1
   328      */
   329     public static final byte MATH_SYMBOL = 25;
   330 
   331     /**
   332      * General category "Sc" in the Unicode specification.
   333      * @since   1.1
   334      */
   335     public static final byte CURRENCY_SYMBOL = 26;
   336 
   337     /**
   338      * General category "Sk" in the Unicode specification.
   339      * @since   1.1
   340      */
   341     public static final byte MODIFIER_SYMBOL = 27;
   342 
   343     /**
   344      * General category "So" in the Unicode specification.
   345      * @since   1.1
   346      */
   347     public static final byte OTHER_SYMBOL = 28;
   348 
   349     /**
   350      * General category "Pi" in the Unicode specification.
   351      * @since   1.4
   352      */
   353     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
   354 
   355     /**
   356      * General category "Pf" in the Unicode specification.
   357      * @since   1.4
   358      */
   359     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
   360 
   361     /**
   362      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
   363      */
   364     static final int ERROR = 0xFFFFFFFF;
   365 
   366 
   367     /**
   368      * Undefined bidirectional character type. Undefined {@code char}
   369      * values have undefined directionality in the Unicode specification.
   370      * @since 1.4
   371      */
   372     public static final byte DIRECTIONALITY_UNDEFINED = -1;
   373 
   374     /**
   375      * Strong bidirectional character type "L" in the Unicode specification.
   376      * @since 1.4
   377      */
   378     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
   379 
   380     /**
   381      * Strong bidirectional character type "R" in the Unicode specification.
   382      * @since 1.4
   383      */
   384     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
   385 
   386     /**
   387      * Strong bidirectional character type "AL" in the Unicode specification.
   388      * @since 1.4
   389      */
   390     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
   391 
   392     /**
   393      * Weak bidirectional character type "EN" in the Unicode specification.
   394      * @since 1.4
   395      */
   396     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
   397 
   398     /**
   399      * Weak bidirectional character type "ES" in the Unicode specification.
   400      * @since 1.4
   401      */
   402     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
   403 
   404     /**
   405      * Weak bidirectional character type "ET" in the Unicode specification.
   406      * @since 1.4
   407      */
   408     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
   409 
   410     /**
   411      * Weak bidirectional character type "AN" in the Unicode specification.
   412      * @since 1.4
   413      */
   414     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
   415 
   416     /**
   417      * Weak bidirectional character type "CS" in the Unicode specification.
   418      * @since 1.4
   419      */
   420     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
   421 
   422     /**
   423      * Weak bidirectional character type "NSM" in the Unicode specification.
   424      * @since 1.4
   425      */
   426     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
   427 
   428     /**
   429      * Weak bidirectional character type "BN" in the Unicode specification.
   430      * @since 1.4
   431      */
   432     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
   433 
   434     /**
   435      * Neutral bidirectional character type "B" in the Unicode specification.
   436      * @since 1.4
   437      */
   438     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
   439 
   440     /**
   441      * Neutral bidirectional character type "S" in the Unicode specification.
   442      * @since 1.4
   443      */
   444     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
   445 
   446     /**
   447      * Neutral bidirectional character type "WS" in the Unicode specification.
   448      * @since 1.4
   449      */
   450     public static final byte DIRECTIONALITY_WHITESPACE = 12;
   451 
   452     /**
   453      * Neutral bidirectional character type "ON" in the Unicode specification.
   454      * @since 1.4
   455      */
   456     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
   457 
   458     /**
   459      * Strong bidirectional character type "LRE" in the Unicode specification.
   460      * @since 1.4
   461      */
   462     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
   463 
   464     /**
   465      * Strong bidirectional character type "LRO" in the Unicode specification.
   466      * @since 1.4
   467      */
   468     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
   469 
   470     /**
   471      * Strong bidirectional character type "RLE" in the Unicode specification.
   472      * @since 1.4
   473      */
   474     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
   475 
   476     /**
   477      * Strong bidirectional character type "RLO" in the Unicode specification.
   478      * @since 1.4
   479      */
   480     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
   481 
   482     /**
   483      * Weak bidirectional character type "PDF" in the Unicode specification.
   484      * @since 1.4
   485      */
   486     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
   487 
   488     /**
   489      * The minimum value of a
   490      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
   491      * Unicode high-surrogate code unit</a>
   492      * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
   493      * A high-surrogate is also known as a <i>leading-surrogate</i>.
   494      *
   495      * @since 1.5
   496      */
   497     public static final char MIN_HIGH_SURROGATE = '\uD800';
   498 
   499     /**
   500      * The maximum value of a
   501      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
   502      * Unicode high-surrogate code unit</a>
   503      * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
   504      * A high-surrogate is also known as a <i>leading-surrogate</i>.
   505      *
   506      * @since 1.5
   507      */
   508     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
   509 
   510     /**
   511      * The minimum value of a
   512      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
   513      * Unicode low-surrogate code unit</a>
   514      * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
   515      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
   516      *
   517      * @since 1.5
   518      */
   519     public static final char MIN_LOW_SURROGATE  = '\uDC00';
   520 
   521     /**
   522      * The maximum value of a
   523      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
   524      * Unicode low-surrogate code unit</a>
   525      * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
   526      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
   527      *
   528      * @since 1.5
   529      */
   530     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
   531 
   532     /**
   533      * The minimum value of a Unicode surrogate code unit in the
   534      * UTF-16 encoding, constant {@code '\u005CuD800'}.
   535      *
   536      * @since 1.5
   537      */
   538     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
   539 
   540     /**
   541      * The maximum value of a Unicode surrogate code unit in the
   542      * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
   543      *
   544      * @since 1.5
   545      */
   546     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
   547 
   548     /**
   549      * The minimum value of a
   550      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
   551      * Unicode supplementary code point</a>, constant {@code U+10000}.
   552      *
   553      * @since 1.5
   554      */
   555     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
   556 
   557     /**
   558      * The minimum value of a
   559      * <a href="http://www.unicode.org/glossary/#code_point">
   560      * Unicode code point</a>, constant {@code U+0000}.
   561      *
   562      * @since 1.5
   563      */
   564     public static final int MIN_CODE_POINT = 0x000000;
   565 
   566     /**
   567      * The maximum value of a
   568      * <a href="http://www.unicode.org/glossary/#code_point">
   569      * Unicode code point</a>, constant {@code U+10FFFF}.
   570      *
   571      * @since 1.5
   572      */
   573     public static final int MAX_CODE_POINT = 0X10FFFF;
   574 
   575 
   576     /**
   577      * Instances of this class represent particular subsets of the Unicode
   578      * character set.  The only family of subsets defined in the
   579      * {@code Character} class is {@link Character.UnicodeBlock}.
   580      * Other portions of the Java API may define other subsets for their
   581      * own purposes.
   582      *
   583      * @since 1.2
   584      */
   585     public static class Subset  {
   586 
   587         private String name;
   588 
   589         /**
   590          * Constructs a new {@code Subset} instance.
   591          *
   592          * @param  name  The name of this subset
   593          * @exception NullPointerException if name is {@code null}
   594          */
   595         protected Subset(String name) {
   596             if (name == null) {
   597                 throw new NullPointerException("name");
   598             }
   599             this.name = name;
   600         }
   601 
   602         /**
   603          * Compares two {@code Subset} objects for equality.
   604          * This method returns {@code true} if and only if
   605          * {@code this} and the argument refer to the same
   606          * object; since this method is {@code final}, this
   607          * guarantee holds for all subclasses.
   608          */
   609         public final boolean equals(Object obj) {
   610             return (this == obj);
   611         }
   612 
   613         /**
   614          * Returns the standard hash code as defined by the
   615          * {@link Object#hashCode} method.  This method
   616          * is {@code final} in order to ensure that the
   617          * {@code equals} and {@code hashCode} methods will
   618          * be consistent in all subclasses.
   619          */
   620         public final int hashCode() {
   621             return super.hashCode();
   622         }
   623 
   624         /**
   625          * Returns the name of this subset.
   626          */
   627         public final String toString() {
   628             return name;
   629         }
   630     }
   631 
   632     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
   633     // for the latest specification of Unicode Blocks.
   634 
   635 
   636     /**
   637      * The value of the {@code Character}.
   638      *
   639      * @serial
   640      */
   641     private final char value;
   642 
   643     /** use serialVersionUID from JDK 1.0.2 for interoperability */
   644     private static final long serialVersionUID = 3786198910865385080L;
   645 
   646     /**
   647      * Constructs a newly allocated {@code Character} object that
   648      * represents the specified {@code char} value.
   649      *
   650      * @param  value   the value to be represented by the
   651      *                  {@code Character} object.
   652      */
   653     public Character(char value) {
   654         this.value = value;
   655     }
   656 
   657     private static class CharacterCache {
   658         private CharacterCache(){}
   659 
   660         static final Character cache[] = new Character[127 + 1];
   661 
   662         static {
   663             for (int i = 0; i < cache.length; i++)
   664                 cache[i] = new Character((char)i);
   665         }
   666     }
   667 
   668     /**
   669      * Returns a <tt>Character</tt> instance representing the specified
   670      * <tt>char</tt> value.
   671      * If a new <tt>Character</tt> instance is not required, this method
   672      * should generally be used in preference to the constructor
   673      * {@link #Character(char)}, as this method is likely to yield
   674      * significantly better space and time performance by caching
   675      * frequently requested values.
   676      *
   677      * This method will always cache values in the range {@code
   678      * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
   679      * cache other values outside of this range.
   680      *
   681      * @param  c a char value.
   682      * @return a <tt>Character</tt> instance representing <tt>c</tt>.
   683      * @since  1.5
   684      */
   685     public static Character valueOf(char c) {
   686         if (c <= 127) { // must cache
   687             return CharacterCache.cache[(int)c];
   688         }
   689         return new Character(c);
   690     }
   691 
   692     /**
   693      * Returns the value of this {@code Character} object.
   694      * @return  the primitive {@code char} value represented by
   695      *          this object.
   696      */
   697     public char charValue() {
   698         return value;
   699     }
   700 
   701     /**
   702      * Returns a hash code for this {@code Character}; equal to the result
   703      * of invoking {@code charValue()}.
   704      *
   705      * @return a hash code value for this {@code Character}
   706      */
   707     public int hashCode() {
   708         return (int)value;
   709     }
   710 
   711     /**
   712      * Compares this object against the specified object.
   713      * The result is {@code true} if and only if the argument is not
   714      * {@code null} and is a {@code Character} object that
   715      * represents the same {@code char} value as this object.
   716      *
   717      * @param   obj   the object to compare with.
   718      * @return  {@code true} if the objects are the same;
   719      *          {@code false} otherwise.
   720      */
   721     public boolean equals(Object obj) {
   722         if (obj instanceof Character) {
   723             return value == ((Character)obj).charValue();
   724         }
   725         return false;
   726     }
   727 
   728     /**
   729      * Returns a {@code String} object representing this
   730      * {@code Character}'s value.  The result is a string of
   731      * length 1 whose sole component is the primitive
   732      * {@code char} value represented by this
   733      * {@code Character} object.
   734      *
   735      * @return  a string representation of this object.
   736      */
   737     public String toString() {
   738         char buf[] = {value};
   739         return String.valueOf(buf);
   740     }
   741 
   742     /**
   743      * Returns a {@code String} object representing the
   744      * specified {@code char}.  The result is a string of length
   745      * 1 consisting solely of the specified {@code char}.
   746      *
   747      * @param c the {@code char} to be converted
   748      * @return the string representation of the specified {@code char}
   749      * @since 1.4
   750      */
   751     public static String toString(char c) {
   752         return String.valueOf(c);
   753     }
   754 
   755     /**
   756      * Determines whether the specified code point is a valid
   757      * <a href="http://www.unicode.org/glossary/#code_point">
   758      * Unicode code point value</a>.
   759      *
   760      * @param  codePoint the Unicode code point to be tested
   761      * @return {@code true} if the specified code point value is between
   762      *         {@link #MIN_CODE_POINT} and
   763      *         {@link #MAX_CODE_POINT} inclusive;
   764      *         {@code false} otherwise.
   765      * @since  1.5
   766      */
   767     public static boolean isValidCodePoint(int codePoint) {
   768         // Optimized form of:
   769         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
   770         int plane = codePoint >>> 16;
   771         return plane < ((MAX_CODE_POINT + 1) >>> 16);
   772     }
   773 
   774     /**
   775      * Determines whether the specified character (Unicode code point)
   776      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
   777      * Such code points can be represented using a single {@code char}.
   778      *
   779      * @param  codePoint the character (Unicode code point) to be tested
   780      * @return {@code true} if the specified code point is between
   781      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
   782      *         {@code false} otherwise.
   783      * @since  1.7
   784      */
   785     public static boolean isBmpCodePoint(int codePoint) {
   786         return codePoint >>> 16 == 0;
   787         // Optimized form of:
   788         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
   789         // We consistently use logical shift (>>>) to facilitate
   790         // additional runtime optimizations.
   791     }
   792 
   793     /**
   794      * Determines whether the specified character (Unicode code point)
   795      * is in the <a href="#supplementary">supplementary character</a> range.
   796      *
   797      * @param  codePoint the character (Unicode code point) to be tested
   798      * @return {@code true} if the specified code point is between
   799      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
   800      *         {@link #MAX_CODE_POINT} inclusive;
   801      *         {@code false} otherwise.
   802      * @since  1.5
   803      */
   804     public static boolean isSupplementaryCodePoint(int codePoint) {
   805         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
   806             && codePoint <  MAX_CODE_POINT + 1;
   807     }
   808 
   809     /**
   810      * Determines if the given {@code char} value is a
   811      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
   812      * Unicode high-surrogate code unit</a>
   813      * (also known as <i>leading-surrogate code unit</i>).
   814      *
   815      * <p>Such values do not represent characters by themselves,
   816      * but are used in the representation of
   817      * <a href="#supplementary">supplementary characters</a>
   818      * in the UTF-16 encoding.
   819      *
   820      * @param  ch the {@code char} value to be tested.
   821      * @return {@code true} if the {@code char} value is between
   822      *         {@link #MIN_HIGH_SURROGATE} and
   823      *         {@link #MAX_HIGH_SURROGATE} inclusive;
   824      *         {@code false} otherwise.
   825      * @see    Character#isLowSurrogate(char)
   826      * @see    Character.UnicodeBlock#of(int)
   827      * @since  1.5
   828      */
   829     public static boolean isHighSurrogate(char ch) {
   830         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
   831         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
   832     }
   833 
   834     /**
   835      * Determines if the given {@code char} value is a
   836      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
   837      * Unicode low-surrogate code unit</a>
   838      * (also known as <i>trailing-surrogate code unit</i>).
   839      *
   840      * <p>Such values do not represent characters by themselves,
   841      * but are used in the representation of
   842      * <a href="#supplementary">supplementary characters</a>
   843      * in the UTF-16 encoding.
   844      *
   845      * @param  ch the {@code char} value to be tested.
   846      * @return {@code true} if the {@code char} value is between
   847      *         {@link #MIN_LOW_SURROGATE} and
   848      *         {@link #MAX_LOW_SURROGATE} inclusive;
   849      *         {@code false} otherwise.
   850      * @see    Character#isHighSurrogate(char)
   851      * @since  1.5
   852      */
   853     public static boolean isLowSurrogate(char ch) {
   854         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
   855     }
   856 
   857     /**
   858      * Determines if the given {@code char} value is a Unicode
   859      * <i>surrogate code unit</i>.
   860      *
   861      * <p>Such values do not represent characters by themselves,
   862      * but are used in the representation of
   863      * <a href="#supplementary">supplementary characters</a>
   864      * in the UTF-16 encoding.
   865      *
   866      * <p>A char value is a surrogate code unit if and only if it is either
   867      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
   868      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
   869      *
   870      * @param  ch the {@code char} value to be tested.
   871      * @return {@code true} if the {@code char} value is between
   872      *         {@link #MIN_SURROGATE} and
   873      *         {@link #MAX_SURROGATE} inclusive;
   874      *         {@code false} otherwise.
   875      * @since  1.7
   876      */
   877     public static boolean isSurrogate(char ch) {
   878         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
   879     }
   880 
   881     /**
   882      * Determines whether the specified pair of {@code char}
   883      * values is a valid
   884      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
   885      * Unicode surrogate pair</a>.
   886      *
   887      * <p>This method is equivalent to the expression:
   888      * <blockquote><pre>
   889      * isHighSurrogate(high) &amp;&amp; isLowSurrogate(low)
   890      * </pre></blockquote>
   891      *
   892      * @param  high the high-surrogate code value to be tested
   893      * @param  low the low-surrogate code value to be tested
   894      * @return {@code true} if the specified high and
   895      * low-surrogate code values represent a valid surrogate pair;
   896      * {@code false} otherwise.
   897      * @since  1.5
   898      */
   899     public static boolean isSurrogatePair(char high, char low) {
   900         return isHighSurrogate(high) && isLowSurrogate(low);
   901     }
   902 
   903     /**
   904      * Determines the number of {@code char} values needed to
   905      * represent the specified character (Unicode code point). If the
   906      * specified character is equal to or greater than 0x10000, then
   907      * the method returns 2. Otherwise, the method returns 1.
   908      *
   909      * <p>This method doesn't validate the specified character to be a
   910      * valid Unicode code point. The caller must validate the
   911      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
   912      * if necessary.
   913      *
   914      * @param   codePoint the character (Unicode code point) to be tested.
   915      * @return  2 if the character is 0x10000 or greater (validity is not checked); 1 otherwise.
   916      * @see     Character#isSupplementaryCodePoint(int)
   917      * @since   1.5
   918      */
   919     public static int charCount(int codePoint) {
   920         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
   921     }
   922 
   923     /**
   924      * Converts the specified surrogate pair to its supplementary code
   925      * point value. This method does not validate the specified
   926      * surrogate pair. The caller must validate it using {@link
   927      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
   928      *
   929      * @param  high the high-surrogate code unit
   930      * @param  low the low-surrogate code unit
   931      * @return the supplementary code point composed from the
   932      *         specified surrogate pair.
   933      * @since  1.5
   934      */
   935     public static int toCodePoint(char high, char low) {
   936         // Optimized form of:
   937         // return ((high - MIN_HIGH_SURROGATE) << 10)
   938         //         + (low - MIN_LOW_SURROGATE)
   939         //         + MIN_SUPPLEMENTARY_CODE_POINT;
   940         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
   941                                        - (MIN_HIGH_SURROGATE << 10)
   942                                        - MIN_LOW_SURROGATE);
   943     }
   944 
   945     /**
   946      * Returns the code point at the given index of the
   947      * {@code CharSequence}. If the {@code char} value at
   948      * the given index in the {@code CharSequence} is in the
   949      * high-surrogate range, the following index is less than the
   950      * length of the {@code CharSequence}, and the
   951      * {@code char} value at the following index is in the
   952      * low-surrogate range, then the supplementary code point
   953      * corresponding to this surrogate pair is returned. Otherwise,
   954      * the {@code char} value at the given index is returned.
   955      *
   956      * @param seq a sequence of {@code char} values (Unicode code
   957      * units)
   958      * @param index the index to the {@code char} values (Unicode
   959      * code units) in {@code seq} to be converted
   960      * @return the Unicode code point at the given index
   961      * @exception NullPointerException if {@code seq} is null.
   962      * @exception IndexOutOfBoundsException if the value
   963      * {@code index} is negative or not less than
   964      * {@link CharSequence#length() seq.length()}.
   965      * @since  1.5
   966      */
   967     public static int codePointAt(CharSequence seq, int index) {
   968         char c1 = seq.charAt(index++);
   969         if (isHighSurrogate(c1)) {
   970             if (index < seq.length()) {
   971                 char c2 = seq.charAt(index);
   972                 if (isLowSurrogate(c2)) {
   973                     return toCodePoint(c1, c2);
   974                 }
   975             }
   976         }
   977         return c1;
   978     }
   979 
   980     /**
   981      * Returns the code point at the given index of the
   982      * {@code char} array. If the {@code char} value at
   983      * the given index in the {@code char} array is in the
   984      * high-surrogate range, the following index is less than the
   985      * length of the {@code char} array, and the
   986      * {@code char} value at the following index is in the
   987      * low-surrogate range, then the supplementary code point
   988      * corresponding to this surrogate pair is returned. Otherwise,
   989      * the {@code char} value at the given index is returned.
   990      *
   991      * @param a the {@code char} array
   992      * @param index the index to the {@code char} values (Unicode
   993      * code units) in the {@code char} array to be converted
   994      * @return the Unicode code point at the given index
   995      * @exception NullPointerException if {@code a} is null.
   996      * @exception IndexOutOfBoundsException if the value
   997      * {@code index} is negative or not less than
   998      * the length of the {@code char} array.
   999      * @since  1.5
  1000      */
  1001     public static int codePointAt(char[] a, int index) {
  1002         return codePointAtImpl(a, index, a.length);
  1003     }
  1004 
  1005     /**
  1006      * Returns the code point at the given index of the
  1007      * {@code char} array, where only array elements with
  1008      * {@code index} less than {@code limit} can be used. If
  1009      * the {@code char} value at the given index in the
  1010      * {@code char} array is in the high-surrogate range, the
  1011      * following index is less than the {@code limit}, and the
  1012      * {@code char} value at the following index is in the
  1013      * low-surrogate range, then the supplementary code point
  1014      * corresponding to this surrogate pair is returned. Otherwise,
  1015      * the {@code char} value at the given index is returned.
  1016      *
  1017      * @param a the {@code char} array
  1018      * @param index the index to the {@code char} values (Unicode
  1019      * code units) in the {@code char} array to be converted
  1020      * @param limit the index after the last array element that
  1021      * can be used in the {@code char} array
  1022      * @return the Unicode code point at the given index
  1023      * @exception NullPointerException if {@code a} is null.
  1024      * @exception IndexOutOfBoundsException if the {@code index}
  1025      * argument is negative or not less than the {@code limit}
  1026      * argument, or if the {@code limit} argument is negative or
  1027      * greater than the length of the {@code char} array.
  1028      * @since  1.5
  1029      */
  1030     public static int codePointAt(char[] a, int index, int limit) {
  1031         if (index >= limit || limit < 0 || limit > a.length) {
  1032             throw new IndexOutOfBoundsException();
  1033         }
  1034         return codePointAtImpl(a, index, limit);
  1035     }
  1036 
  1037     // throws ArrayIndexOutOfBoundsException if index out of bounds
  1038     static int codePointAtImpl(char[] a, int index, int limit) {
  1039         char c1 = a[index++];
  1040         if (isHighSurrogate(c1)) {
  1041             if (index < limit) {
  1042                 char c2 = a[index];
  1043                 if (isLowSurrogate(c2)) {
  1044                     return toCodePoint(c1, c2);
  1045                 }
  1046             }
  1047         }
  1048         return c1;
  1049     }
  1050 
  1051     /**
  1052      * Returns the code point preceding the given index of the
  1053      * {@code CharSequence}. If the {@code char} value at
  1054      * {@code (index - 1)} in the {@code CharSequence} is in
  1055      * the low-surrogate range, {@code (index - 2)} is not
  1056      * negative, and the {@code char} value at {@code (index - 2)}
  1057      * in the {@code CharSequence} is in the
  1058      * high-surrogate range, then the supplementary code point
  1059      * corresponding to this surrogate pair is returned. Otherwise,
  1060      * the {@code char} value at {@code (index - 1)} is
  1061      * returned.
  1062      *
  1063      * @param seq the {@code CharSequence} instance
  1064      * @param index the index following the code point that should be returned
  1065      * @return the Unicode code point value before the given index.
  1066      * @exception NullPointerException if {@code seq} is null.
  1067      * @exception IndexOutOfBoundsException if the {@code index}
  1068      * argument is less than 1 or greater than {@link
  1069      * CharSequence#length() seq.length()}.
  1070      * @since  1.5
  1071      */
  1072     public static int codePointBefore(CharSequence seq, int index) {
  1073         char c2 = seq.charAt(--index);
  1074         if (isLowSurrogate(c2)) {
  1075             if (index > 0) {
  1076                 char c1 = seq.charAt(--index);
  1077                 if (isHighSurrogate(c1)) {
  1078                     return toCodePoint(c1, c2);
  1079                 }
  1080             }
  1081         }
  1082         return c2;
  1083     }
  1084 
  1085     /**
  1086      * Returns the code point preceding the given index of the
  1087      * {@code char} array. If the {@code char} value at
  1088      * {@code (index - 1)} in the {@code char} array is in
  1089      * the low-surrogate range, {@code (index - 2)} is not
  1090      * negative, and the {@code char} value at {@code (index - 2)}
  1091      * in the {@code char} array is in the
  1092      * high-surrogate range, then the supplementary code point
  1093      * corresponding to this surrogate pair is returned. Otherwise,
  1094      * the {@code char} value at {@code (index - 1)} is
  1095      * returned.
  1096      *
  1097      * @param a the {@code char} array
  1098      * @param index the index following the code point that should be returned
  1099      * @return the Unicode code point value before the given index.
  1100      * @exception NullPointerException if {@code a} is null.
  1101      * @exception IndexOutOfBoundsException if the {@code index}
  1102      * argument is less than 1 or greater than the length of the
  1103      * {@code char} array
  1104      * @since  1.5
  1105      */
  1106     public static int codePointBefore(char[] a, int index) {
  1107         return codePointBeforeImpl(a, index, 0);
  1108     }
  1109 
  1110     /**
  1111      * Returns the code point preceding the given index of the
  1112      * {@code char} array, where only array elements with
  1113      * {@code index} greater than or equal to {@code start}
  1114      * can be used. If the {@code char} value at {@code (index - 1)}
  1115      * in the {@code char} array is in the
  1116      * low-surrogate range, {@code (index - 2)} is not less than
  1117      * {@code start}, and the {@code char} value at
  1118      * {@code (index - 2)} in the {@code char} array is in
  1119      * the high-surrogate range, then the supplementary code point
  1120      * corresponding to this surrogate pair is returned. Otherwise,
  1121      * the {@code char} value at {@code (index - 1)} is
  1122      * returned.
  1123      *
  1124      * @param a the {@code char} array
  1125      * @param index the index following the code point that should be returned
  1126      * @param start the index of the first array element in the
  1127      * {@code char} array
  1128      * @return the Unicode code point value before the given index.
  1129      * @exception NullPointerException if {@code a} is null.
  1130      * @exception IndexOutOfBoundsException if the {@code index}
  1131      * argument is not greater than the {@code start} argument or
  1132      * is greater than the length of the {@code char} array, or
  1133      * if the {@code start} argument is negative or not less than
  1134      * the length of the {@code char} array.
  1135      * @since  1.5
  1136      */
  1137     public static int codePointBefore(char[] a, int index, int start) {
  1138         if (index <= start || start < 0 || start >= a.length) {
  1139             throw new IndexOutOfBoundsException();
  1140         }
  1141         return codePointBeforeImpl(a, index, start);
  1142     }
  1143 
  1144     // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
  1145     static int codePointBeforeImpl(char[] a, int index, int start) {
  1146         char c2 = a[--index];
  1147         if (isLowSurrogate(c2)) {
  1148             if (index > start) {
  1149                 char c1 = a[--index];
  1150                 if (isHighSurrogate(c1)) {
  1151                     return toCodePoint(c1, c2);
  1152                 }
  1153             }
  1154         }
  1155         return c2;
  1156     }
  1157 
  1158     /**
  1159      * Returns the leading surrogate (a
  1160      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
  1161      * high surrogate code unit</a>) of the
  1162      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
  1163      * surrogate pair</a>
  1164      * representing the specified supplementary character (Unicode
  1165      * code point) in the UTF-16 encoding.  If the specified character
  1166      * is not a
  1167      * <a href="Character.html#supplementary">supplementary character</a>,
  1168      * an unspecified {@code char} is returned.
  1169      *
  1170      * <p>If
  1171      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
  1172      * is {@code true}, then
  1173      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
  1174      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
  1175      * are also always {@code true}.
  1176      *
  1177      * @param   codePoint a supplementary character (Unicode code point)
  1178      * @return  the leading surrogate code unit used to represent the
  1179      *          character in the UTF-16 encoding
  1180      * @since   1.7
  1181      */
  1182     public static char highSurrogate(int codePoint) {
  1183         return (char) ((codePoint >>> 10)
  1184             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
  1185     }
  1186 
  1187     /**
  1188      * Returns the trailing surrogate (a
  1189      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
  1190      * low surrogate code unit</a>) of the
  1191      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
  1192      * surrogate pair</a>
  1193      * representing the specified supplementary character (Unicode
  1194      * code point) in the UTF-16 encoding.  If the specified character
  1195      * is not a
  1196      * <a href="Character.html#supplementary">supplementary character</a>,
  1197      * an unspecified {@code char} is returned.
  1198      *
  1199      * <p>If
  1200      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
  1201      * is {@code true}, then
  1202      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
  1203      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
  1204      * are also always {@code true}.
  1205      *
  1206      * @param   codePoint a supplementary character (Unicode code point)
  1207      * @return  the trailing surrogate code unit used to represent the
  1208      *          character in the UTF-16 encoding
  1209      * @since   1.7
  1210      */
  1211     public static char lowSurrogate(int codePoint) {
  1212         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
  1213     }
  1214 
  1215     /**
  1216      * Converts the specified character (Unicode code point) to its
  1217      * UTF-16 representation. If the specified code point is a BMP
  1218      * (Basic Multilingual Plane or Plane 0) value, the same value is
  1219      * stored in {@code dst[dstIndex]}, and 1 is returned. If the
  1220      * specified code point is a supplementary character, its
  1221      * surrogate values are stored in {@code dst[dstIndex]}
  1222      * (high-surrogate) and {@code dst[dstIndex+1]}
  1223      * (low-surrogate), and 2 is returned.
  1224      *
  1225      * @param  codePoint the character (Unicode code point) to be converted.
  1226      * @param  dst an array of {@code char} in which the
  1227      * {@code codePoint}'s UTF-16 value is stored.
  1228      * @param dstIndex the start index into the {@code dst}
  1229      * array where the converted value is stored.
  1230      * @return 1 if the code point is a BMP code point, 2 if the
  1231      * code point is a supplementary code point.
  1232      * @exception IllegalArgumentException if the specified
  1233      * {@code codePoint} is not a valid Unicode code point.
  1234      * @exception NullPointerException if the specified {@code dst} is null.
  1235      * @exception IndexOutOfBoundsException if {@code dstIndex}
  1236      * is negative or not less than {@code dst.length}, or if
  1237      * {@code dst} at {@code dstIndex} doesn't have enough
  1238      * array element(s) to store the resulting {@code char}
  1239      * value(s). (If {@code dstIndex} is equal to
  1240      * {@code dst.length-1} and the specified
  1241      * {@code codePoint} is a supplementary character, the
  1242      * high-surrogate value is not stored in
  1243      * {@code dst[dstIndex]}.)
  1244      * @since  1.5
  1245      */
  1246     public static int toChars(int codePoint, char[] dst, int dstIndex) {
  1247         if (isBmpCodePoint(codePoint)) {
  1248             dst[dstIndex] = (char) codePoint;
  1249             return 1;
  1250         } else if (isValidCodePoint(codePoint)) {
  1251             toSurrogates(codePoint, dst, dstIndex);
  1252             return 2;
  1253         } else {
  1254             throw new IllegalArgumentException();
  1255         }
  1256     }
  1257 
  1258     /**
  1259      * Converts the specified character (Unicode code point) to its
  1260      * UTF-16 representation stored in a {@code char} array. If
  1261      * the specified code point is a BMP (Basic Multilingual Plane or
  1262      * Plane 0) value, the resulting {@code char} array has
  1263      * the same value as {@code codePoint}. If the specified code
  1264      * point is a supplementary code point, the resulting
  1265      * {@code char} array has the corresponding surrogate pair.
  1266      *
  1267      * @param  codePoint a Unicode code point
  1268      * @return a {@code char} array having
  1269      *         {@code codePoint}'s UTF-16 representation.
  1270      * @exception IllegalArgumentException if the specified
  1271      * {@code codePoint} is not a valid Unicode code point.
  1272      * @since  1.5
  1273      */
  1274     public static char[] toChars(int codePoint) {
  1275         if (isBmpCodePoint(codePoint)) {
  1276             return new char[] { (char) codePoint };
  1277         } else if (isValidCodePoint(codePoint)) {
  1278             char[] result = new char[2];
  1279             toSurrogates(codePoint, result, 0);
  1280             return result;
  1281         } else {
  1282             throw new IllegalArgumentException();
  1283         }
  1284     }
  1285 
  1286     static void toSurrogates(int codePoint, char[] dst, int index) {
  1287         // We write elements "backwards" to guarantee all-or-nothing
  1288         dst[index+1] = lowSurrogate(codePoint);
  1289         dst[index] = highSurrogate(codePoint);
  1290     }
  1291 
  1292     /**
  1293      * Returns the number of Unicode code points in the text range of
  1294      * the specified char sequence. The text range begins at the
  1295      * specified {@code beginIndex} and extends to the
  1296      * {@code char} at index {@code endIndex - 1}. Thus the
  1297      * length (in {@code char}s) of the text range is
  1298      * {@code endIndex-beginIndex}. Unpaired surrogates within
  1299      * the text range count as one code point each.
  1300      *
  1301      * @param seq the char sequence
  1302      * @param beginIndex the index to the first {@code char} of
  1303      * the text range.
  1304      * @param endIndex the index after the last {@code char} of
  1305      * the text range.
  1306      * @return the number of Unicode code points in the specified text
  1307      * range
  1308      * @exception NullPointerException if {@code seq} is null.
  1309      * @exception IndexOutOfBoundsException if the
  1310      * {@code beginIndex} is negative, or {@code endIndex}
  1311      * is larger than the length of the given sequence, or
  1312      * {@code beginIndex} is larger than {@code endIndex}.
  1313      * @since  1.5
  1314      */
  1315     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
  1316         int length = seq.length();
  1317         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
  1318             throw new IndexOutOfBoundsException();
  1319         }
  1320         int n = endIndex - beginIndex;
  1321         for (int i = beginIndex; i < endIndex; ) {
  1322             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
  1323                 isLowSurrogate(seq.charAt(i))) {
  1324                 n--;
  1325                 i++;
  1326             }
  1327         }
  1328         return n;
  1329     }
  1330 
  1331     /**
  1332      * Returns the number of Unicode code points in a subarray of the
  1333      * {@code char} array argument. The {@code offset}
  1334      * argument is the index of the first {@code char} of the
  1335      * subarray and the {@code count} argument specifies the
  1336      * length of the subarray in {@code char}s. Unpaired
  1337      * surrogates within the subarray count as one code point each.
  1338      *
  1339      * @param a the {@code char} array
  1340      * @param offset the index of the first {@code char} in the
  1341      * given {@code char} array
  1342      * @param count the length of the subarray in {@code char}s
  1343      * @return the number of Unicode code points in the specified subarray
  1344      * @exception NullPointerException if {@code a} is null.
  1345      * @exception IndexOutOfBoundsException if {@code offset} or
  1346      * {@code count} is negative, or if {@code offset +
  1347      * count} is larger than the length of the given array.
  1348      * @since  1.5
  1349      */
  1350     public static int codePointCount(char[] a, int offset, int count) {
  1351         if (count > a.length - offset || offset < 0 || count < 0) {
  1352             throw new IndexOutOfBoundsException();
  1353         }
  1354         return codePointCountImpl(a, offset, count);
  1355     }
  1356 
  1357     static int codePointCountImpl(char[] a, int offset, int count) {
  1358         int endIndex = offset + count;
  1359         int n = count;
  1360         for (int i = offset; i < endIndex; ) {
  1361             if (isHighSurrogate(a[i++]) && i < endIndex &&
  1362                 isLowSurrogate(a[i])) {
  1363                 n--;
  1364                 i++;
  1365             }
  1366         }
  1367         return n;
  1368     }
  1369 
  1370     /**
  1371      * Returns the index within the given char sequence that is offset
  1372      * from the given {@code index} by {@code codePointOffset}
  1373      * code points. Unpaired surrogates within the text range given by
  1374      * {@code index} and {@code codePointOffset} count as
  1375      * one code point each.
  1376      *
  1377      * @param seq the char sequence
  1378      * @param index the index to be offset
  1379      * @param codePointOffset the offset in code points
  1380      * @return the index within the char sequence
  1381      * @exception NullPointerException if {@code seq} is null.
  1382      * @exception IndexOutOfBoundsException if {@code index}
  1383      *   is negative or larger than the length of the char sequence,
  1384      *   or if {@code codePointOffset} is positive and the
  1385      *   subsequence starting with {@code index} has fewer than
  1386      *   {@code codePointOffset} code points, or if
  1387      *   {@code codePointOffset} is negative and the subsequence
  1388      *   before {@code index} has fewer than the absolute value
  1389      *   of {@code codePointOffset} code points.
  1390      * @since 1.5
  1391      */
  1392     public static int offsetByCodePoints(CharSequence seq, int index,
  1393                                          int codePointOffset) {
  1394         int length = seq.length();
  1395         if (index < 0 || index > length) {
  1396             throw new IndexOutOfBoundsException();
  1397         }
  1398 
  1399         int x = index;
  1400         if (codePointOffset >= 0) {
  1401             int i;
  1402             for (i = 0; x < length && i < codePointOffset; i++) {
  1403                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
  1404                     isLowSurrogate(seq.charAt(x))) {
  1405                     x++;
  1406                 }
  1407             }
  1408             if (i < codePointOffset) {
  1409                 throw new IndexOutOfBoundsException();
  1410             }
  1411         } else {
  1412             int i;
  1413             for (i = codePointOffset; x > 0 && i < 0; i++) {
  1414                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
  1415                     isHighSurrogate(seq.charAt(x-1))) {
  1416                     x--;
  1417                 }
  1418             }
  1419             if (i < 0) {
  1420                 throw new IndexOutOfBoundsException();
  1421             }
  1422         }
  1423         return x;
  1424     }
  1425 
  1426     /**
  1427      * Returns the index within the given {@code char} subarray
  1428      * that is offset from the given {@code index} by
  1429      * {@code codePointOffset} code points. The
  1430      * {@code start} and {@code count} arguments specify a
  1431      * subarray of the {@code char} array. Unpaired surrogates
  1432      * within the text range given by {@code index} and
  1433      * {@code codePointOffset} count as one code point each.
  1434      *
  1435      * @param a the {@code char} array
  1436      * @param start the index of the first {@code char} of the
  1437      * subarray
  1438      * @param count the length of the subarray in {@code char}s
  1439      * @param index the index to be offset
  1440      * @param codePointOffset the offset in code points
  1441      * @return the index within the subarray
  1442      * @exception NullPointerException if {@code a} is null.
  1443      * @exception IndexOutOfBoundsException
  1444      *   if {@code start} or {@code count} is negative,
  1445      *   or if {@code start + count} is larger than the length of
  1446      *   the given array,
  1447      *   or if {@code index} is less than {@code start} or
  1448      *   larger than {@code start + count},
  1449      *   or if {@code codePointOffset} is positive and the text range
  1450      *   starting with {@code index} and ending with {@code start + count - 1}
  1451      *   has fewer than {@code codePointOffset} code
  1452      *   points,
  1453      *   or if {@code codePointOffset} is negative and the text range
  1454      *   starting with {@code start} and ending with {@code index - 1}
  1455      *   has fewer than the absolute value of
  1456      *   {@code codePointOffset} code points.
  1457      * @since 1.5
  1458      */
  1459     public static int offsetByCodePoints(char[] a, int start, int count,
  1460                                          int index, int codePointOffset) {
  1461         if (count > a.length-start || start < 0 || count < 0
  1462             || index < start || index > start+count) {
  1463             throw new IndexOutOfBoundsException();
  1464         }
  1465         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
  1466     }
  1467 
  1468     static int offsetByCodePointsImpl(char[]a, int start, int count,
  1469                                       int index, int codePointOffset) {
  1470         int x = index;
  1471         if (codePointOffset >= 0) {
  1472             int limit = start + count;
  1473             int i;
  1474             for (i = 0; x < limit && i < codePointOffset; i++) {
  1475                 if (isHighSurrogate(a[x++]) && x < limit &&
  1476                     isLowSurrogate(a[x])) {
  1477                     x++;
  1478                 }
  1479             }
  1480             if (i < codePointOffset) {
  1481                 throw new IndexOutOfBoundsException();
  1482             }
  1483         } else {
  1484             int i;
  1485             for (i = codePointOffset; x > start && i < 0; i++) {
  1486                 if (isLowSurrogate(a[--x]) && x > start &&
  1487                     isHighSurrogate(a[x-1])) {
  1488                     x--;
  1489                 }
  1490             }
  1491             if (i < 0) {
  1492                 throw new IndexOutOfBoundsException();
  1493             }
  1494         }
  1495         return x;
  1496     }
  1497 
  1498     /**
  1499      * Determines if the specified character is a lowercase character.
  1500      * <p>
  1501      * A character is lowercase if its general category type, provided
  1502      * by {@code Character.getType(ch)}, is
  1503      * {@code LOWERCASE_LETTER}, or it has contributory property
  1504      * Other_Lowercase as defined by the Unicode Standard.
  1505      * <p>
  1506      * The following are examples of lowercase characters:
  1507      * <p><blockquote><pre>
  1508      * a b c d e f g h i j k l m n o p q r s t u v w x y z
  1509      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
  1510      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
  1511      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
  1512      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
  1513      * </pre></blockquote>
  1514      * <p> Many other Unicode characters are lowercase too.
  1515      *
  1516      * <p><b>Note:</b> This method cannot handle <a
  1517      * href="#supplementary"> supplementary characters</a>. To support
  1518      * all Unicode characters, including supplementary characters, use
  1519      * the {@link #isLowerCase(int)} method.
  1520      *
  1521      * @param   ch   the character to be tested.
  1522      * @return  {@code true} if the character is lowercase;
  1523      *          {@code false} otherwise.
  1524      * @see     Character#isLowerCase(char)
  1525      * @see     Character#isTitleCase(char)
  1526      * @see     Character#toLowerCase(char)
  1527      * @see     Character#getType(char)
  1528      */
  1529     public static boolean isLowerCase(char ch) {
  1530         return ch == toLowerCase(ch);
  1531     }
  1532 
  1533     /**
  1534      * Determines if the specified character is an uppercase character.
  1535      * <p>
  1536      * A character is uppercase if its general category type, provided by
     * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
     * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
  1539      * <p>
  1540      * The following are examples of uppercase characters:
  1541      * <p><blockquote><pre>
  1542      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
  1543      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
  1544      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
  1545      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
  1546      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
  1547      * </pre></blockquote>
  1548      * <p> Many other Unicode characters are uppercase too.<p>
  1549      *
  1550      * <p><b>Note:</b> This method cannot handle <a
  1551      * href="#supplementary"> supplementary characters</a>. To support
  1552      * all Unicode characters, including supplementary characters, use
  1553      * the {@link #isUpperCase(int)} method.
  1554      *
  1555      * @param   ch   the character to be tested.
  1556      * @return  {@code true} if the character is uppercase;
  1557      *          {@code false} otherwise.
  1558      * @see     Character#isLowerCase(char)
  1559      * @see     Character#isTitleCase(char)
  1560      * @see     Character#toUpperCase(char)
  1561      * @see     Character#getType(char)
  1562      * @since   1.0
  1563      */
  1564     public static boolean isUpperCase(char ch) {
  1565         return ch == toUpperCase(ch);
  1566     }
  1567 
  1568     /**
  1569      * Determines if the specified character is a titlecase character.
  1570      * <p>
  1571      * A character is a titlecase character if its general
  1572      * category type, provided by {@code Character.getType(ch)},
  1573      * is {@code TITLECASE_LETTER}.
  1574      * <p>
  1575      * Some characters look like pairs of Latin letters. For example, there
  1576      * is an uppercase letter that looks like "LJ" and has a corresponding
  1577      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
  1578      * is the appropriate form to use when rendering a word in lowercase
  1579      * with initial capitals, as for a book title.
  1580      * <p>
  1581      * These are some of the Unicode characters for which this method returns
  1582      * {@code true}:
  1583      * <ul>
  1584      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
  1585      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
  1586      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
  1587      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
  1588      * </ul>
  1589      * <p> Many other Unicode characters are titlecase too.<p>
  1590      *
  1591      * <p><b>Note:</b> This method cannot handle <a
  1592      * href="#supplementary"> supplementary characters</a>. To support
  1593      * all Unicode characters, including supplementary characters, use
  1594      * the {@link #isTitleCase(int)} method.
  1595      *
  1596      * @param   ch   the character to be tested.
  1597      * @return  {@code true} if the character is titlecase;
  1598      *          {@code false} otherwise.
  1599      * @see     Character#isLowerCase(char)
  1600      * @see     Character#isUpperCase(char)
  1601      * @see     Character#toTitleCase(char)
  1602      * @see     Character#getType(char)
  1603      * @since   1.0.2
  1604      */
  1605     public static boolean isTitleCase(char ch) {
  1606         return isTitleCase((int)ch);
  1607     }
  1608 
  1609     /**
  1610      * Determines if the specified character (Unicode code point) is a titlecase character.
  1611      * <p>
  1612      * A character is a titlecase character if its general
  1613      * category type, provided by {@link Character#getType(int) getType(codePoint)},
  1614      * is {@code TITLECASE_LETTER}.
  1615      * <p>
  1616      * Some characters look like pairs of Latin letters. For example, there
  1617      * is an uppercase letter that looks like "LJ" and has a corresponding
  1618      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
  1619      * is the appropriate form to use when rendering a word in lowercase
  1620      * with initial capitals, as for a book title.
  1621      * <p>
  1622      * These are some of the Unicode characters for which this method returns
  1623      * {@code true}:
  1624      * <ul>
  1625      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
  1626      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
  1627      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
  1628      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
  1629      * </ul>
  1630      * <p> Many other Unicode characters are titlecase too.<p>
  1631      *
  1632      * @param   codePoint the character (Unicode code point) to be tested.
  1633      * @return  {@code true} if the character is titlecase;
  1634      *          {@code false} otherwise.
  1635      * @see     Character#isLowerCase(int)
  1636      * @see     Character#isUpperCase(int)
  1637      * @see     Character#toTitleCase(int)
  1638      * @see     Character#getType(int)
  1639      * @since   1.5
  1640      */
  1641     public static boolean isTitleCase(int codePoint) {
  1642         return getType(codePoint) == Character.TITLECASE_LETTER;
  1643     }
  1644 
  1645     /**
  1646      * Determines if the specified character is a digit.
  1647      * <p>
  1648      * A character is a digit if its general category type, provided
  1649      * by {@code Character.getType(ch)}, is
  1650      * {@code DECIMAL_DIGIT_NUMBER}.
  1651      * <p>
  1652      * Some Unicode character ranges that contain digits:
  1653      * <ul>
  1654      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
  1655      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
  1656      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
  1657      *     Arabic-Indic digits
  1658      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
  1659      *     Extended Arabic-Indic digits
  1660      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
  1661      *     Devanagari digits
  1662      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
  1663      *     Fullwidth digits
  1664      * </ul>
  1665      *
  1666      * Many other character ranges contain digits as well.
  1667      *
  1668      * <p><b>Note:</b> This method cannot handle <a
  1669      * href="#supplementary"> supplementary characters</a>. To support
  1670      * all Unicode characters, including supplementary characters, use
  1671      * the {@link #isDigit(int)} method.
  1672      *
  1673      * @param   ch   the character to be tested.
  1674      * @return  {@code true} if the character is a digit;
  1675      *          {@code false} otherwise.
  1676      * @see     Character#digit(char, int)
  1677      * @see     Character#forDigit(int, int)
  1678      * @see     Character#getType(char)
  1679      */
  1680     public static boolean isDigit(char ch) {
  1681         return String.valueOf(ch).matches("\\d");
  1682     }
  1683 
  1684     /**
  1685      * Determines if the specified character (Unicode code point) is a digit.
  1686      * <p>
  1687      * A character is a digit if its general category type, provided
  1688      * by {@link Character#getType(int) getType(codePoint)}, is
  1689      * {@code DECIMAL_DIGIT_NUMBER}.
  1690      * <p>
  1691      * Some Unicode character ranges that contain digits:
  1692      * <ul>
  1693      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
  1694      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
  1695      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
  1696      *     Arabic-Indic digits
  1697      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
  1698      *     Extended Arabic-Indic digits
  1699      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
  1700      *     Devanagari digits
  1701      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
  1702      *     Fullwidth digits
  1703      * </ul>
  1704      *
  1705      * Many other character ranges contain digits as well.
  1706      *
  1707      * @param   codePoint the character (Unicode code point) to be tested.
  1708      * @return  {@code true} if the character is a digit;
  1709      *          {@code false} otherwise.
  1710      * @see     Character#forDigit(int, int)
  1711      * @see     Character#getType(int)
  1712      * @since   1.5
  1713      */
  1714     public static boolean isDigit(int codePoint) {
  1715         return fromCodeChars(codePoint).matches("\\d");
  1716     }
  1717     
  1718     @JavaScriptBody(args = "c", body = "return String.fromCharCode(c);")
  1719     private native static String fromCodeChars(int codePoint);
  1720 
  1721     /**
  1722      * Determines if a character is defined in Unicode.
  1723      * <p>
  1724      * A character is defined if at least one of the following is true:
  1725      * <ul>
  1726      * <li>It has an entry in the UnicodeData file.
  1727      * <li>It has a value in a range defined by the UnicodeData file.
  1728      * </ul>
  1729      *
  1730      * <p><b>Note:</b> This method cannot handle <a
  1731      * href="#supplementary"> supplementary characters</a>. To support
  1732      * all Unicode characters, including supplementary characters, use
  1733      * the {@link #isDefined(int)} method.
  1734      *
  1735      * @param   ch   the character to be tested
  1736      * @return  {@code true} if the character has a defined meaning
  1737      *          in Unicode; {@code false} otherwise.
  1738      * @see     Character#isDigit(char)
  1739      * @see     Character#isLetter(char)
  1740      * @see     Character#isLetterOrDigit(char)
  1741      * @see     Character#isLowerCase(char)
  1742      * @see     Character#isTitleCase(char)
  1743      * @see     Character#isUpperCase(char)
  1744      * @since   1.0.2
  1745      */
  1746     public static boolean isDefined(char ch) {
  1747         return isDefined((int)ch);
  1748     }
  1749 
  1750     /**
  1751      * Determines if a character (Unicode code point) is defined in Unicode.
  1752      * <p>
  1753      * A character is defined if at least one of the following is true:
  1754      * <ul>
  1755      * <li>It has an entry in the UnicodeData file.
  1756      * <li>It has a value in a range defined by the UnicodeData file.
  1757      * </ul>
  1758      *
  1759      * @param   codePoint the character (Unicode code point) to be tested.
  1760      * @return  {@code true} if the character has a defined meaning
  1761      *          in Unicode; {@code false} otherwise.
  1762      * @see     Character#isDigit(int)
  1763      * @see     Character#isLetter(int)
  1764      * @see     Character#isLetterOrDigit(int)
  1765      * @see     Character#isLowerCase(int)
  1766      * @see     Character#isTitleCase(int)
  1767      * @see     Character#isUpperCase(int)
  1768      * @since   1.5
  1769      */
  1770     public static boolean isDefined(int codePoint) {
  1771         return getType(codePoint) != Character.UNASSIGNED;
  1772     }
  1773 
  1774     /**
  1775      * Determines if the specified character is a letter.
  1776      * <p>
  1777      * A character is considered to be a letter if its general
  1778      * category type, provided by {@code Character.getType(ch)},
  1779      * is any of the following:
  1780      * <ul>
  1781      * <li> {@code UPPERCASE_LETTER}
  1782      * <li> {@code LOWERCASE_LETTER}
  1783      * <li> {@code TITLECASE_LETTER}
  1784      * <li> {@code MODIFIER_LETTER}
  1785      * <li> {@code OTHER_LETTER}
  1786      * </ul>
  1787      *
  1788      * Not all letters have case. Many characters are
  1789      * letters but are neither uppercase nor lowercase nor titlecase.
  1790      *
  1791      * <p><b>Note:</b> This method cannot handle <a
  1792      * href="#supplementary"> supplementary characters</a>. To support
  1793      * all Unicode characters, including supplementary characters, use
  1794      * the {@link #isLetter(int)} method.
  1795      *
  1796      * @param   ch   the character to be tested.
  1797      * @return  {@code true} if the character is a letter;
  1798      *          {@code false} otherwise.
  1799      * @see     Character#isDigit(char)
  1800      * @see     Character#isJavaIdentifierStart(char)
  1801      * @see     Character#isJavaLetter(char)
  1802      * @see     Character#isJavaLetterOrDigit(char)
  1803      * @see     Character#isLetterOrDigit(char)
  1804      * @see     Character#isLowerCase(char)
  1805      * @see     Character#isTitleCase(char)
  1806      * @see     Character#isUnicodeIdentifierStart(char)
  1807      * @see     Character#isUpperCase(char)
  1808      */
  1809     public static boolean isLetter(char ch) {
  1810         return String.valueOf(ch).matches("\\w") && !isDigit(ch);
  1811     }
  1812 
  1813     /**
  1814      * Determines if the specified character (Unicode code point) is a letter.
  1815      * <p>
  1816      * A character is considered to be a letter if its general
  1817      * category type, provided by {@link Character#getType(int) getType(codePoint)},
  1818      * is any of the following:
  1819      * <ul>
  1820      * <li> {@code UPPERCASE_LETTER}
  1821      * <li> {@code LOWERCASE_LETTER}
  1822      * <li> {@code TITLECASE_LETTER}
  1823      * <li> {@code MODIFIER_LETTER}
  1824      * <li> {@code OTHER_LETTER}
  1825      * </ul>
  1826      *
  1827      * Not all letters have case. Many characters are
  1828      * letters but are neither uppercase nor lowercase nor titlecase.
  1829      *
  1830      * @param   codePoint the character (Unicode code point) to be tested.
  1831      * @return  {@code true} if the character is a letter;
  1832      *          {@code false} otherwise.
  1833      * @see     Character#isDigit(int)
  1834      * @see     Character#isJavaIdentifierStart(int)
  1835      * @see     Character#isLetterOrDigit(int)
  1836      * @see     Character#isLowerCase(int)
  1837      * @see     Character#isTitleCase(int)
  1838      * @see     Character#isUnicodeIdentifierStart(int)
  1839      * @see     Character#isUpperCase(int)
  1840      * @since   1.5
  1841      */
  1842     public static boolean isLetter(int codePoint) {
  1843         return fromCodeChars(codePoint).matches("\\w") && !isDigit(codePoint);
  1844     }
  1845 
  1846     /**
  1847      * Determines if the specified character is a letter or digit.
  1848      * <p>
  1849      * A character is considered to be a letter or digit if either
  1850      * {@code Character.isLetter(char ch)} or
  1851      * {@code Character.isDigit(char ch)} returns
  1852      * {@code true} for the character.
  1853      *
  1854      * <p><b>Note:</b> This method cannot handle <a
  1855      * href="#supplementary"> supplementary characters</a>. To support
  1856      * all Unicode characters, including supplementary characters, use
  1857      * the {@link #isLetterOrDigit(int)} method.
  1858      *
  1859      * @param   ch   the character to be tested.
  1860      * @return  {@code true} if the character is a letter or digit;
  1861      *          {@code false} otherwise.
  1862      * @see     Character#isDigit(char)
  1863      * @see     Character#isJavaIdentifierPart(char)
  1864      * @see     Character#isJavaLetter(char)
  1865      * @see     Character#isJavaLetterOrDigit(char)
  1866      * @see     Character#isLetter(char)
  1867      * @see     Character#isUnicodeIdentifierPart(char)
  1868      * @since   1.0.2
  1869      */
  1870     public static boolean isLetterOrDigit(char ch) {
  1871         return String.valueOf(ch).matches("\\w");
  1872     }
  1873 
  1874     /**
  1875      * Determines if the specified character (Unicode code point) is a letter or digit.
  1876      * <p>
  1877      * A character is considered to be a letter or digit if either
  1878      * {@link #isLetter(int) isLetter(codePoint)} or
  1879      * {@link #isDigit(int) isDigit(codePoint)} returns
  1880      * {@code true} for the character.
  1881      *
  1882      * @param   codePoint the character (Unicode code point) to be tested.
  1883      * @return  {@code true} if the character is a letter or digit;
  1884      *          {@code false} otherwise.
  1885      * @see     Character#isDigit(int)
  1886      * @see     Character#isJavaIdentifierPart(int)
  1887      * @see     Character#isLetter(int)
  1888      * @see     Character#isUnicodeIdentifierPart(int)
  1889      * @since   1.5
  1890      */
  1891     public static boolean isLetterOrDigit(int codePoint) {
  1892         return fromCodeChars(codePoint).matches("\\w");
  1893     }
  1894     
  1895     static int getType(int x) {
  1896         throw new UnsupportedOperationException();
  1897     }
  1898  
  1899     /**
  1900      * Determines if the specified character is
  1901      * permissible as the first character in a Java identifier.
  1902      * <p>
  1903      * A character may start a Java identifier if and only if
  1904      * one of the following conditions is true:
  1905      * <ul>
  1906      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
  1907      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
  1908      * <li> {@code ch} is a currency symbol (such as {@code '$'})
  1909      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
  1910      * </ul>
  1911      *
  1912      * <p><b>Note:</b> This method cannot handle <a
  1913      * href="#supplementary"> supplementary characters</a>. To support
  1914      * all Unicode characters, including supplementary characters, use
  1915      * the {@link #isJavaIdentifierStart(int)} method.
  1916      *
  1917      * @param   ch the character to be tested.
  1918      * @return  {@code true} if the character may start a Java identifier;
  1919      *          {@code false} otherwise.
  1920      * @see     Character#isJavaIdentifierPart(char)
  1921      * @see     Character#isLetter(char)
  1922      * @see     Character#isUnicodeIdentifierStart(char)
  1923      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
  1924      * @since   1.1
  1925      */
  1926     public static boolean isJavaIdentifierStart(char ch) {
  1927         return isJavaIdentifierStart((int)ch);
  1928     }
  1929 
  1930     /**
  1931      * Determines if the character (Unicode code point) is
  1932      * permissible as the first character in a Java identifier.
  1933      * <p>
  1934      * A character may start a Java identifier if and only if
  1935      * one of the following conditions is true:
  1936      * <ul>
  1937      * <li> {@link #isLetter(int) isLetter(codePoint)}
  1938      *      returns {@code true}
  1939      * <li> {@link #getType(int) getType(codePoint)}
  1940      *      returns {@code LETTER_NUMBER}
  1941      * <li> the referenced character is a currency symbol (such as {@code '$'})
  1942      * <li> the referenced character is a connecting punctuation character
  1943      *      (such as {@code '_'}).
  1944      * </ul>
  1945      *
  1946      * @param   codePoint the character (Unicode code point) to be tested.
  1947      * @return  {@code true} if the character may start a Java identifier;
  1948      *          {@code false} otherwise.
  1949      * @see     Character#isJavaIdentifierPart(int)
  1950      * @see     Character#isLetter(int)
  1951      * @see     Character#isUnicodeIdentifierStart(int)
  1952      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
  1953      * @since   1.5
  1954      */
  1955     public static boolean isJavaIdentifierStart(int codePoint) {
  1956         return 
  1957             ('A' <= codePoint && codePoint <= 'Z') ||
  1958             ('a' <= codePoint && codePoint <= 'z');
  1959     }
  1960 
  1961     /**
  1962      * Determines if the specified character may be part of a Java
  1963      * identifier as other than the first character.
  1964      * <p>
  1965      * A character may be part of a Java identifier if any of the following
  1966      * are true:
  1967      * <ul>
  1968      * <li>  it is a letter
  1969      * <li>  it is a currency symbol (such as {@code '$'})
  1970      * <li>  it is a connecting punctuation character (such as {@code '_'})
  1971      * <li>  it is a digit
  1972      * <li>  it is a numeric letter (such as a Roman numeral character)
  1973      * <li>  it is a combining mark
  1974      * <li>  it is a non-spacing mark
  1975      * <li> {@code isIdentifierIgnorable} returns
  1976      * {@code true} for the character
  1977      * </ul>
  1978      *
  1979      * <p><b>Note:</b> This method cannot handle <a
  1980      * href="#supplementary"> supplementary characters</a>. To support
  1981      * all Unicode characters, including supplementary characters, use
  1982      * the {@link #isJavaIdentifierPart(int)} method.
  1983      *
  1984      * @param   ch      the character to be tested.
  1985      * @return {@code true} if the character may be part of a
  1986      *          Java identifier; {@code false} otherwise.
  1987      * @see     Character#isIdentifierIgnorable(char)
  1988      * @see     Character#isJavaIdentifierStart(char)
  1989      * @see     Character#isLetterOrDigit(char)
  1990      * @see     Character#isUnicodeIdentifierPart(char)
  1991      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
  1992      * @since   1.1
  1993      */
  1994     public static boolean isJavaIdentifierPart(char ch) {
  1995         return isJavaIdentifierPart((int)ch);
  1996     }
  1997 
  1998     /**
  1999      * Determines if the character (Unicode code point) may be part of a Java
  2000      * identifier as other than the first character.
  2001      * <p>
  2002      * A character may be part of a Java identifier if any of the following
  2003      * are true:
  2004      * <ul>
  2005      * <li>  it is a letter
  2006      * <li>  it is a currency symbol (such as {@code '$'})
  2007      * <li>  it is a connecting punctuation character (such as {@code '_'})
  2008      * <li>  it is a digit
  2009      * <li>  it is a numeric letter (such as a Roman numeral character)
  2010      * <li>  it is a combining mark
  2011      * <li>  it is a non-spacing mark
  2012      * <li> {@link #isIdentifierIgnorable(int)
  2013      * isIdentifierIgnorable(codePoint)} returns {@code true} for
  2014      * the character
  2015      * </ul>
  2016      *
  2017      * @param   codePoint the character (Unicode code point) to be tested.
  2018      * @return {@code true} if the character may be part of a
  2019      *          Java identifier; {@code false} otherwise.
  2020      * @see     Character#isIdentifierIgnorable(int)
  2021      * @see     Character#isJavaIdentifierStart(int)
  2022      * @see     Character#isLetterOrDigit(int)
  2023      * @see     Character#isUnicodeIdentifierPart(int)
  2024      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
  2025      * @since   1.5
  2026      */
  2027     public static boolean isJavaIdentifierPart(int codePoint) {
  2028         return isJavaIdentifierStart(codePoint) ||
  2029             ('0' <= codePoint && codePoint <= '9') || codePoint == '$';
  2030     }
  2031    
  2032     /**
  2033      * Converts the character argument to lowercase using case
  2034      * mapping information from the UnicodeData file.
  2035      * <p>
  2036      * Note that
  2037      * {@code Character.isLowerCase(Character.toLowerCase(ch))}
  2038      * does not always return {@code true} for some ranges of
  2039      * characters, particularly those that are symbols or ideographs.
  2040      *
  2041      * <p>In general, {@link String#toLowerCase()} should be used to map
  2042      * characters to lowercase. {@code String} case mapping methods
  2043      * have several benefits over {@code Character} case mapping methods.
  2044      * {@code String} case mapping methods can perform locale-sensitive
  2045      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
  2046      * the {@code Character} case mapping methods cannot.
  2047      *
  2048      * <p><b>Note:</b> This method cannot handle <a
  2049      * href="#supplementary"> supplementary characters</a>. To support
  2050      * all Unicode characters, including supplementary characters, use
  2051      * the {@link #toLowerCase(int)} method.
  2052      *
  2053      * @param   ch   the character to be converted.
  2054      * @return  the lowercase equivalent of the character, if any;
  2055      *          otherwise, the character itself.
  2056      * @see     Character#isLowerCase(char)
  2057      * @see     String#toLowerCase()
  2058      */
  2059     public static char toLowerCase(char ch) {
  2060         return String.valueOf(ch).toLowerCase().charAt(0);
  2061     }
  2062 
  2063     /**
  2064      * Converts the character argument to uppercase using case mapping
  2065      * information from the UnicodeData file.
  2066      * <p>
  2067      * Note that
  2068      * {@code Character.isUpperCase(Character.toUpperCase(ch))}
  2069      * does not always return {@code true} for some ranges of
  2070      * characters, particularly those that are symbols or ideographs.
  2071      *
  2072      * <p>In general, {@link String#toUpperCase()} should be used to map
  2073      * characters to uppercase. {@code String} case mapping methods
  2074      * have several benefits over {@code Character} case mapping methods.
  2075      * {@code String} case mapping methods can perform locale-sensitive
  2076      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
  2077      * the {@code Character} case mapping methods cannot.
  2078      *
  2079      * <p><b>Note:</b> This method cannot handle <a
  2080      * href="#supplementary"> supplementary characters</a>. To support
  2081      * all Unicode characters, including supplementary characters, use
  2082      * the {@link #toUpperCase(int)} method.
  2083      *
  2084      * @param   ch   the character to be converted.
  2085      * @return  the uppercase equivalent of the character, if any;
  2086      *          otherwise, the character itself.
  2087      * @see     Character#isUpperCase(char)
  2088      * @see     String#toUpperCase()
  2089      */
  2090     public static char toUpperCase(char ch) {
  2091         return String.valueOf(ch).toUpperCase().charAt(0);
  2092     }
  2093 
  2094     /**
  2095      * Returns the numeric value of the character {@code ch} in the
  2096      * specified radix.
  2097      * <p>
  2098      * If the radix is not in the range {@code MIN_RADIX} &le;
  2099      * {@code radix} &le; {@code MAX_RADIX} or if the
  2100      * value of {@code ch} is not a valid digit in the specified
  2101      * radix, {@code -1} is returned. A character is a valid digit
  2102      * if at least one of the following is true:
  2103      * <ul>
  2104      * <li>The method {@code isDigit} is {@code true} of the character
  2105      *     and the Unicode decimal digit value of the character (or its
  2106      *     single-character decomposition) is less than the specified radix.
  2107      *     In this case the decimal digit value is returned.
  2108      * <li>The character is one of the uppercase Latin letters
  2109      *     {@code 'A'} through {@code 'Z'} and its code is less than
  2110      *     {@code radix + 'A' - 10}.
  2111      *     In this case, {@code ch - 'A' + 10}
  2112      *     is returned.
  2113      * <li>The character is one of the lowercase Latin letters
  2114      *     {@code 'a'} through {@code 'z'} and its code is less than
  2115      *     {@code radix + 'a' - 10}.
  2116      *     In this case, {@code ch - 'a' + 10}
  2117      *     is returned.
  2118      * <li>The character is one of the fullwidth uppercase Latin letters A
  2119      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
  2120      *     and its code is less than
  2121      *     {@code radix + '\u005CuFF21' - 10}.
  2122      *     In this case, {@code ch - '\u005CuFF21' + 10}
  2123      *     is returned.
  2124      * <li>The character is one of the fullwidth lowercase Latin letters a
  2125      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
  2126      *     and its code is less than
  2127      *     {@code radix + '\u005CuFF41' - 10}.
  2128      *     In this case, {@code ch - '\u005CuFF41' + 10}
  2129      *     is returned.
  2130      * </ul>
  2131      *
  2132      * <p><b>Note:</b> This method cannot handle <a
  2133      * href="#supplementary"> supplementary characters</a>. To support
  2134      * all Unicode characters, including supplementary characters, use
  2135      * the {@link #digit(int, int)} method.
  2136      *
  2137      * @param   ch      the character to be converted.
  2138      * @param   radix   the radix.
  2139      * @return  the numeric value represented by the character in the
  2140      *          specified radix.
  2141      * @see     Character#forDigit(int, int)
  2142      * @see     Character#isDigit(char)
  2143      */
  2144     public static int digit(char ch, int radix) {
  2145         return digit((int)ch, radix);
  2146     }
  2147 
  2148     /**
  2149      * Returns the numeric value of the specified character (Unicode
  2150      * code point) in the specified radix.
  2151      *
  2152      * <p>If the radix is not in the range {@code MIN_RADIX} &le;
  2153      * {@code radix} &le; {@code MAX_RADIX} or if the
  2154      * character is not a valid digit in the specified
  2155      * radix, {@code -1} is returned. A character is a valid digit
  2156      * if at least one of the following is true:
  2157      * <ul>
  2158      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
  2159      *     and the Unicode decimal digit value of the character (or its
  2160      *     single-character decomposition) is less than the specified radix.
  2161      *     In this case the decimal digit value is returned.
  2162      * <li>The character is one of the uppercase Latin letters
  2163      *     {@code 'A'} through {@code 'Z'} and its code is less than
  2164      *     {@code radix + 'A' - 10}.
  2165      *     In this case, {@code codePoint - 'A' + 10}
  2166      *     is returned.
  2167      * <li>The character is one of the lowercase Latin letters
  2168      *     {@code 'a'} through {@code 'z'} and its code is less than
  2169      *     {@code radix + 'a' - 10}.
  2170      *     In this case, {@code codePoint - 'a' + 10}
  2171      *     is returned.
  2172      * <li>The character is one of the fullwidth uppercase Latin letters A
  2173      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
  2174      *     and its code is less than
  2175      *     {@code radix + '\u005CuFF21' - 10}.
  2176      *     In this case,
  2177      *     {@code codePoint - '\u005CuFF21' + 10}
  2178      *     is returned.
  2179      * <li>The character is one of the fullwidth lowercase Latin letters a
  2180      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
  2181      *     and its code is less than
  2182      *     {@code radix + '\u005CuFF41'- 10}.
  2183      *     In this case,
  2184      *     {@code codePoint - '\u005CuFF41' + 10}
  2185      *     is returned.
  2186      * </ul>
  2187      *
  2188      * @param   codePoint the character (Unicode code point) to be converted.
  2189      * @param   radix   the radix.
  2190      * @return  the numeric value represented by the character in the
  2191      *          specified radix.
  2192      * @see     Character#forDigit(int, int)
  2193      * @see     Character#isDigit(int)
  2194      * @since   1.5
  2195      */
  2196     @JavaScriptBody(args = { "codePoint", "radix" }, body=
  2197         "var x = parseInt(String.fromCharCode(codePoint), radix);\n"
  2198       + "return isNaN(x) ? -1 : x;"
  2199     )
  2200     public static int digit(int codePoint, int radix) {
  2201         throw new UnsupportedOperationException();
  2202     }
  2203 
  2204     /**
  2205      * Returns the {@code int} value that the specified Unicode
  2206      * character represents. For example, the character
  2207      * {@code '\u005Cu216C'} (the roman numeral fifty) will return
  2208      * an int with a value of 50.
  2209      * <p>
  2210      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
  2211      * {@code '\u005Cu005A'}), lowercase
  2212      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
  2213      * full width variant ({@code '\u005CuFF21'} through
  2214      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
  2215      * {@code '\u005CuFF5A'}) forms have numeric values from 10
  2216      * through 35. This is independent of the Unicode specification,
  2217      * which does not assign numeric values to these {@code char}
  2218      * values.
  2219      * <p>
  2220      * If the character does not have a numeric value, then -1 is returned.
  2221      * If the character has a numeric value that cannot be represented as a
  2222      * nonnegative integer (for example, a fractional value), then -2
  2223      * is returned.
  2224      *
  2225      * <p><b>Note:</b> This method cannot handle <a
  2226      * href="#supplementary"> supplementary characters</a>. To support
  2227      * all Unicode characters, including supplementary characters, use
  2228      * the {@link #getNumericValue(int)} method.
  2229      *
  2230      * @param   ch      the character to be converted.
  2231      * @return  the numeric value of the character, as a nonnegative {@code int}
  2232      *           value; -2 if the character has a numeric value that is not a
  2233      *          nonnegative integer; -1 if the character has no numeric value.
  2234      * @see     Character#forDigit(int, int)
  2235      * @see     Character#isDigit(char)
  2236      * @since   1.1
  2237      */
  2238     public static int getNumericValue(char ch) {
  2239         return getNumericValue((int)ch);
  2240     }
  2241 
  2242     /**
  2243      * Returns the {@code int} value that the specified
  2244      * character (Unicode code point) represents. For example, the character
  2245      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
  2246      * an {@code int} with a value of 50.
  2247      * <p>
  2248      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
  2249      * {@code '\u005Cu005A'}), lowercase
  2250      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
  2251      * full width variant ({@code '\u005CuFF21'} through
  2252      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
  2253      * {@code '\u005CuFF5A'}) forms have numeric values from 10
  2254      * through 35. This is independent of the Unicode specification,
  2255      * which does not assign numeric values to these {@code char}
  2256      * values.
  2257      * <p>
  2258      * If the character does not have a numeric value, then -1 is returned.
  2259      * If the character has a numeric value that cannot be represented as a
  2260      * nonnegative integer (for example, a fractional value), then -2
  2261      * is returned.
  2262      *
  2263      * @param   codePoint the character (Unicode code point) to be converted.
  2264      * @return  the numeric value of the character, as a nonnegative {@code int}
  2265      *          value; -2 if the character has a numeric value that is not a
  2266      *          nonnegative integer; -1 if the character has no numeric value.
  2267      * @see     Character#forDigit(int, int)
  2268      * @see     Character#isDigit(int)
  2269      * @since   1.5
  2270      */
  2271     public static int getNumericValue(int codePoint) {
  2272         throw new UnsupportedOperationException();
  2273     }
  2274 
  2275     /**
  2276      * Determines if the specified character is ISO-LATIN-1 white space.
  2277      * This method returns {@code true} for the following five
  2278      * characters only:
  2279      * <table>
  2280      * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>
  2281      *     <td>{@code HORIZONTAL TABULATION}</td></tr>
  2282      * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>
  2283      *     <td>{@code NEW LINE}</td></tr>
  2284      * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>
  2285      *     <td>{@code FORM FEED}</td></tr>
  2286      * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>
  2287      *     <td>{@code CARRIAGE RETURN}</td></tr>
  2288      * <tr><td>{@code '&nbsp;'}</td>  <td>{@code U+0020}</td>
  2289      *     <td>{@code SPACE}</td></tr>
  2290      * </table>
  2291      *
  2292      * @param      ch   the character to be tested.
  2293      * @return     {@code true} if the character is ISO-LATIN-1 white
  2294      *             space; {@code false} otherwise.
  2295      * @see        Character#isSpaceChar(char)
  2296      * @see        Character#isWhitespace(char)
  2297      * @deprecated Replaced by isWhitespace(char).
  2298      */
  2299     @Deprecated
  2300     public static boolean isSpace(char ch) {
  2301         return (ch <= 0x0020) &&
  2302             (((((1L << 0x0009) |
  2303             (1L << 0x000A) |
  2304             (1L << 0x000C) |
  2305             (1L << 0x000D) |
  2306             (1L << 0x0020)) >> ch) & 1L) != 0);
  2307     }
  2308 
  2309 
  2310 
  2311     /**
  2312      * Determines if the specified character is white space according to Java.
  2313      * A character is a Java whitespace character if and only if it satisfies
  2314      * one of the following criteria:
  2315      * <ul>
  2316      * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
  2317      *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
  2318      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
  2319      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
  2320      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
  2321      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
  2322      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
  2323      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
  2324      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
  2325      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
  2326      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
  2327      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
  2328      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
  2329      * </ul>
  2330      *
  2331      * <p><b>Note:</b> This method cannot handle <a
  2332      * href="#supplementary"> supplementary characters</a>. To support
  2333      * all Unicode characters, including supplementary characters, use
  2334      * the {@link #isWhitespace(int)} method.
  2335      *
  2336      * @param   ch the character to be tested.
  2337      * @return  {@code true} if the character is a Java whitespace
  2338      *          character; {@code false} otherwise.
  2339      * @see     Character#isSpaceChar(char)
  2340      * @since   1.1
  2341      */
  2342     public static boolean isWhitespace(char ch) {
  2343         return isWhitespace((int)ch);
  2344     }
  2345 
  2346     /**
  2347      * Determines if the specified character (Unicode code point) is
  2348      * white space according to Java.  A character is a Java
  2349      * whitespace character if and only if it satisfies one of the
  2350      * following criteria:
  2351      * <ul>
  2352      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
  2353      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
  2354      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
  2355      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
  2356      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
  2357      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
  2358      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
  2359      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
  2360      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
  2361      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
  2362      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
  2363      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
  2364      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
  2365      * </ul>
  2366      * <p>
  2367      *
  2368      * @param   codePoint the character (Unicode code point) to be tested.
  2369      * @return  {@code true} if the character is a Java whitespace
  2370      *          character; {@code false} otherwise.
  2371      * @see     Character#isSpaceChar(int)
  2372      * @since   1.5
  2373      */
  2374     public static boolean isWhitespace(int codePoint) {
  2375         throw new UnsupportedOperationException();
  2376     }
  2377 
  2378     /**
  2379      * Determines if the specified character is an ISO control
  2380      * character.  A character is considered to be an ISO control
  2381      * character if its code is in the range {@code '\u005Cu0000'}
  2382      * through {@code '\u005Cu001F'} or in the range
  2383      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
  2384      *
  2385      * <p><b>Note:</b> This method cannot handle <a
  2386      * href="#supplementary"> supplementary characters</a>. To support
  2387      * all Unicode characters, including supplementary characters, use
  2388      * the {@link #isISOControl(int)} method.
  2389      *
  2390      * @param   ch      the character to be tested.
  2391      * @return  {@code true} if the character is an ISO control character;
  2392      *          {@code false} otherwise.
  2393      *
  2394      * @see     Character#isSpaceChar(char)
  2395      * @see     Character#isWhitespace(char)
  2396      * @since   1.1
  2397      */
  2398     public static boolean isISOControl(char ch) {
  2399         return isISOControl((int)ch);
  2400     }
  2401 
  2402     /**
  2403      * Determines if the referenced character (Unicode code point) is an ISO control
  2404      * character.  A character is considered to be an ISO control
  2405      * character if its code is in the range {@code '\u005Cu0000'}
  2406      * through {@code '\u005Cu001F'} or in the range
  2407      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
  2408      *
  2409      * @param   codePoint the character (Unicode code point) to be tested.
  2410      * @return  {@code true} if the character is an ISO control character;
  2411      *          {@code false} otherwise.
  2412      * @see     Character#isSpaceChar(int)
  2413      * @see     Character#isWhitespace(int)
  2414      * @since   1.5
  2415      */
  2416     public static boolean isISOControl(int codePoint) {
  2417         // Optimized form of:
  2418         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
  2419         //     (codePoint >= 0x7F && codePoint <= 0x9F);
  2420         return codePoint <= 0x9F &&
  2421             (codePoint >= 0x7F || (codePoint >>> 5 == 0));
  2422     }
  2423 
  2424     /**
  2425      * Determines the character representation for a specific digit in
  2426      * the specified radix. If the value of {@code radix} is not a
  2427      * valid radix, or the value of {@code digit} is not a valid
  2428      * digit in the specified radix, the null character
  2429      * ({@code '\u005Cu0000'}) is returned.
  2430      * <p>
  2431      * The {@code radix} argument is valid if it is greater than or
  2432      * equal to {@code MIN_RADIX} and less than or equal to
  2433      * {@code MAX_RADIX}. The {@code digit} argument is valid if
  2434      * {@code 0 <= digit < radix}.
  2435      * <p>
  2436      * If the digit is less than 10, then
  2437      * {@code '0' + digit} is returned. Otherwise, the value
  2438      * {@code 'a' + digit - 10} is returned.
  2439      *
  2440      * @param   digit   the number to convert to a character.
  2441      * @param   radix   the radix.
  2442      * @return  the {@code char} representation of the specified digit
  2443      *          in the specified radix.
  2444      * @see     Character#MIN_RADIX
  2445      * @see     Character#MAX_RADIX
  2446      * @see     Character#digit(char, int)
  2447      */
  2448     public static char forDigit(int digit, int radix) {
  2449         if ((digit >= radix) || (digit < 0)) {
  2450             return '\0';
  2451         }
  2452         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
  2453             return '\0';
  2454         }
  2455         if (digit < 10) {
  2456             return (char)('0' + digit);
  2457         }
  2458         return (char)('a' - 10 + digit);
  2459     }
  2460 
  2461     /**
  2462      * Compares two {@code Character} objects numerically.
  2463      *
  2464      * @param   anotherCharacter   the {@code Character} to be compared.
  2465 
  2466      * @return  the value {@code 0} if the argument {@code Character}
  2467      *          is equal to this {@code Character}; a value less than
  2468      *          {@code 0} if this {@code Character} is numerically less
  2469      *          than the {@code Character} argument; and a value greater than
  2470      *          {@code 0} if this {@code Character} is numerically greater
  2471      *          than the {@code Character} argument (unsigned comparison).
  2472      *          Note that this is strictly a numerical comparison; it is not
  2473      *          locale-dependent.
  2474      * @since   1.2
  2475      */
  2476     public int compareTo(Character anotherCharacter) {
  2477         return compare(this.value, anotherCharacter.value);
  2478     }
  2479 
  2480     /**
  2481      * Compares two {@code char} values numerically.
  2482      * The value returned is identical to what would be returned by:
  2483      * <pre>
  2484      *    Character.valueOf(x).compareTo(Character.valueOf(y))
  2485      * </pre>
  2486      *
  2487      * @param  x the first {@code char} to compare
  2488      * @param  y the second {@code char} to compare
  2489      * @return the value {@code 0} if {@code x == y};
  2490      *         a value less than {@code 0} if {@code x < y}; and
  2491      *         a value greater than {@code 0} if {@code x > y}
  2492      * @since 1.7
  2493      */
  2494     public static int compare(char x, char y) {
  2495         return x - y;
  2496     }
  2497 
  2498 
  2499     /**
  2500      * The number of bits used to represent a <tt>char</tt> value in unsigned
  2501      * binary form, constant {@code 16}.
  2502      *
  2503      * @since 1.5
  2504      */
    // Width of a char value in bits.
    public static final int SIZE = 16;
  2506 
  2507     /**
  2508      * Returns the value obtained by reversing the order of the bytes in the
  2509      * specified <tt>char</tt> value.
  2510      *
  2511      * @return the value obtained by reversing (or, equivalently, swapping)
  2512      *     the bytes in the specified <tt>char</tt> value.
  2513      * @since 1.5
  2514      */
  2515     public static char reverseBytes(char ch) {
  2516         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
  2517     }
  2518 
  2519 }