rt/emul/mini/src/main/java/java/lang/Character.java
author Jaroslav Tulach <jaroslav.tulach@apidesign.org>
Tue, 17 Jan 2017 07:04:06 +0100
changeset 1985 cd1cc103a03c
parent 1384 12a395b571c8
permissions -rw-r--r--
Implementation of ClassValue for bck2brwsr
     1 /*
     2  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Oracle in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    22  * or visit www.oracle.com if you need additional information or have any
    23  * questions.
    24  */
    25 
    26 package java.lang;
    27 
    28 import org.apidesign.bck2brwsr.core.JavaScriptBody;
    29 
    30 /**
    31  * The {@code Character} class wraps a value of the primitive
    32  * type {@code char} in an object. An object of type
    33  * {@code Character} contains a single field whose type is
    34  * {@code char}.
    35  * <p>
    36  * In addition, this class provides several methods for determining
    37  * a character's category (lowercase letter, digit, etc.) and for converting
    38  * characters from uppercase to lowercase and vice versa.
    39  * <p>
    40  * Character information is based on the Unicode Standard, version 6.0.0.
    41  * <p>
    42  * The methods and data of class {@code Character} are defined by
    43  * the information in the <i>UnicodeData</i> file that is part of the
    44  * Unicode Character Database maintained by the Unicode
    45  * Consortium. This file specifies various properties including name
    46  * and general category for every defined Unicode code point or
    47  * character range.
    48  * <p>
    49  * The file and its description are available from the Unicode Consortium at:
    50  * <ul>
    51  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
    52  * </ul>
    53  *
    54  * <h4><a name="unicode">Unicode Character Representations</a></h4>
    55  *
    56  * <p>The {@code char} data type (and therefore the value that a
    57  * {@code Character} object encapsulates) are based on the
    58  * original Unicode specification, which defined characters as
    59  * fixed-width 16-bit entities. The Unicode Standard has since been
    60  * changed to allow for characters whose representation requires more
    61  * than 16 bits.  The range of legal <em>code point</em>s is now
    62  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
    63  * (Refer to the <a
    64  * href="http://www.unicode.org/reports/tr27/#notation"><i>
    65  * definition</i></a> of the U+<i>n</i> notation in the Unicode
    66  * Standard.)
    67  *
    68  * <p><a name="BMP">The set of characters from U+0000 to U+FFFF is
    69  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
    70  * <a name="supplementary">Characters</a> whose code points are greater
    71  * than U+FFFF are called <em>supplementary character</em>s.  The Java
    72  * platform uses the UTF-16 representation in {@code char} arrays and
    73  * in the {@code String} and {@code StringBuffer} classes. In
    74  * this representation, supplementary characters are represented as a pair
    75  * of {@code char} values, the first from the <em>high-surrogates</em>
    76  * range, (&#92;uD800-&#92;uDBFF), the second from the
    77  * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
    78  *
    79  * <p>A {@code char} value, therefore, represents Basic
    80  * Multilingual Plane (BMP) code points, including the surrogate
    81  * code points, or code units of the UTF-16 encoding. An
    82  * {@code int} value represents all Unicode code points,
    83  * including supplementary code points. The lower (least significant)
    84  * 21 bits of {@code int} are used to represent Unicode code
    85  * points and the upper (most significant) 11 bits must be zero.
    86  * Unless otherwise specified, the behavior with respect to
    87  * supplementary characters and surrogate {@code char} values is
    88  * as follows:
    89  *
    90  * <ul>
    91  * <li>The methods that only accept a {@code char} value cannot support
    92  * supplementary characters. They treat {@code char} values from the
    93  * surrogate ranges as undefined characters. For example,
    94  * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
    95  * this specific value if followed by any low-surrogate value in a string
    96  * would represent a letter.
    97  *
    98  * <li>The methods that accept an {@code int} value support all
    99  * Unicode characters, including supplementary characters. For
   100  * example, {@code Character.isLetter(0x2F81A)} returns
   101  * {@code true} because the code point value represents a letter
   102  * (a CJK ideograph).
   103  * </ul>
   104  *
   105  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
   106  * used for character values in the range between U+0000 and U+10FFFF,
   107  * and <em>Unicode code unit</em> is used for 16-bit
   108  * {@code char} values that are code units of the <em>UTF-16</em>
   109  * encoding. For more information on Unicode terminology, refer to the
   110  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
   111  *
   112  * @author  Lee Boynton
   113  * @author  Guy Steele
   114  * @author  Akira Tanaka
   115  * @author  Martin Buchholz
   116  * @author  Ulf Zibis
   117  * @since   1.0
   118  */
   119 public final
   120 class Character implements java.io.Serializable, Comparable<Character> {
   121     /**
   122      * The minimum radix available for conversion to and from strings.
   123      * The constant value of this field is the smallest value permitted
   124      * for the radix argument in radix-conversion methods such as the
   125      * {@code digit} method, the {@code forDigit} method, and the
   126      * {@code toString} method of class {@code Integer}.
   127      *
   128      * @see     Character#digit(char, int)
   129      * @see     Character#forDigit(int, int)
   130      * @see     Integer#toString(int, int)
   131      * @see     Integer#valueOf(String)
   132      */
   133     public static final int MIN_RADIX = 2;
   134 
   135     /**
   136      * The maximum radix available for conversion to and from strings.
   137      * The constant value of this field is the largest value permitted
   138      * for the radix argument in radix-conversion methods such as the
   139      * {@code digit} method, the {@code forDigit} method, and the
   140      * {@code toString} method of class {@code Integer}.
   141      *
   142      * @see     Character#digit(char, int)
   143      * @see     Character#forDigit(int, int)
   144      * @see     Integer#toString(int, int)
   145      * @see     Integer#valueOf(String)
   146      */
   147     public static final int MAX_RADIX = 36;
   148 
   149     /**
   150      * The constant value of this field is the smallest value of type
   151      * {@code char}, {@code '\u005Cu0000'}.
   152      *
   153      * @since   1.0.2
   154      */
   155     public static final char MIN_VALUE = '\u0000';
   156 
   157     /**
   158      * The constant value of this field is the largest value of type
   159      * {@code char}, {@code '\u005CuFFFF'}.
   160      *
   161      * @since   1.0.2
   162      */
   163     public static final char MAX_VALUE = '\uFFFF';
   164 
   165     /**
   166      * The {@code Class} instance representing the primitive type
   167      * {@code char}.
   168      *
   169      * @since   1.1
   170      */
   171     public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
   172 
   173     /*
   174      * Normative general types
   175      */
   176 
   177     /*
   178      * General character types
   179      */
   180 
   181     /**
   182      * General category "Cn" in the Unicode specification.
   183      * @since   1.1
   184      */
   185     public static final byte UNASSIGNED = 0;
   186 
   187     /**
   188      * General category "Lu" in the Unicode specification.
   189      * @since   1.1
   190      */
   191     public static final byte UPPERCASE_LETTER = 1;
   192 
   193     /**
   194      * General category "Ll" in the Unicode specification.
   195      * @since   1.1
   196      */
   197     public static final byte LOWERCASE_LETTER = 2;
   198 
   199     /**
   200      * General category "Lt" in the Unicode specification.
   201      * @since   1.1
   202      */
   203     public static final byte TITLECASE_LETTER = 3;
   204 
   205     /**
   206      * General category "Lm" in the Unicode specification.
   207      * @since   1.1
   208      */
   209     public static final byte MODIFIER_LETTER = 4;
   210 
   211     /**
   212      * General category "Lo" in the Unicode specification.
   213      * @since   1.1
   214      */
   215     public static final byte OTHER_LETTER = 5;
   216 
   217     /**
   218      * General category "Mn" in the Unicode specification.
   219      * @since   1.1
   220      */
   221     public static final byte NON_SPACING_MARK = 6;
   222 
   223     /**
   224      * General category "Me" in the Unicode specification.
   225      * @since   1.1
   226      */
   227     public static final byte ENCLOSING_MARK = 7;
   228 
   229     /**
   230      * General category "Mc" in the Unicode specification.
   231      * @since   1.1
   232      */
   233     public static final byte COMBINING_SPACING_MARK = 8;
   234 
   235     /**
   236      * General category "Nd" in the Unicode specification.
   237      * @since   1.1
   238      */
   239     public static final byte DECIMAL_DIGIT_NUMBER        = 9;
   240 
   241     /**
   242      * General category "Nl" in the Unicode specification.
   243      * @since   1.1
   244      */
   245     public static final byte LETTER_NUMBER = 10;
   246 
   247     /**
   248      * General category "No" in the Unicode specification.
   249      * @since   1.1
   250      */
   251     public static final byte OTHER_NUMBER = 11;
   252 
   253     /**
   254      * General category "Zs" in the Unicode specification.
   255      * @since   1.1
   256      */
   257     public static final byte SPACE_SEPARATOR = 12;
   258 
   259     /**
   260      * General category "Zl" in the Unicode specification.
   261      * @since   1.1
   262      */
   263     public static final byte LINE_SEPARATOR = 13;
   264 
   265     /**
   266      * General category "Zp" in the Unicode specification.
   267      * @since   1.1
   268      */
   269     public static final byte PARAGRAPH_SEPARATOR = 14;
   270 
   271     /**
   272      * General category "Cc" in the Unicode specification.
   273      * @since   1.1
   274      */
   275     public static final byte CONTROL = 15;
   276 
   277     /**
   278      * General category "Cf" in the Unicode specification.
   279      * @since   1.1
   280      */
   281     public static final byte FORMAT = 16;
   282 
   283     /**
   284      * General category "Co" in the Unicode specification.
   285      * @since   1.1
   286      */
   287     public static final byte PRIVATE_USE = 18;
   288 
   289     /**
   290      * General category "Cs" in the Unicode specification.
   291      * @since   1.1
   292      */
   293     public static final byte SURROGATE = 19;
   294 
   295     /**
   296      * General category "Pd" in the Unicode specification.
   297      * @since   1.1
   298      */
   299     public static final byte DASH_PUNCTUATION = 20;
   300 
   301     /**
   302      * General category "Ps" in the Unicode specification.
   303      * @since   1.1
   304      */
   305     public static final byte START_PUNCTUATION = 21;
   306 
   307     /**
   308      * General category "Pe" in the Unicode specification.
   309      * @since   1.1
   310      */
   311     public static final byte END_PUNCTUATION = 22;
   312 
   313     /**
   314      * General category "Pc" in the Unicode specification.
   315      * @since   1.1
   316      */
   317     public static final byte CONNECTOR_PUNCTUATION = 23;
   318 
   319     /**
   320      * General category "Po" in the Unicode specification.
   321      * @since   1.1
   322      */
   323     public static final byte OTHER_PUNCTUATION = 24;
   324 
   325     /**
   326      * General category "Sm" in the Unicode specification.
   327      * @since   1.1
   328      */
   329     public static final byte MATH_SYMBOL = 25;
   330 
   331     /**
   332      * General category "Sc" in the Unicode specification.
   333      * @since   1.1
   334      */
   335     public static final byte CURRENCY_SYMBOL = 26;
   336 
   337     /**
   338      * General category "Sk" in the Unicode specification.
   339      * @since   1.1
   340      */
   341     public static final byte MODIFIER_SYMBOL = 27;
   342 
   343     /**
   344      * General category "So" in the Unicode specification.
   345      * @since   1.1
   346      */
   347     public static final byte OTHER_SYMBOL = 28;
   348 
   349     /**
   350      * General category "Pi" in the Unicode specification.
   351      * @since   1.4
   352      */
   353     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
   354 
   355     /**
   356      * General category "Pf" in the Unicode specification.
   357      * @since   1.4
   358      */
   359     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
   360 
   361     /**
   362      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
   363      */
   364     static final int ERROR = 0xFFFFFFFF;
   365 
   366 
   367     /**
   368      * Undefined bidirectional character type. Undefined {@code char}
   369      * values have undefined directionality in the Unicode specification.
   370      * @since 1.4
   371      */
   372     public static final byte DIRECTIONALITY_UNDEFINED = -1;
   373 
   374     /**
   375      * Strong bidirectional character type "L" in the Unicode specification.
   376      * @since 1.4
   377      */
   378     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
   379 
   380     /**
   381      * Strong bidirectional character type "R" in the Unicode specification.
   382      * @since 1.4
   383      */
   384     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
   385 
   386     /**
   387     * Strong bidirectional character type "AL" in the Unicode specification.
   388      * @since 1.4
   389      */
   390     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
   391 
   392     /**
   393      * Weak bidirectional character type "EN" in the Unicode specification.
   394      * @since 1.4
   395      */
   396     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
   397 
   398     /**
   399      * Weak bidirectional character type "ES" in the Unicode specification.
   400      * @since 1.4
   401      */
   402     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
   403 
   404     /**
   405      * Weak bidirectional character type "ET" in the Unicode specification.
   406      * @since 1.4
   407      */
   408     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
   409 
   410     /**
   411      * Weak bidirectional character type "AN" in the Unicode specification.
   412      * @since 1.4
   413      */
   414     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
   415 
   416     /**
   417      * Weak bidirectional character type "CS" in the Unicode specification.
   418      * @since 1.4
   419      */
   420     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
   421 
   422     /**
   423      * Weak bidirectional character type "NSM" in the Unicode specification.
   424      * @since 1.4
   425      */
   426     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
   427 
   428     /**
   429      * Weak bidirectional character type "BN" in the Unicode specification.
   430      * @since 1.4
   431      */
   432     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
   433 
   434     /**
   435      * Neutral bidirectional character type "B" in the Unicode specification.
   436      * @since 1.4
   437      */
   438     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
   439 
   440     /**
   441      * Neutral bidirectional character type "S" in the Unicode specification.
   442      * @since 1.4
   443      */
   444     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
   445 
   446     /**
   447      * Neutral bidirectional character type "WS" in the Unicode specification.
   448      * @since 1.4
   449      */
   450     public static final byte DIRECTIONALITY_WHITESPACE = 12;
   451 
   452     /**
   453      * Neutral bidirectional character type "ON" in the Unicode specification.
   454      * @since 1.4
   455      */
   456     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
   457 
   458     /**
   459      * Strong bidirectional character type "LRE" in the Unicode specification.
   460      * @since 1.4
   461      */
   462     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
   463 
   464     /**
   465      * Strong bidirectional character type "LRO" in the Unicode specification.
   466      * @since 1.4
   467      */
   468     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
   469 
   470     /**
   471      * Strong bidirectional character type "RLE" in the Unicode specification.
   472      * @since 1.4
   473      */
   474     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
   475 
   476     /**
   477      * Strong bidirectional character type "RLO" in the Unicode specification.
   478      * @since 1.4
   479      */
   480     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
   481 
   482     /**
   483      * Weak bidirectional character type "PDF" in the Unicode specification.
   484      * @since 1.4
   485      */
   486     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
   487 
   488     /**
   489      * The minimum value of a
   490      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
   491      * Unicode high-surrogate code unit</a>
   492      * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
   493      * A high-surrogate is also known as a <i>leading-surrogate</i>.
   494      *
   495      * @since 1.5
   496      */
   497     public static final char MIN_HIGH_SURROGATE = '\uD800';
   498 
   499     /**
   500      * The maximum value of a
   501      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
   502      * Unicode high-surrogate code unit</a>
   503      * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
   504      * A high-surrogate is also known as a <i>leading-surrogate</i>.
   505      *
   506      * @since 1.5
   507      */
   508     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
   509 
   510     /**
   511      * The minimum value of a
   512      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
   513      * Unicode low-surrogate code unit</a>
   514      * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
   515      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
   516      *
   517      * @since 1.5
   518      */
   519     public static final char MIN_LOW_SURROGATE  = '\uDC00';
   520 
   521     /**
   522      * The maximum value of a
   523      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
   524      * Unicode low-surrogate code unit</a>
   525      * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
   526      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
   527      *
   528      * @since 1.5
   529      */
   530     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
   531 
   532     /**
   533      * The minimum value of a Unicode surrogate code unit in the
   534      * UTF-16 encoding, constant {@code '\u005CuD800'}.
   535      *
   536      * @since 1.5
   537      */
   538     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
   539 
   540     /**
   541      * The maximum value of a Unicode surrogate code unit in the
   542      * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
   543      *
   544      * @since 1.5
   545      */
   546     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
   547 
   548     /**
   549      * The minimum value of a
   550      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
   551      * Unicode supplementary code point</a>, constant {@code U+10000}.
   552      *
   553      * @since 1.5
   554      */
   555     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
   556 
   557     /**
   558      * The minimum value of a
   559      * <a href="http://www.unicode.org/glossary/#code_point">
   560      * Unicode code point</a>, constant {@code U+0000}.
   561      *
   562      * @since 1.5
   563      */
   564     public static final int MIN_CODE_POINT = 0x000000;
   565 
   566     /**
   567      * The maximum value of a
   568      * <a href="http://www.unicode.org/glossary/#code_point">
   569      * Unicode code point</a>, constant {@code U+10FFFF}.
   570      *
   571      * @since 1.5
   572      */
   573     public static final int MAX_CODE_POINT = 0X10FFFF;
   574 
   575     public static boolean isAlphabetic(int ch) {
   576         throw new UnsupportedOperationException("isAlphabetic: " + (char)ch);
   577     }
   578 
   579     public static boolean isIdeographic(int ch) {
   580         throw new UnsupportedOperationException("isIdeographic: " + (char)ch);
   581     }
   582 
   583     public static boolean isLowerCase(int ch) {
   584         throw new UnsupportedOperationException("isLowerCase: " + (char)ch);
   585     }
   586 
   587     public static boolean isUpperCase(int ch) {
   588         throw new UnsupportedOperationException("isUpperCase: " + (char)ch);
   589     }
   590 
   591     public static boolean isMirrored(int ch) {
   592         throw new UnsupportedOperationException("isMirrored: " + (char)ch);
   593     }
   594 
   595     public static boolean isIdentifierIgnorable(int ch) {
   596         throw new UnsupportedOperationException("isIdentifierIgnorable: " + (char)ch);
   597     }
   598 
   599     public static boolean isUnicodeIdentifierPart(int ch) {
   600         throw new UnsupportedOperationException("isUnicodeIdentifierPart: " + (char)ch);
   601     }
   602 
   603     public static boolean isUnicodeIdentifierStart(int ch) {
   604         throw new UnsupportedOperationException("isUnicodeIdentifierStart: " + (char)ch);
   605     }
   606 
   607     public static char toUpperCase(int ch) {
   608         throw new UnsupportedOperationException("toUpperCase: " + (char)ch);
   609     }
   610 
   611     public static int toLowerCase(int ch) {
   612         throw new UnsupportedOperationException("toLowerCase: " + (char)ch);
   613     }
   614 
   615 
   616     /**
   617      * Instances of this class represent particular subsets of the Unicode
   618      * character set.  The only family of subsets defined in the
   619      * {@code Character} class is {@link Character.UnicodeBlock}.
   620      * Other portions of the Java API may define other subsets for their
   621      * own purposes.
   622      *
   623      * @since 1.2
   624      */
   625     public static class Subset  {
   626 
   627         private String name;
   628 
   629         /**
   630          * Constructs a new {@code Subset} instance.
   631          *
   632          * @param  name  The name of this subset
   633          * @exception NullPointerException if name is {@code null}
   634          */
   635         protected Subset(String name) {
   636             if (name == null) {
   637                 throw new NullPointerException("name");
   638             }
   639             this.name = name;
   640         }
   641 
   642         /**
   643          * Compares two {@code Subset} objects for equality.
   644          * This method returns {@code true} if and only if
   645          * {@code this} and the argument refer to the same
   646          * object; since this method is {@code final}, this
   647          * guarantee holds for all subclasses.
   648          */
   649         public final boolean equals(Object obj) {
   650             return (this == obj);
   651         }
   652 
   653         /**
   654          * Returns the standard hash code as defined by the
   655          * {@link Object#hashCode} method.  This method
   656          * is {@code final} in order to ensure that the
   657          * {@code equals} and {@code hashCode} methods will
   658          * be consistent in all subclasses.
   659          */
   660         public final int hashCode() {
   661             return super.hashCode();
   662         }
   663 
   664         /**
   665          * Returns the name of this subset.
   666          */
   667         public final String toString() {
   668             return name;
   669         }
   670     }
   671 
   672     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
   673     // for the latest specification of Unicode Blocks.
   674 
   675 
   676     /**
   677      * The value of the {@code Character}.
   678      *
   679      * @serial
   680      */
   681     private final char value;
   682 
   683     /** use serialVersionUID from JDK 1.0.2 for interoperability */
   684     private static final long serialVersionUID = 3786198910865385080L;
   685 
   686     /**
   687      * Constructs a newly allocated {@code Character} object that
   688      * represents the specified {@code char} value.
   689      *
   690      * @param  value   the value to be represented by the
   691      *                  {@code Character} object.
   692      */
   693     public Character(char value) {
   694         this.value = value;
   695     }
   696 
   697     private static class CharacterCache {
   698         private CharacterCache(){}
   699 
   700         static final Character cache[] = new Character[127 + 1];
   701 
   702         static {
   703             for (int i = 0; i < cache.length; i++)
   704                 cache[i] = new Character((char)i);
   705         }
   706     }
   707 
   708     /**
   709      * Returns a <tt>Character</tt> instance representing the specified
   710      * <tt>char</tt> value.
   711      * If a new <tt>Character</tt> instance is not required, this method
   712      * should generally be used in preference to the constructor
   713      * {@link #Character(char)}, as this method is likely to yield
   714      * significantly better space and time performance by caching
   715      * frequently requested values.
   716      *
   717      * This method will always cache values in the range {@code
   718      * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
   719      * cache other values outside of this range.
   720      *
   721      * @param  c a char value.
   722      * @return a <tt>Character</tt> instance representing <tt>c</tt>.
   723      * @since  1.5
   724      */
   725     public static Character valueOf(char c) {
   726         if (c <= 127) { // must cache
   727             return CharacterCache.cache[(int)c];
   728         }
   729         return new Character(c);
   730     }
   731 
   732     /**
   733      * Returns the value of this {@code Character} object.
   734      * @return  the primitive {@code char} value represented by
   735      *          this object.
   736      */
   737     public char charValue() {
   738         return value;
   739     }
   740 
   741     /**
   742      * Returns a hash code for this {@code Character}; equal to the result
   743      * of invoking {@code charValue()}.
   744      *
   745      * @return a hash code value for this {@code Character}
   746      */
   747     public int hashCode() {
   748         return (int)value;
   749     }
   750 
   751     /**
   752      * Compares this object against the specified object.
   753      * The result is {@code true} if and only if the argument is not
   754      * {@code null} and is a {@code Character} object that
   755      * represents the same {@code char} value as this object.
   756      *
   757      * @param   obj   the object to compare with.
   758      * @return  {@code true} if the objects are the same;
   759      *          {@code false} otherwise.
   760      */
   761     public boolean equals(Object obj) {
   762         if (obj instanceof Character) {
   763             return value == ((Character)obj).charValue();
   764         }
   765         return false;
   766     }
   767 
   768     /**
   769      * Returns a {@code String} object representing this
   770      * {@code Character}'s value.  The result is a string of
   771      * length 1 whose sole component is the primitive
   772      * {@code char} value represented by this
   773      * {@code Character} object.
   774      *
   775      * @return  a string representation of this object.
   776      */
   777     public String toString() {
   778         char buf[] = {value};
   779         return String.valueOf(buf);
   780     }
   781 
   782     /**
   783      * Returns a {@code String} object representing the
   784      * specified {@code char}.  The result is a string of length
   785      * 1 consisting solely of the specified {@code char}.
   786      *
   787      * @param c the {@code char} to be converted
   788      * @return the string representation of the specified {@code char}
   789      * @since 1.4
   790      */
   791     public static String toString(char c) {
   792         return String.valueOf(c);
   793     }
   794 
   795     /**
   796      * Determines whether the specified code point is a valid
   797      * <a href="http://www.unicode.org/glossary/#code_point">
   798      * Unicode code point value</a>.
   799      *
   800      * @param  codePoint the Unicode code point to be tested
   801      * @return {@code true} if the specified code point value is between
   802      *         {@link #MIN_CODE_POINT} and
   803      *         {@link #MAX_CODE_POINT} inclusive;
   804      *         {@code false} otherwise.
   805      * @since  1.5
   806      */
   807     public static boolean isValidCodePoint(int codePoint) {
   808         // Optimized form of:
   809         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
   810         int plane = codePoint >>> 16;
   811         return plane < ((MAX_CODE_POINT + 1) >>> 16);
   812     }
   813 
   814     /**
   815      * Determines whether the specified character (Unicode code point)
   816      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
   817      * Such code points can be represented using a single {@code char}.
   818      *
   819      * @param  codePoint the character (Unicode code point) to be tested
   820      * @return {@code true} if the specified code point is between
   821      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
   822      *         {@code false} otherwise.
   823      * @since  1.7
   824      */
   825     public static boolean isBmpCodePoint(int codePoint) {
   826         return codePoint >>> 16 == 0;
   827         // Optimized form of:
   828         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
   829         // We consistently use logical shift (>>>) to facilitate
   830         // additional runtime optimizations.
   831     }
   832 
   833     /**
   834      * Determines whether the specified character (Unicode code point)
   835      * is in the <a href="#supplementary">supplementary character</a> range.
   836      *
   837      * @param  codePoint the character (Unicode code point) to be tested
   838      * @return {@code true} if the specified code point is between
   839      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
   840      *         {@link #MAX_CODE_POINT} inclusive;
   841      *         {@code false} otherwise.
   842      * @since  1.5
   843      */
   844     public static boolean isSupplementaryCodePoint(int codePoint) {
   845         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
   846             && codePoint <  MAX_CODE_POINT + 1;
   847     }
   848 
   849     /**
   850      * Determines if the given {@code char} value is a
   851      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
   852      * Unicode high-surrogate code unit</a>
   853      * (also known as <i>leading-surrogate code unit</i>).
   854      *
   855      * <p>Such values do not represent characters by themselves,
   856      * but are used in the representation of
   857      * <a href="#supplementary">supplementary characters</a>
   858      * in the UTF-16 encoding.
   859      *
   860      * @param  ch the {@code char} value to be tested.
   861      * @return {@code true} if the {@code char} value is between
   862      *         {@link #MIN_HIGH_SURROGATE} and
   863      *         {@link #MAX_HIGH_SURROGATE} inclusive;
   864      *         {@code false} otherwise.
   865      * @see    Character#isLowSurrogate(char)
   866      * @see    Character.UnicodeBlock#of(int)
   867      * @since  1.5
   868      */
   869     public static boolean isHighSurrogate(char ch) {
   870         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
   871         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
   872     }
   873 
   874     /**
   875      * Determines if the given {@code char} value is a
   876      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
   877      * Unicode low-surrogate code unit</a>
   878      * (also known as <i>trailing-surrogate code unit</i>).
   879      *
   880      * <p>Such values do not represent characters by themselves,
   881      * but are used in the representation of
   882      * <a href="#supplementary">supplementary characters</a>
   883      * in the UTF-16 encoding.
   884      *
   885      * @param  ch the {@code char} value to be tested.
   886      * @return {@code true} if the {@code char} value is between
   887      *         {@link #MIN_LOW_SURROGATE} and
   888      *         {@link #MAX_LOW_SURROGATE} inclusive;
   889      *         {@code false} otherwise.
   890      * @see    Character#isHighSurrogate(char)
   891      * @since  1.5
   892      */
   893     public static boolean isLowSurrogate(char ch) {
   894         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
   895     }
   896 
   897     /**
   898      * Determines if the given {@code char} value is a Unicode
   899      * <i>surrogate code unit</i>.
   900      *
   901      * <p>Such values do not represent characters by themselves,
   902      * but are used in the representation of
   903      * <a href="#supplementary">supplementary characters</a>
   904      * in the UTF-16 encoding.
   905      *
   906      * <p>A char value is a surrogate code unit if and only if it is either
   907      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
   908      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
   909      *
   910      * @param  ch the {@code char} value to be tested.
   911      * @return {@code true} if the {@code char} value is between
   912      *         {@link #MIN_SURROGATE} and
   913      *         {@link #MAX_SURROGATE} inclusive;
   914      *         {@code false} otherwise.
   915      * @since  1.7
   916      */
   917     public static boolean isSurrogate(char ch) {
   918         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
   919     }
   920 
   921     /**
   922      * Determines whether the specified pair of {@code char}
   923      * values is a valid
   924      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
   925      * Unicode surrogate pair</a>.
   926      *
   927      * <p>This method is equivalent to the expression:
   928      * <blockquote><pre>
   929      * isHighSurrogate(high) && isLowSurrogate(low)
   930      * </pre></blockquote>
   931      *
   932      * @param  high the high-surrogate code value to be tested
   933      * @param  low the low-surrogate code value to be tested
   934      * @return {@code true} if the specified high and
   935      * low-surrogate code values represent a valid surrogate pair;
   936      * {@code false} otherwise.
   937      * @since  1.5
   938      */
   939     public static boolean isSurrogatePair(char high, char low) {
   940         return isHighSurrogate(high) && isLowSurrogate(low);
   941     }
   942 
   943     /**
   944      * Determines the number of {@code char} values needed to
   945      * represent the specified character (Unicode code point). If the
   946      * specified character is equal to or greater than 0x10000, then
   947      * the method returns 2. Otherwise, the method returns 1.
   948      *
   949      * <p>This method doesn't validate the specified character to be a
   950      * valid Unicode code point. The caller must validate the
   951      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
   952      * if necessary.
   953      *
   954      * @param   codePoint the character (Unicode code point) to be tested.
   955      * @return  2 if the character is equal to or greater than 0x10000; 1 otherwise.
   956      * @see     Character#isSupplementaryCodePoint(int)
   957      * @since   1.5
   958      */
   959     public static int charCount(int codePoint) {
   960         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
   961     }
   962 
   963     /**
   964      * Converts the specified surrogate pair to its supplementary code
   965      * point value. This method does not validate the specified
   966      * surrogate pair. The caller must validate it using {@link
   967      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
   968      *
   969      * @param  high the high-surrogate code unit
   970      * @param  low the low-surrogate code unit
   971      * @return the supplementary code point composed from the
   972      *         specified surrogate pair.
   973      * @since  1.5
   974      */
   975     public static int toCodePoint(char high, char low) {
   976         // Optimized form of:
   977         // return ((high - MIN_HIGH_SURROGATE) << 10)
   978         //         + (low - MIN_LOW_SURROGATE)
   979         //         + MIN_SUPPLEMENTARY_CODE_POINT;
   980         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
   981                                        - (MIN_HIGH_SURROGATE << 10)
   982                                        - MIN_LOW_SURROGATE);
   983     }
   984 
   985     /**
   986      * Returns the code point at the given index of the
   987      * {@code CharSequence}. If the {@code char} value at
   988      * the given index in the {@code CharSequence} is in the
   989      * high-surrogate range, the following index is less than the
   990      * length of the {@code CharSequence}, and the
   991      * {@code char} value at the following index is in the
   992      * low-surrogate range, then the supplementary code point
   993      * corresponding to this surrogate pair is returned. Otherwise,
   994      * the {@code char} value at the given index is returned.
   995      *
   996      * @param seq a sequence of {@code char} values (Unicode code
   997      * units)
   998      * @param index the index to the {@code char} values (Unicode
   999      * code units) in {@code seq} to be converted
  1000      * @return the Unicode code point at the given index
  1001      * @exception NullPointerException if {@code seq} is null.
  1002      * @exception IndexOutOfBoundsException if the value
  1003      * {@code index} is negative or not less than
  1004      * {@link CharSequence#length() seq.length()}.
  1005      * @since  1.5
  1006      */
  1007     public static int codePointAt(CharSequence seq, int index) {
  1008         char c1 = seq.charAt(index++);
  1009         if (isHighSurrogate(c1)) {
  1010             if (index < seq.length()) {
  1011                 char c2 = seq.charAt(index);
  1012                 if (isLowSurrogate(c2)) {
  1013                     return toCodePoint(c1, c2);
  1014                 }
  1015             }
  1016         }
  1017         return c1;
  1018     }
  1019 
  1020     /**
  1021      * Returns the code point at the given index of the
  1022      * {@code char} array. If the {@code char} value at
  1023      * the given index in the {@code char} array is in the
  1024      * high-surrogate range, the following index is less than the
  1025      * length of the {@code char} array, and the
  1026      * {@code char} value at the following index is in the
  1027      * low-surrogate range, then the supplementary code point
  1028      * corresponding to this surrogate pair is returned. Otherwise,
  1029      * the {@code char} value at the given index is returned.
  1030      *
  1031      * @param a the {@code char} array
  1032      * @param index the index to the {@code char} values (Unicode
  1033      * code units) in the {@code char} array to be converted
  1034      * @return the Unicode code point at the given index
  1035      * @exception NullPointerException if {@code a} is null.
  1036      * @exception IndexOutOfBoundsException if the value
  1037      * {@code index} is negative or not less than
  1038      * the length of the {@code char} array.
  1039      * @since  1.5
  1040      */
  1041     public static int codePointAt(char[] a, int index) {
  1042         return codePointAtImpl(a, index, a.length);
  1043     }
  1044 
  1045     /**
  1046      * Returns the code point at the given index of the
  1047      * {@code char} array, where only array elements with
  1048      * {@code index} less than {@code limit} can be used. If
  1049      * the {@code char} value at the given index in the
  1050      * {@code char} array is in the high-surrogate range, the
  1051      * following index is less than the {@code limit}, and the
  1052      * {@code char} value at the following index is in the
  1053      * low-surrogate range, then the supplementary code point
  1054      * corresponding to this surrogate pair is returned. Otherwise,
  1055      * the {@code char} value at the given index is returned.
  1056      *
  1057      * @param a the {@code char} array
  1058      * @param index the index to the {@code char} values (Unicode
  1059      * code units) in the {@code char} array to be converted
  1060      * @param limit the index after the last array element that
  1061      * can be used in the {@code char} array
  1062      * @return the Unicode code point at the given index
  1063      * @exception NullPointerException if {@code a} is null.
  1064      * @exception IndexOutOfBoundsException if the {@code index}
  1065      * argument is negative or not less than the {@code limit}
  1066      * argument, or if the {@code limit} argument is negative or
  1067      * greater than the length of the {@code char} array.
  1068      * @since  1.5
  1069      */
  1070     public static int codePointAt(char[] a, int index, int limit) {
  1071         if (index >= limit || limit < 0 || limit > a.length) {
  1072             throw new IndexOutOfBoundsException();
  1073         }
  1074         return codePointAtImpl(a, index, limit);
  1075     }
  1076 
  1077     // throws ArrayIndexOutofBoundsException if index out of bounds
  1078     static int codePointAtImpl(char[] a, int index, int limit) {
  1079         char c1 = a[index++];
  1080         if (isHighSurrogate(c1)) {
  1081             if (index < limit) {
  1082                 char c2 = a[index];
  1083                 if (isLowSurrogate(c2)) {
  1084                     return toCodePoint(c1, c2);
  1085                 }
  1086             }
  1087         }
  1088         return c1;
  1089     }
  1090 
  1091     /**
  1092      * Returns the code point preceding the given index of the
  1093      * {@code CharSequence}. If the {@code char} value at
  1094      * {@code (index - 1)} in the {@code CharSequence} is in
  1095      * the low-surrogate range, {@code (index - 2)} is not
  1096      * negative, and the {@code char} value at {@code (index - 2)}
  1097      * in the {@code CharSequence} is in the
  1098      * high-surrogate range, then the supplementary code point
  1099      * corresponding to this surrogate pair is returned. Otherwise,
  1100      * the {@code char} value at {@code (index - 1)} is
  1101      * returned.
  1102      *
  1103      * @param seq the {@code CharSequence} instance
  1104      * @param index the index following the code point that should be returned
  1105      * @return the Unicode code point value before the given index.
  1106      * @exception NullPointerException if {@code seq} is null.
  1107      * @exception IndexOutOfBoundsException if the {@code index}
  1108      * argument is less than 1 or greater than {@link
  1109      * CharSequence#length() seq.length()}.
  1110      * @since  1.5
  1111      */
  1112     public static int codePointBefore(CharSequence seq, int index) {
  1113         char c2 = seq.charAt(--index);
  1114         if (isLowSurrogate(c2)) {
  1115             if (index > 0) {
  1116                 char c1 = seq.charAt(--index);
  1117                 if (isHighSurrogate(c1)) {
  1118                     return toCodePoint(c1, c2);
  1119                 }
  1120             }
  1121         }
  1122         return c2;
  1123     }
  1124 
  1125     /**
  1126      * Returns the code point preceding the given index of the
  1127      * {@code char} array. If the {@code char} value at
  1128      * {@code (index - 1)} in the {@code char} array is in
  1129      * the low-surrogate range, {@code (index - 2)} is not
  1130      * negative, and the {@code char} value at {@code (index - 2)}
  1131      * in the {@code char} array is in the
  1132      * high-surrogate range, then the supplementary code point
  1133      * corresponding to this surrogate pair is returned. Otherwise,
  1134      * the {@code char} value at {@code (index - 1)} is
  1135      * returned.
  1136      *
  1137      * @param a the {@code char} array
  1138      * @param index the index following the code point that should be returned
  1139      * @return the Unicode code point value before the given index.
  1140      * @exception NullPointerException if {@code a} is null.
  1141      * @exception IndexOutOfBoundsException if the {@code index}
  1142      * argument is less than 1 or greater than the length of the
  1143      * {@code char} array
  1144      * @since  1.5
  1145      */
  1146     public static int codePointBefore(char[] a, int index) {
  1147         return codePointBeforeImpl(a, index, 0);
  1148     }
  1149 
  1150     /**
  1151      * Returns the code point preceding the given index of the
  1152      * {@code char} array, where only array elements with
  1153      * {@code index} greater than or equal to {@code start}
  1154      * can be used. If the {@code char} value at {@code (index - 1)}
  1155      * in the {@code char} array is in the
  1156      * low-surrogate range, {@code (index - 2)} is not less than
  1157      * {@code start}, and the {@code char} value at
  1158      * {@code (index - 2)} in the {@code char} array is in
  1159      * the high-surrogate range, then the supplementary code point
  1160      * corresponding to this surrogate pair is returned. Otherwise,
  1161      * the {@code char} value at {@code (index - 1)} is
  1162      * returned.
  1163      *
  1164      * @param a the {@code char} array
  1165      * @param index the index following the code point that should be returned
  1166      * @param start the index of the first array element in the
  1167      * {@code char} array
  1168      * @return the Unicode code point value before the given index.
  1169      * @exception NullPointerException if {@code a} is null.
  1170      * @exception IndexOutOfBoundsException if the {@code index}
  1171      * argument is not greater than the {@code start} argument or
  1172      * is greater than the length of the {@code char} array, or
  1173      * if the {@code start} argument is negative or not less than
  1174      * the length of the {@code char} array.
  1175      * @since  1.5
  1176      */
  1177     public static int codePointBefore(char[] a, int index, int start) {
  1178         if (index <= start || start < 0 || start >= a.length) {
  1179             throw new IndexOutOfBoundsException();
  1180         }
  1181         return codePointBeforeImpl(a, index, start);
  1182     }
  1183 
  1184     // throws ArrayIndexOutofBoundsException if index-1 out of bounds
  1185     static int codePointBeforeImpl(char[] a, int index, int start) {
  1186         char c2 = a[--index];
  1187         if (isLowSurrogate(c2)) {
  1188             if (index > start) {
  1189                 char c1 = a[--index];
  1190                 if (isHighSurrogate(c1)) {
  1191                     return toCodePoint(c1, c2);
  1192                 }
  1193             }
  1194         }
  1195         return c2;
  1196     }
  1197 
  1198     /**
  1199      * Returns the leading surrogate (a
  1200      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
  1201      * high surrogate code unit</a>) of the
  1202      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
  1203      * surrogate pair</a>
  1204      * representing the specified supplementary character (Unicode
  1205      * code point) in the UTF-16 encoding.  If the specified character
  1206      * is not a
  1207      * <a href="Character.html#supplementary">supplementary character</a>,
  1208      * an unspecified {@code char} is returned.
  1209      *
  1210      * <p>If
  1211      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
  1212      * is {@code true}, then
  1213      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
  1214      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
  1215      * are also always {@code true}.
  1216      *
  1217      * @param   codePoint a supplementary character (Unicode code point)
  1218      * @return  the leading surrogate code unit used to represent the
  1219      *          character in the UTF-16 encoding
  1220      * @since   1.7
  1221      */
  1222     public static char highSurrogate(int codePoint) {
  1223         return (char) ((codePoint >>> 10)
  1224             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
  1225     }
  1226 
  1227     /**
  1228      * Returns the trailing surrogate (a
  1229      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
  1230      * low surrogate code unit</a>) of the
  1231      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
  1232      * surrogate pair</a>
  1233      * representing the specified supplementary character (Unicode
  1234      * code point) in the UTF-16 encoding.  If the specified character
  1235      * is not a
  1236      * <a href="Character.html#supplementary">supplementary character</a>,
  1237      * an unspecified {@code char} is returned.
  1238      *
  1239      * <p>If
  1240      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
  1241      * is {@code true}, then
  1242      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
  1243      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
  1244      * are also always {@code true}.
  1245      *
  1246      * @param   codePoint a supplementary character (Unicode code point)
  1247      * @return  the trailing surrogate code unit used to represent the
  1248      *          character in the UTF-16 encoding
  1249      * @since   1.7
  1250      */
  1251     public static char lowSurrogate(int codePoint) {
  1252         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
  1253     }
  1254 
  1255     /**
  1256      * Converts the specified character (Unicode code point) to its
  1257      * UTF-16 representation. If the specified code point is a BMP
  1258      * (Basic Multilingual Plane or Plane 0) value, the same value is
  1259      * stored in {@code dst[dstIndex]}, and 1 is returned. If the
  1260      * specified code point is a supplementary character, its
  1261      * surrogate values are stored in {@code dst[dstIndex]}
  1262      * (high-surrogate) and {@code dst[dstIndex+1]}
  1263      * (low-surrogate), and 2 is returned.
  1264      *
  1265      * @param  codePoint the character (Unicode code point) to be converted.
  1266      * @param  dst an array of {@code char} in which the
  1267      * {@code codePoint}'s UTF-16 value is stored.
  1268      * @param dstIndex the start index into the {@code dst}
  1269      * array where the converted value is stored.
  1270      * @return 1 if the code point is a BMP code point, 2 if the
  1271      * code point is a supplementary code point.
  1272      * @exception IllegalArgumentException if the specified
  1273      * {@code codePoint} is not a valid Unicode code point.
  1274      * @exception NullPointerException if the specified {@code dst} is null.
  1275      * @exception IndexOutOfBoundsException if {@code dstIndex}
  1276      * is negative or not less than {@code dst.length}, or if
  1277      * {@code dst} at {@code dstIndex} doesn't have enough
  1278      * array element(s) to store the resulting {@code char}
  1279      * value(s). (If {@code dstIndex} is equal to
  1280      * {@code dst.length-1} and the specified
  1281      * {@code codePoint} is a supplementary character, the
  1282      * high-surrogate value is not stored in
  1283      * {@code dst[dstIndex]}.)
  1284      * @since  1.5
  1285      */
  1286     public static int toChars(int codePoint, char[] dst, int dstIndex) {
  1287         if (isBmpCodePoint(codePoint)) {
  1288             dst[dstIndex] = (char) codePoint;
  1289             return 1;
  1290         } else if (isValidCodePoint(codePoint)) {
  1291             toSurrogates(codePoint, dst, dstIndex);
  1292             return 2;
  1293         } else {
  1294             throw new IllegalArgumentException();
  1295         }
  1296     }
  1297 
  1298     /**
  1299      * Converts the specified character (Unicode code point) to its
  1300      * UTF-16 representation stored in a {@code char} array. If
  1301      * the specified code point is a BMP (Basic Multilingual Plane or
  1302      * Plane 0) value, the resulting {@code char} array has
  1303      * the same value as {@code codePoint}. If the specified code
  1304      * point is a supplementary code point, the resulting
  1305      * {@code char} array has the corresponding surrogate pair.
  1306      *
  1307      * @param  codePoint a Unicode code point
  1308      * @return a {@code char} array having
  1309      *         {@code codePoint}'s UTF-16 representation.
  1310      * @exception IllegalArgumentException if the specified
  1311      * {@code codePoint} is not a valid Unicode code point.
  1312      * @since  1.5
  1313      */
  1314     public static char[] toChars(int codePoint) {
  1315         if (isBmpCodePoint(codePoint)) {
  1316             return new char[] { (char) codePoint };
  1317         } else if (isValidCodePoint(codePoint)) {
  1318             char[] result = new char[2];
  1319             toSurrogates(codePoint, result, 0);
  1320             return result;
  1321         } else {
  1322             throw new IllegalArgumentException();
  1323         }
  1324     }
  1325 
  1326     static void toSurrogates(int codePoint, char[] dst, int index) {
  1327         // We write elements "backwards" to guarantee all-or-nothing
  1328         dst[index+1] = lowSurrogate(codePoint);
  1329         dst[index] = highSurrogate(codePoint);
  1330     }
  1331 
  1332     /**
  1333      * Returns the number of Unicode code points in the text range of
  1334      * the specified char sequence. The text range begins at the
  1335      * specified {@code beginIndex} and extends to the
  1336      * {@code char} at index {@code endIndex - 1}. Thus the
  1337      * length (in {@code char}s) of the text range is
  1338      * {@code endIndex-beginIndex}. Unpaired surrogates within
  1339      * the text range count as one code point each.
  1340      *
  1341      * @param seq the char sequence
  1342      * @param beginIndex the index to the first {@code char} of
  1343      * the text range.
  1344      * @param endIndex the index after the last {@code char} of
  1345      * the text range.
  1346      * @return the number of Unicode code points in the specified text
  1347      * range
  1348      * @exception NullPointerException if {@code seq} is null.
  1349      * @exception IndexOutOfBoundsException if the
  1350      * {@code beginIndex} is negative, or {@code endIndex}
  1351      * is larger than the length of the given sequence, or
  1352      * {@code beginIndex} is larger than {@code endIndex}.
  1353      * @since  1.5
  1354      */
  1355     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
  1356         int length = seq.length();
  1357         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
  1358             throw new IndexOutOfBoundsException();
  1359         }
  1360         int n = endIndex - beginIndex;
  1361         for (int i = beginIndex; i < endIndex; ) {
  1362             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
  1363                 isLowSurrogate(seq.charAt(i))) {
  1364                 n--;
  1365                 i++;
  1366             }
  1367         }
  1368         return n;
  1369     }
  1370 
  1371     /**
  1372      * Returns the number of Unicode code points in a subarray of the
  1373      * {@code char} array argument. The {@code offset}
  1374      * argument is the index of the first {@code char} of the
  1375      * subarray and the {@code count} argument specifies the
  1376      * length of the subarray in {@code char}s. Unpaired
  1377      * surrogates within the subarray count as one code point each.
  1378      *
  1379      * @param a the {@code char} array
  1380      * @param offset the index of the first {@code char} in the
  1381      * given {@code char} array
  1382      * @param count the length of the subarray in {@code char}s
  1383      * @return the number of Unicode code points in the specified subarray
  1384      * @exception NullPointerException if {@code a} is null.
  1385      * @exception IndexOutOfBoundsException if {@code offset} or
  1386      * {@code count} is negative, or if {@code offset +
  1387      * count} is larger than the length of the given array.
  1388      * @since  1.5
  1389      */
  1390     public static int codePointCount(char[] a, int offset, int count) {
  1391         if (count > a.length - offset || offset < 0 || count < 0) {
  1392             throw new IndexOutOfBoundsException();
  1393         }
  1394         return codePointCountImpl(a, offset, count);
  1395     }
  1396 
  1397     static int codePointCountImpl(char[] a, int offset, int count) {
  1398         int endIndex = offset + count;
  1399         int n = count;
  1400         for (int i = offset; i < endIndex; ) {
  1401             if (isHighSurrogate(a[i++]) && i < endIndex &&
  1402                 isLowSurrogate(a[i])) {
  1403                 n--;
  1404                 i++;
  1405             }
  1406         }
  1407         return n;
  1408     }
  1409 
  1410     /**
  1411      * Returns the index within the given char sequence that is offset
  1412      * from the given {@code index} by {@code codePointOffset}
  1413      * code points. Unpaired surrogates within the text range given by
  1414      * {@code index} and {@code codePointOffset} count as
  1415      * one code point each.
  1416      *
  1417      * @param seq the char sequence
  1418      * @param index the index to be offset
  1419      * @param codePointOffset the offset in code points
  1420      * @return the index within the char sequence
  1421      * @exception NullPointerException if {@code seq} is null.
  1422      * @exception IndexOutOfBoundsException if {@code index}
  1423      *   is negative or larger than the length of the char sequence,
  1424      *   or if {@code codePointOffset} is positive and the
  1425      *   subsequence starting with {@code index} has fewer than
  1426      *   {@code codePointOffset} code points, or if
  1427      *   {@code codePointOffset} is negative and the subsequence
  1428      *   before {@code index} has fewer than the absolute value
  1429      *   of {@code codePointOffset} code points.
  1430      * @since 1.5
  1431      */
  1432     public static int offsetByCodePoints(CharSequence seq, int index,
  1433                                          int codePointOffset) {
  1434         int length = seq.length();
  1435         if (index < 0 || index > length) {
  1436             throw new IndexOutOfBoundsException();
  1437         }
  1438 
  1439         int x = index;
  1440         if (codePointOffset >= 0) {
  1441             int i;
  1442             for (i = 0; x < length && i < codePointOffset; i++) {
  1443                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
  1444                     isLowSurrogate(seq.charAt(x))) {
  1445                     x++;
  1446                 }
  1447             }
  1448             if (i < codePointOffset) {
  1449                 throw new IndexOutOfBoundsException();
  1450             }
  1451         } else {
  1452             int i;
  1453             for (i = codePointOffset; x > 0 && i < 0; i++) {
  1454                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
  1455                     isHighSurrogate(seq.charAt(x-1))) {
  1456                     x--;
  1457                 }
  1458             }
  1459             if (i < 0) {
  1460                 throw new IndexOutOfBoundsException();
  1461             }
  1462         }
  1463         return x;
  1464     }
  1465 
  1466     /**
  1467      * Returns the index within the given {@code char} subarray
  1468      * that is offset from the given {@code index} by
  1469      * {@code codePointOffset} code points. The
  1470      * {@code start} and {@code count} arguments specify a
  1471      * subarray of the {@code char} array. Unpaired surrogates
  1472      * within the text range given by {@code index} and
  1473      * {@code codePointOffset} count as one code point each.
  1474      *
  1475      * @param a the {@code char} array
  1476      * @param start the index of the first {@code char} of the
  1477      * subarray
  1478      * @param count the length of the subarray in {@code char}s
  1479      * @param index the index to be offset
  1480      * @param codePointOffset the offset in code points
  1481      * @return the index within the subarray
  1482      * @exception NullPointerException if {@code a} is null.
  1483      * @exception IndexOutOfBoundsException
  1484      *   if {@code start} or {@code count} is negative,
  1485      *   or if {@code start + count} is larger than the length of
  1486      *   the given array,
  1487      *   or if {@code index} is less than {@code start} or
     *   larger than {@code start + count},
  1489      *   or if {@code codePointOffset} is positive and the text range
  1490      *   starting with {@code index} and ending with {@code start + count - 1}
  1491      *   has fewer than {@code codePointOffset} code
  1492      *   points,
  1493      *   or if {@code codePointOffset} is negative and the text range
  1494      *   starting with {@code start} and ending with {@code index - 1}
  1495      *   has fewer than the absolute value of
  1496      *   {@code codePointOffset} code points.
  1497      * @since 1.5
  1498      */
  1499     public static int offsetByCodePoints(char[] a, int start, int count,
  1500                                          int index, int codePointOffset) {
  1501         if (count > a.length-start || start < 0 || count < 0
  1502             || index < start || index > start+count) {
  1503             throw new IndexOutOfBoundsException();
  1504         }
  1505         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
  1506     }
  1507 
  1508     static int offsetByCodePointsImpl(char[]a, int start, int count,
  1509                                       int index, int codePointOffset) {
  1510         int x = index;
  1511         if (codePointOffset >= 0) {
  1512             int limit = start + count;
  1513             int i;
  1514             for (i = 0; x < limit && i < codePointOffset; i++) {
  1515                 if (isHighSurrogate(a[x++]) && x < limit &&
  1516                     isLowSurrogate(a[x])) {
  1517                     x++;
  1518                 }
  1519             }
  1520             if (i < codePointOffset) {
  1521                 throw new IndexOutOfBoundsException();
  1522             }
  1523         } else {
  1524             int i;
  1525             for (i = codePointOffset; x > start && i < 0; i++) {
  1526                 if (isLowSurrogate(a[--x]) && x > start &&
  1527                     isHighSurrogate(a[x-1])) {
  1528                     x--;
  1529                 }
  1530             }
  1531             if (i < 0) {
  1532                 throw new IndexOutOfBoundsException();
  1533             }
  1534         }
  1535         return x;
  1536     }
  1537 
  1538     /**
  1539      * Determines if the specified character is a lowercase character.
  1540      * <p>
  1541      * A character is lowercase if its general category type, provided
  1542      * by {@code Character.getType(ch)}, is
  1543      * {@code LOWERCASE_LETTER}, or it has contributory property
  1544      * Other_Lowercase as defined by the Unicode Standard.
  1545      * <p>
  1546      * The following are examples of lowercase characters:
  1547      * <p><blockquote><pre>
  1548      * a b c d e f g h i j k l m n o p q r s t u v w x y z
  1549      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
  1550      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
  1551      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
  1552      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
  1553      * </pre></blockquote>
  1554      * <p> Many other Unicode characters are lowercase too.
  1555      *
  1556      * <p><b>Note:</b> This method cannot handle <a
  1557      * href="#supplementary"> supplementary characters</a>. To support
  1558      * all Unicode characters, including supplementary characters, use
  1559      * the {@link #isLowerCase(int)} method.
  1560      *
  1561      * @param   ch   the character to be tested.
  1562      * @return  {@code true} if the character is lowercase;
  1563      *          {@code false} otherwise.
  1564      * @see     Character#isLowerCase(char)
  1565      * @see     Character#isTitleCase(char)
  1566      * @see     Character#toLowerCase(char)
  1567      * @see     Character#getType(char)
  1568      */
  1569     public static boolean isLowerCase(char ch) {
  1570         return ch == toLowerCase(ch);
  1571     }
  1572 
  1573     /**
  1574      * Determines if the specified character is an uppercase character.
  1575      * <p>
  1576      * A character is uppercase if its general category type, provided by
     * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
     * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
  1579      * <p>
  1580      * The following are examples of uppercase characters:
  1581      * <p><blockquote><pre>
  1582      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
  1583      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
  1584      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
  1585      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
  1586      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
  1587      * </pre></blockquote>
  1588      * <p> Many other Unicode characters are uppercase too.<p>
  1589      *
  1590      * <p><b>Note:</b> This method cannot handle <a
  1591      * href="#supplementary"> supplementary characters</a>. To support
  1592      * all Unicode characters, including supplementary characters, use
  1593      * the {@link #isUpperCase(int)} method.
  1594      *
  1595      * @param   ch   the character to be tested.
  1596      * @return  {@code true} if the character is uppercase;
  1597      *          {@code false} otherwise.
  1598      * @see     Character#isLowerCase(char)
  1599      * @see     Character#isTitleCase(char)
  1600      * @see     Character#toUpperCase(char)
  1601      * @see     Character#getType(char)
  1602      * @since   1.0
  1603      */
  1604     public static boolean isUpperCase(char ch) {
  1605         return ch == toUpperCase(ch);
  1606     }
  1607 
  1608     /**
  1609      * Determines if the specified character is a titlecase character.
  1610      * <p>
  1611      * A character is a titlecase character if its general
  1612      * category type, provided by {@code Character.getType(ch)},
  1613      * is {@code TITLECASE_LETTER}.
  1614      * <p>
  1615      * Some characters look like pairs of Latin letters. For example, there
  1616      * is an uppercase letter that looks like "LJ" and has a corresponding
  1617      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
  1618      * is the appropriate form to use when rendering a word in lowercase
  1619      * with initial capitals, as for a book title.
  1620      * <p>
  1621      * These are some of the Unicode characters for which this method returns
  1622      * {@code true}:
  1623      * <ul>
  1624      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
  1625      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
  1626      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
  1627      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
  1628      * </ul>
  1629      * <p> Many other Unicode characters are titlecase too.<p>
  1630      *
  1631      * <p><b>Note:</b> This method cannot handle <a
  1632      * href="#supplementary"> supplementary characters</a>. To support
  1633      * all Unicode characters, including supplementary characters, use
  1634      * the {@link #isTitleCase(int)} method.
  1635      *
  1636      * @param   ch   the character to be tested.
  1637      * @return  {@code true} if the character is titlecase;
  1638      *          {@code false} otherwise.
  1639      * @see     Character#isLowerCase(char)
  1640      * @see     Character#isUpperCase(char)
  1641      * @see     Character#toTitleCase(char)
  1642      * @see     Character#getType(char)
  1643      * @since   1.0.2
  1644      */
  1645     public static boolean isTitleCase(char ch) {
  1646         return isTitleCase((int)ch);
  1647     }
  1648 
  1649     /**
  1650      * Determines if the specified character (Unicode code point) is a titlecase character.
  1651      * <p>
  1652      * A character is a titlecase character if its general
  1653      * category type, provided by {@link Character#getType(int) getType(codePoint)},
  1654      * is {@code TITLECASE_LETTER}.
  1655      * <p>
  1656      * Some characters look like pairs of Latin letters. For example, there
  1657      * is an uppercase letter that looks like "LJ" and has a corresponding
  1658      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
  1659      * is the appropriate form to use when rendering a word in lowercase
  1660      * with initial capitals, as for a book title.
  1661      * <p>
  1662      * These are some of the Unicode characters for which this method returns
  1663      * {@code true}:
  1664      * <ul>
  1665      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
  1666      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
  1667      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
  1668      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
  1669      * </ul>
  1670      * <p> Many other Unicode characters are titlecase too.<p>
  1671      *
  1672      * @param   codePoint the character (Unicode code point) to be tested.
  1673      * @return  {@code true} if the character is titlecase;
  1674      *          {@code false} otherwise.
  1675      * @see     Character#isLowerCase(int)
  1676      * @see     Character#isUpperCase(int)
  1677      * @see     Character#toTitleCase(int)
  1678      * @see     Character#getType(int)
  1679      * @since   1.5
  1680      */
  1681     public static boolean isTitleCase(int codePoint) {
  1682         return getType(codePoint) == Character.TITLECASE_LETTER;
  1683     }
  1684 
  1685     /**
  1686      * Determines if the specified character is a digit.
  1687      * <p>
  1688      * A character is a digit if its general category type, provided
  1689      * by {@code Character.getType(ch)}, is
  1690      * {@code DECIMAL_DIGIT_NUMBER}.
  1691      * <p>
  1692      * Some Unicode character ranges that contain digits:
  1693      * <ul>
  1694      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
  1695      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
  1696      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
  1697      *     Arabic-Indic digits
  1698      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
  1699      *     Extended Arabic-Indic digits
  1700      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
  1701      *     Devanagari digits
  1702      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
  1703      *     Fullwidth digits
  1704      * </ul>
  1705      *
  1706      * Many other character ranges contain digits as well.
  1707      *
  1708      * <p><b>Note:</b> This method cannot handle <a
  1709      * href="#supplementary"> supplementary characters</a>. To support
  1710      * all Unicode characters, including supplementary characters, use
  1711      * the {@link #isDigit(int)} method.
  1712      *
  1713      * @param   ch   the character to be tested.
  1714      * @return  {@code true} if the character is a digit;
  1715      *          {@code false} otherwise.
  1716      * @see     Character#digit(char, int)
  1717      * @see     Character#forDigit(int, int)
  1718      * @see     Character#getType(char)
  1719      */
  1720     public static boolean isDigit(char ch) {
  1721         return String.valueOf(ch).matches("\\d");
  1722     }
  1723 
  1724     /**
  1725      * Determines if the specified character (Unicode code point) is a digit.
  1726      * <p>
  1727      * A character is a digit if its general category type, provided
  1728      * by {@link Character#getType(int) getType(codePoint)}, is
  1729      * {@code DECIMAL_DIGIT_NUMBER}.
  1730      * <p>
  1731      * Some Unicode character ranges that contain digits:
  1732      * <ul>
  1733      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
  1734      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
  1735      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
  1736      *     Arabic-Indic digits
  1737      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
  1738      *     Extended Arabic-Indic digits
  1739      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
  1740      *     Devanagari digits
  1741      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
  1742      *     Fullwidth digits
  1743      * </ul>
  1744      *
  1745      * Many other character ranges contain digits as well.
  1746      *
  1747      * @param   codePoint the character (Unicode code point) to be tested.
  1748      * @return  {@code true} if the character is a digit;
  1749      *          {@code false} otherwise.
  1750      * @see     Character#forDigit(int, int)
  1751      * @see     Character#getType(int)
  1752      * @since   1.5
  1753      */
  1754     public static boolean isDigit(int codePoint) {
  1755         return fromCodeChars(codePoint).matches("\\d");
  1756     }
  1757     
    /**
     * Converts a numeric character code to a string by delegating to the
     * JavaScript expression {@code String.fromCharCode(c)}.
     * <p>
     * NOTE(review): JavaScript's {@code String.fromCharCode} presumably
     * operates on 16-bit code units, so values above {@code 0xFFFF} may not
     * yield the intended supplementary character -- confirm before relying
     * on this helper for such code points.
     *
     * @param codePoint the character code passed to the JavaScript body as {@code c}
     * @return the string produced by the JavaScript body
     */
    @JavaScriptBody(args = "c", body = "return String.fromCharCode(c);")
    private native static String fromCodeChars(int codePoint);
  1760 
  1761     /**
  1762      * Determines if a character is defined in Unicode.
  1763      * <p>
  1764      * A character is defined if at least one of the following is true:
  1765      * <ul>
  1766      * <li>It has an entry in the UnicodeData file.
  1767      * <li>It has a value in a range defined by the UnicodeData file.
  1768      * </ul>
  1769      *
  1770      * <p><b>Note:</b> This method cannot handle <a
  1771      * href="#supplementary"> supplementary characters</a>. To support
  1772      * all Unicode characters, including supplementary characters, use
  1773      * the {@link #isDefined(int)} method.
  1774      *
  1775      * @param   ch   the character to be tested
  1776      * @return  {@code true} if the character has a defined meaning
  1777      *          in Unicode; {@code false} otherwise.
  1778      * @see     Character#isDigit(char)
  1779      * @see     Character#isLetter(char)
  1780      * @see     Character#isLetterOrDigit(char)
  1781      * @see     Character#isLowerCase(char)
  1782      * @see     Character#isTitleCase(char)
  1783      * @see     Character#isUpperCase(char)
  1784      * @since   1.0.2
  1785      */
  1786     public static boolean isDefined(char ch) {
  1787         return isDefined((int)ch);
  1788     }
  1789 
  1790     /**
  1791      * Determines if a character (Unicode code point) is defined in Unicode.
  1792      * <p>
  1793      * A character is defined if at least one of the following is true:
  1794      * <ul>
  1795      * <li>It has an entry in the UnicodeData file.
  1796      * <li>It has a value in a range defined by the UnicodeData file.
  1797      * </ul>
  1798      *
  1799      * @param   codePoint the character (Unicode code point) to be tested.
  1800      * @return  {@code true} if the character has a defined meaning
  1801      *          in Unicode; {@code false} otherwise.
  1802      * @see     Character#isDigit(int)
  1803      * @see     Character#isLetter(int)
  1804      * @see     Character#isLetterOrDigit(int)
  1805      * @see     Character#isLowerCase(int)
  1806      * @see     Character#isTitleCase(int)
  1807      * @see     Character#isUpperCase(int)
  1808      * @since   1.5
  1809      */
  1810     public static boolean isDefined(int codePoint) {
  1811         return getType(codePoint) != Character.UNASSIGNED;
  1812     }
  1813 
  1814     /**
  1815      * Determines if the specified character is a letter.
  1816      * <p>
  1817      * A character is considered to be a letter if its general
  1818      * category type, provided by {@code Character.getType(ch)},
  1819      * is any of the following:
  1820      * <ul>
  1821      * <li> {@code UPPERCASE_LETTER}
  1822      * <li> {@code LOWERCASE_LETTER}
  1823      * <li> {@code TITLECASE_LETTER}
  1824      * <li> {@code MODIFIER_LETTER}
  1825      * <li> {@code OTHER_LETTER}
  1826      * </ul>
  1827      *
  1828      * Not all letters have case. Many characters are
  1829      * letters but are neither uppercase nor lowercase nor titlecase.
  1830      *
  1831      * <p><b>Note:</b> This method cannot handle <a
  1832      * href="#supplementary"> supplementary characters</a>. To support
  1833      * all Unicode characters, including supplementary characters, use
  1834      * the {@link #isLetter(int)} method.
  1835      *
  1836      * @param   ch   the character to be tested.
  1837      * @return  {@code true} if the character is a letter;
  1838      *          {@code false} otherwise.
  1839      * @see     Character#isDigit(char)
  1840      * @see     Character#isJavaIdentifierStart(char)
  1841      * @see     Character#isJavaLetter(char)
  1842      * @see     Character#isJavaLetterOrDigit(char)
  1843      * @see     Character#isLetterOrDigit(char)
  1844      * @see     Character#isLowerCase(char)
  1845      * @see     Character#isTitleCase(char)
  1846      * @see     Character#isUnicodeIdentifierStart(char)
  1847      * @see     Character#isUpperCase(char)
  1848      */
  1849     public static boolean isLetter(char ch) {
  1850         return String.valueOf(ch).matches("\\w") && !isDigit(ch);
  1851     }
  1852 
  1853     /**
  1854      * Determines if the specified character (Unicode code point) is a letter.
  1855      * <p>
  1856      * A character is considered to be a letter if its general
  1857      * category type, provided by {@link Character#getType(int) getType(codePoint)},
  1858      * is any of the following:
  1859      * <ul>
  1860      * <li> {@code UPPERCASE_LETTER}
  1861      * <li> {@code LOWERCASE_LETTER}
  1862      * <li> {@code TITLECASE_LETTER}
  1863      * <li> {@code MODIFIER_LETTER}
  1864      * <li> {@code OTHER_LETTER}
  1865      * </ul>
  1866      *
  1867      * Not all letters have case. Many characters are
  1868      * letters but are neither uppercase nor lowercase nor titlecase.
  1869      *
  1870      * @param   codePoint the character (Unicode code point) to be tested.
  1871      * @return  {@code true} if the character is a letter;
  1872      *          {@code false} otherwise.
  1873      * @see     Character#isDigit(int)
  1874      * @see     Character#isJavaIdentifierStart(int)
  1875      * @see     Character#isLetterOrDigit(int)
  1876      * @see     Character#isLowerCase(int)
  1877      * @see     Character#isTitleCase(int)
  1878      * @see     Character#isUnicodeIdentifierStart(int)
  1879      * @see     Character#isUpperCase(int)
  1880      * @since   1.5
  1881      */
  1882     public static boolean isLetter(int codePoint) {
  1883         return fromCodeChars(codePoint).matches("\\w") && !isDigit(codePoint);
  1884     }
  1885 
  1886     /**
  1887      * Determines if the specified character is a letter or digit.
  1888      * <p>
  1889      * A character is considered to be a letter or digit if either
  1890      * {@code Character.isLetter(char ch)} or
  1891      * {@code Character.isDigit(char ch)} returns
  1892      * {@code true} for the character.
  1893      *
  1894      * <p><b>Note:</b> This method cannot handle <a
  1895      * href="#supplementary"> supplementary characters</a>. To support
  1896      * all Unicode characters, including supplementary characters, use
  1897      * the {@link #isLetterOrDigit(int)} method.
  1898      *
  1899      * @param   ch   the character to be tested.
  1900      * @return  {@code true} if the character is a letter or digit;
  1901      *          {@code false} otherwise.
  1902      * @see     Character#isDigit(char)
  1903      * @see     Character#isJavaIdentifierPart(char)
  1904      * @see     Character#isJavaLetter(char)
  1905      * @see     Character#isJavaLetterOrDigit(char)
  1906      * @see     Character#isLetter(char)
  1907      * @see     Character#isUnicodeIdentifierPart(char)
  1908      * @since   1.0.2
  1909      */
  1910     public static boolean isLetterOrDigit(char ch) {
  1911         return String.valueOf(ch).matches("\\w");
  1912     }
  1913 
  1914     /**
  1915      * Determines if the specified character (Unicode code point) is a letter or digit.
  1916      * <p>
  1917      * A character is considered to be a letter or digit if either
  1918      * {@link #isLetter(int) isLetter(codePoint)} or
  1919      * {@link #isDigit(int) isDigit(codePoint)} returns
  1920      * {@code true} for the character.
  1921      *
  1922      * @param   codePoint the character (Unicode code point) to be tested.
  1923      * @return  {@code true} if the character is a letter or digit;
  1924      *          {@code false} otherwise.
  1925      * @see     Character#isDigit(int)
  1926      * @see     Character#isJavaIdentifierPart(int)
  1927      * @see     Character#isLetter(int)
  1928      * @see     Character#isUnicodeIdentifierPart(int)
  1929      * @since   1.5
  1930      */
  1931     public static boolean isLetterOrDigit(int codePoint) {
  1932         return fromCodeChars(codePoint).matches("\\w");
  1933     }
  1934     
  1935     public static int getType(int x) {
  1936         throw new UnsupportedOperationException("getType: " + (char)x);
  1937     }
  1938  
  1939     /**
  1940      * Determines if the specified character is
  1941      * permissible as the first character in a Java identifier.
  1942      * <p>
  1943      * A character may start a Java identifier if and only if
  1944      * one of the following conditions is true:
  1945      * <ul>
  1946      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
  1947      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
  1948      * <li> {@code ch} is a currency symbol (such as {@code '$'})
  1949      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
  1950      * </ul>
  1951      *
  1952      * <p><b>Note:</b> This method cannot handle <a
  1953      * href="#supplementary"> supplementary characters</a>. To support
  1954      * all Unicode characters, including supplementary characters, use
  1955      * the {@link #isJavaIdentifierStart(int)} method.
  1956      *
  1957      * @param   ch the character to be tested.
  1958      * @return  {@code true} if the character may start a Java identifier;
  1959      *          {@code false} otherwise.
  1960      * @see     Character#isJavaIdentifierPart(char)
  1961      * @see     Character#isLetter(char)
  1962      * @see     Character#isUnicodeIdentifierStart(char)
  1963      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
  1964      * @since   1.1
  1965      */
  1966     public static boolean isJavaIdentifierStart(char ch) {
  1967         return isJavaIdentifierStart((int)ch);
  1968     }
  1969 
  1970     /**
  1971      * Determines if the character (Unicode code point) is
  1972      * permissible as the first character in a Java identifier.
  1973      * <p>
  1974      * A character may start a Java identifier if and only if
  1975      * one of the following conditions is true:
  1976      * <ul>
  1977      * <li> {@link #isLetter(int) isLetter(codePoint)}
  1978      *      returns {@code true}
  1979      * <li> {@link #getType(int) getType(codePoint)}
  1980      *      returns {@code LETTER_NUMBER}
  1981      * <li> the referenced character is a currency symbol (such as {@code '$'})
  1982      * <li> the referenced character is a connecting punctuation character
  1983      *      (such as {@code '_'}).
  1984      * </ul>
  1985      *
  1986      * @param   codePoint the character (Unicode code point) to be tested.
  1987      * @return  {@code true} if the character may start a Java identifier;
  1988      *          {@code false} otherwise.
  1989      * @see     Character#isJavaIdentifierPart(int)
  1990      * @see     Character#isLetter(int)
  1991      * @see     Character#isUnicodeIdentifierStart(int)
  1992      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
  1993      * @since   1.5
  1994      */
  1995     public static boolean isJavaIdentifierStart(int codePoint) {
  1996         return 
  1997             ('A' <= codePoint && codePoint <= 'Z') ||
  1998             ('a' <= codePoint && codePoint <= 'z') ||
  1999             codePoint == '$';
  2000     }
  2001 
  2002     /**
  2003      * Determines if the specified character may be part of a Java
  2004      * identifier as other than the first character.
  2005      * <p>
  2006      * A character may be part of a Java identifier if any of the following
  2007      * are true:
  2008      * <ul>
  2009      * <li>  it is a letter
  2010      * <li>  it is a currency symbol (such as {@code '$'})
  2011      * <li>  it is a connecting punctuation character (such as {@code '_'})
  2012      * <li>  it is a digit
  2013      * <li>  it is a numeric letter (such as a Roman numeral character)
  2014      * <li>  it is a combining mark
  2015      * <li>  it is a non-spacing mark
  2016      * <li> {@code isIdentifierIgnorable} returns
  2017      * {@code true} for the character
  2018      * </ul>
  2019      *
  2020      * <p><b>Note:</b> This method cannot handle <a
  2021      * href="#supplementary"> supplementary characters</a>. To support
  2022      * all Unicode characters, including supplementary characters, use
  2023      * the {@link #isJavaIdentifierPart(int)} method.
  2024      *
  2025      * @param   ch      the character to be tested.
  2026      * @return {@code true} if the character may be part of a
  2027      *          Java identifier; {@code false} otherwise.
  2028      * @see     Character#isIdentifierIgnorable(char)
  2029      * @see     Character#isJavaIdentifierStart(char)
  2030      * @see     Character#isLetterOrDigit(char)
  2031      * @see     Character#isUnicodeIdentifierPart(char)
  2032      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
  2033      * @since   1.1
  2034      */
  2035     public static boolean isJavaIdentifierPart(char ch) {
  2036         return isJavaIdentifierPart((int)ch);
  2037     }
  2038 
  2039     /**
  2040      * Determines if the character (Unicode code point) may be part of a Java
  2041      * identifier as other than the first character.
  2042      * <p>
  2043      * A character may be part of a Java identifier if any of the following
  2044      * are true:
  2045      * <ul>
  2046      * <li>  it is a letter
  2047      * <li>  it is a currency symbol (such as {@code '$'})
  2048      * <li>  it is a connecting punctuation character (such as {@code '_'})
  2049      * <li>  it is a digit
  2050      * <li>  it is a numeric letter (such as a Roman numeral character)
  2051      * <li>  it is a combining mark
  2052      * <li>  it is a non-spacing mark
  2053      * <li> {@link #isIdentifierIgnorable(int)
  2054      * isIdentifierIgnorable(codePoint)} returns {@code true} for
  2055      * the character
  2056      * </ul>
  2057      *
  2058      * @param   codePoint the character (Unicode code point) to be tested.
  2059      * @return {@code true} if the character may be part of a
  2060      *          Java identifier; {@code false} otherwise.
  2061      * @see     Character#isIdentifierIgnorable(int)
  2062      * @see     Character#isJavaIdentifierStart(int)
  2063      * @see     Character#isLetterOrDigit(int)
  2064      * @see     Character#isUnicodeIdentifierPart(int)
  2065      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
  2066      * @since   1.5
  2067      */
  2068     public static boolean isJavaIdentifierPart(int codePoint) {
  2069         return isJavaIdentifierStart(codePoint) ||
  2070             ('0' <= codePoint && codePoint <= '9') || codePoint == '$';
  2071     }
  2072    
  2073     /**
  2074      * Converts the character argument to lowercase using case
  2075      * mapping information from the UnicodeData file.
  2076      * <p>
  2077      * Note that
  2078      * {@code Character.isLowerCase(Character.toLowerCase(ch))}
  2079      * does not always return {@code true} for some ranges of
  2080      * characters, particularly those that are symbols or ideographs.
  2081      *
  2082      * <p>In general, {@link String#toLowerCase()} should be used to map
  2083      * characters to lowercase. {@code String} case mapping methods
  2084      * have several benefits over {@code Character} case mapping methods.
  2085      * {@code String} case mapping methods can perform locale-sensitive
  2086      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
  2087      * the {@code Character} case mapping methods cannot.
  2088      *
  2089      * <p><b>Note:</b> This method cannot handle <a
  2090      * href="#supplementary"> supplementary characters</a>. To support
  2091      * all Unicode characters, including supplementary characters, use
  2092      * the {@link #toLowerCase(int)} method.
  2093      *
  2094      * @param   ch   the character to be converted.
  2095      * @return  the lowercase equivalent of the character, if any;
  2096      *          otherwise, the character itself.
  2097      * @see     Character#isLowerCase(char)
  2098      * @see     String#toLowerCase()
  2099      */
  2100     public static char toLowerCase(char ch) {
  2101         return String.valueOf(ch).toLowerCase().charAt(0);
  2102     }
  2103 
  2104     /**
  2105      * Converts the character argument to uppercase using case mapping
  2106      * information from the UnicodeData file.
  2107      * <p>
  2108      * Note that
  2109      * {@code Character.isUpperCase(Character.toUpperCase(ch))}
  2110      * does not always return {@code true} for some ranges of
  2111      * characters, particularly those that are symbols or ideographs.
  2112      *
  2113      * <p>In general, {@link String#toUpperCase()} should be used to map
  2114      * characters to uppercase. {@code String} case mapping methods
  2115      * have several benefits over {@code Character} case mapping methods.
  2116      * {@code String} case mapping methods can perform locale-sensitive
  2117      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
  2118      * the {@code Character} case mapping methods cannot.
  2119      *
  2120      * <p><b>Note:</b> This method cannot handle <a
  2121      * href="#supplementary"> supplementary characters</a>. To support
  2122      * all Unicode characters, including supplementary characters, use
  2123      * the {@link #toUpperCase(int)} method.
  2124      *
  2125      * @param   ch   the character to be converted.
  2126      * @return  the uppercase equivalent of the character, if any;
  2127      *          otherwise, the character itself.
  2128      * @see     Character#isUpperCase(char)
  2129      * @see     String#toUpperCase()
  2130      */
  2131     public static char toUpperCase(char ch) {
  2132         return String.valueOf(ch).toUpperCase().charAt(0);
  2133     }
  2134 
  2135     /**
  2136      * Returns the numeric value of the character {@code ch} in the
  2137      * specified radix.
  2138      * <p>
  2139      * If the radix is not in the range {@code MIN_RADIX} &le;
  2140      * {@code radix} &le; {@code MAX_RADIX} or if the
  2141      * value of {@code ch} is not a valid digit in the specified
  2142      * radix, {@code -1} is returned. A character is a valid digit
  2143      * if at least one of the following is true:
  2144      * <ul>
  2145      * <li>The method {@code isDigit} is {@code true} of the character
  2146      *     and the Unicode decimal digit value of the character (or its
  2147      *     single-character decomposition) is less than the specified radix.
  2148      *     In this case the decimal digit value is returned.
  2149      * <li>The character is one of the uppercase Latin letters
  2150      *     {@code 'A'} through {@code 'Z'} and its code is less than
  2151      *     {@code radix + 'A' - 10}.
  2152      *     In this case, {@code ch - 'A' + 10}
  2153      *     is returned.
  2154      * <li>The character is one of the lowercase Latin letters
  2155      *     {@code 'a'} through {@code 'z'} and its code is less than
  2156      *     {@code radix + 'a' - 10}.
  2157      *     In this case, {@code ch - 'a' + 10}
  2158      *     is returned.
  2159      * <li>The character is one of the fullwidth uppercase Latin letters A
  2160      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
  2161      *     and its code is less than
  2162      *     {@code radix + '\u005CuFF21' - 10}.
  2163      *     In this case, {@code ch - '\u005CuFF21' + 10}
  2164      *     is returned.
  2165      * <li>The character is one of the fullwidth lowercase Latin letters a
  2166      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
  2167      *     and its code is less than
  2168      *     {@code radix + '\u005CuFF41' - 10}.
  2169      *     In this case, {@code ch - '\u005CuFF41' + 10}
  2170      *     is returned.
  2171      * </ul>
  2172      *
  2173      * <p><b>Note:</b> This method cannot handle <a
  2174      * href="#supplementary"> supplementary characters</a>. To support
  2175      * all Unicode characters, including supplementary characters, use
  2176      * the {@link #digit(int, int)} method.
  2177      *
  2178      * @param   ch      the character to be converted.
  2179      * @param   radix   the radix.
  2180      * @return  the numeric value represented by the character in the
  2181      *          specified radix.
  2182      * @see     Character#forDigit(int, int)
  2183      * @see     Character#isDigit(char)
  2184      */
  2185     public static int digit(char ch, int radix) {
  2186         return digit((int)ch, radix);
  2187     }
  2188 
  2189     /**
  2190      * Returns the numeric value of the specified character (Unicode
  2191      * code point) in the specified radix.
  2192      *
  2193      * <p>If the radix is not in the range {@code MIN_RADIX} &le;
  2194      * {@code radix} &le; {@code MAX_RADIX} or if the
  2195      * character is not a valid digit in the specified
  2196      * radix, {@code -1} is returned. A character is a valid digit
  2197      * if at least one of the following is true:
  2198      * <ul>
  2199      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
  2200      *     and the Unicode decimal digit value of the character (or its
  2201      *     single-character decomposition) is less than the specified radix.
  2202      *     In this case the decimal digit value is returned.
  2203      * <li>The character is one of the uppercase Latin letters
  2204      *     {@code 'A'} through {@code 'Z'} and its code is less than
  2205      *     {@code radix + 'A' - 10}.
  2206      *     In this case, {@code codePoint - 'A' + 10}
  2207      *     is returned.
  2208      * <li>The character is one of the lowercase Latin letters
  2209      *     {@code 'a'} through {@code 'z'} and its code is less than
  2210      *     {@code radix + 'a' - 10}.
  2211      *     In this case, {@code codePoint - 'a' + 10}
  2212      *     is returned.
  2213      * <li>The character is one of the fullwidth uppercase Latin letters A
  2214      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
  2215      *     and its code is less than
  2216      *     {@code radix + '\u005CuFF21' - 10}.
  2217      *     In this case,
  2218      *     {@code codePoint - '\u005CuFF21' + 10}
  2219      *     is returned.
  2220      * <li>The character is one of the fullwidth lowercase Latin letters a
  2221      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
  2222      *     and its code is less than
  2223      *     {@code radix + '\u005CuFF41'- 10}.
  2224      *     In this case,
  2225      *     {@code codePoint - '\u005CuFF41' + 10}
  2226      *     is returned.
  2227      * </ul>
  2228      *
  2229      * @param   codePoint the character (Unicode code point) to be converted.
  2230      * @param   radix   the radix.
  2231      * @return  the numeric value represented by the character in the
  2232      *          specified radix.
  2233      * @see     Character#forDigit(int, int)
  2234      * @see     Character#isDigit(int)
  2235      * @since   1.5
  2236      */
  2237     @JavaScriptBody(args = { "codePoint", "radix" }, body=
  2238         "var x = parseInt(String.fromCharCode(codePoint), radix);\n"
  2239       + "return isNaN(x) ? -1 : x;"
  2240     )
  2241     public static int digit(int codePoint, int radix) {
  2242         throw new UnsupportedOperationException();
  2243     }
  2244 
  2245     /**
  2246      * Returns the {@code int} value that the specified Unicode
  2247      * character represents. For example, the character
  2248      * {@code '\u005Cu216C'} (the roman numeral fifty) will return
  2249      * an int with a value of 50.
  2250      * <p>
  2251      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
  2252      * {@code '\u005Cu005A'}), lowercase
  2253      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
  2254      * full width variant ({@code '\u005CuFF21'} through
  2255      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
  2256      * {@code '\u005CuFF5A'}) forms have numeric values from 10
  2257      * through 35. This is independent of the Unicode specification,
  2258      * which does not assign numeric values to these {@code char}
  2259      * values.
  2260      * <p>
  2261      * If the character does not have a numeric value, then -1 is returned.
  2262      * If the character has a numeric value that cannot be represented as a
  2263      * nonnegative integer (for example, a fractional value), then -2
  2264      * is returned.
  2265      *
  2266      * <p><b>Note:</b> This method cannot handle <a
  2267      * href="#supplementary"> supplementary characters</a>. To support
  2268      * all Unicode characters, including supplementary characters, use
  2269      * the {@link #getNumericValue(int)} method.
  2270      *
  2271      * @param   ch      the character to be converted.
  2272      * @return  the numeric value of the character, as a nonnegative {@code int}
  2273      *           value; -2 if the character has a numeric value that is not a
  2274      *          nonnegative integer; -1 if the character has no numeric value.
  2275      * @see     Character#forDigit(int, int)
  2276      * @see     Character#isDigit(char)
  2277      * @since   1.1
  2278      */
  2279     public static int getNumericValue(char ch) {
  2280         return getNumericValue((int)ch);
  2281     }
  2282 
  2283     /**
  2284      * Returns the {@code int} value that the specified
  2285      * character (Unicode code point) represents. For example, the character
  2286      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
  2287      * an {@code int} with a value of 50.
  2288      * <p>
  2289      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
  2290      * {@code '\u005Cu005A'}), lowercase
  2291      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
  2292      * full width variant ({@code '\u005CuFF21'} through
  2293      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
  2294      * {@code '\u005CuFF5A'}) forms have numeric values from 10
  2295      * through 35. This is independent of the Unicode specification,
  2296      * which does not assign numeric values to these {@code char}
  2297      * values.
  2298      * <p>
  2299      * If the character does not have a numeric value, then -1 is returned.
  2300      * If the character has a numeric value that cannot be represented as a
  2301      * nonnegative integer (for example, a fractional value), then -2
  2302      * is returned.
  2303      *
  2304      * @param   codePoint the character (Unicode code point) to be converted.
  2305      * @return  the numeric value of the character, as a nonnegative {@code int}
  2306      *          value; -2 if the character has a numeric value that is not a
  2307      *          nonnegative integer; -1 if the character has no numeric value.
  2308      * @see     Character#forDigit(int, int)
  2309      * @see     Character#isDigit(int)
  2310      * @since   1.5
  2311      */
    public static int getNumericValue(int codePoint) {
        // Not implemented here: every call throws, and therefore so does the
        // getNumericValue(char) overload, which delegates to this method.
        throw new UnsupportedOperationException();
    }
  2315 
  2316     /**
  2317      * Determines if the specified character is ISO-LATIN-1 white space.
  2318      * This method returns {@code true} for the following five
  2319      * characters only:
  2320      * <table>
  2321      * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>
  2322      *     <td>{@code HORIZONTAL TABULATION}</td></tr>
  2323      * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>
  2324      *     <td>{@code NEW LINE}</td></tr>
  2325      * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>
  2326      *     <td>{@code FORM FEED}</td></tr>
  2327      * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>
  2328      *     <td>{@code CARRIAGE RETURN}</td></tr>
  2329      * <tr><td>{@code '&nbsp;'}</td>  <td>{@code U+0020}</td>
  2330      *     <td>{@code SPACE}</td></tr>
  2331      * </table>
  2332      *
  2333      * @param      ch   the character to be tested.
  2334      * @return     {@code true} if the character is ISO-LATIN-1 white
  2335      *             space; {@code false} otherwise.
  2336      * @see        Character#isSpaceChar(char)
  2337      * @see        Character#isWhitespace(char)
  2338      * @deprecated Replaced by isWhitespace(char).
  2339      */
  2340     @Deprecated
  2341     public static boolean isSpace(char ch) {
  2342         return isSpaceChar(ch);
  2343     }
  2344 
  2345     public static boolean isSpaceChar(int ch) {
  2346         return (ch <= 0x0020) &&
  2347             (((((1L << 0x0009) |
  2348             (1L << 0x000A) |
  2349             (1L << 0x000C) |
  2350             (1L << 0x000D) |
  2351             (1L << 0x0020)) >> ch) & 1L) != 0);
  2352     }
  2353 
  2354 
  2355     /**
  2356      * Determines if the specified character is white space according to Java.
  2357      * A character is a Java whitespace character if and only if it satisfies
  2358      * one of the following criteria:
  2359      * <ul>
  2360      * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
  2361      *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
  2362      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
  2363      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
  2364      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
  2365      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
  2366      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
  2367      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
  2368      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
  2369      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
  2370      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
  2371      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
  2372      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
  2373      * </ul>
  2374      *
  2375      * <p><b>Note:</b> This method cannot handle <a
  2376      * href="#supplementary"> supplementary characters</a>. To support
  2377      * all Unicode characters, including supplementary characters, use
  2378      * the {@link #isWhitespace(int)} method.
  2379      *
  2380      * @param   ch the character to be tested.
  2381      * @return  {@code true} if the character is a Java whitespace
  2382      *          character; {@code false} otherwise.
  2383      * @see     Character#isSpaceChar(char)
  2384      * @since   1.1
  2385      */
  2386     public static boolean isWhitespace(char ch) {
  2387         return isWhitespace((int)ch);
  2388     }
  2389 
  2390     /**
  2391      * Determines if the specified character (Unicode code point) is
  2392      * white space according to Java.  A character is a Java
  2393      * whitespace character if and only if it satisfies one of the
  2394      * following criteria:
  2395      * <ul>
  2396      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
  2397      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
  2398      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
  2399      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
  2400      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
  2401      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
  2402      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
  2403      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
  2404      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
  2405      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
  2406      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
  2407      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
  2408      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
  2409      * </ul>
  2410      * <p>
  2411      *
  2412      * @param   codePoint the character (Unicode code point) to be tested.
  2413      * @return  {@code true} if the character is a Java whitespace
  2414      *          character; {@code false} otherwise.
  2415      * @see     Character#isSpaceChar(int)
  2416      * @since   1.5
  2417      */
  2418     public static boolean isWhitespace(int codePoint) {
  2419         // values up to 128: [9,10,11,12,13,28,29,30,31,32]
  2420         if (9 <= codePoint && 13 >= codePoint) {
  2421             return true;
  2422         }
  2423         if (28 <= codePoint && 32 >= codePoint) {
  2424             return true;
  2425         }
  2426         return false;
  2427     }
  2428 
  2429     /**
  2430      * Determines if the specified character is an ISO control
  2431      * character.  A character is considered to be an ISO control
  2432      * character if its code is in the range {@code '\u005Cu0000'}
  2433      * through {@code '\u005Cu001F'} or in the range
  2434      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
  2435      *
  2436      * <p><b>Note:</b> This method cannot handle <a
  2437      * href="#supplementary"> supplementary characters</a>. To support
  2438      * all Unicode characters, including supplementary characters, use
  2439      * the {@link #isISOControl(int)} method.
  2440      *
  2441      * @param   ch      the character to be tested.
  2442      * @return  {@code true} if the character is an ISO control character;
  2443      *          {@code false} otherwise.
  2444      *
  2445      * @see     Character#isSpaceChar(char)
  2446      * @see     Character#isWhitespace(char)
  2447      * @since   1.1
  2448      */
  2449     public static boolean isISOControl(char ch) {
  2450         return isISOControl((int)ch);
  2451     }
  2452 
  2453     /**
  2454      * Determines if the referenced character (Unicode code point) is an ISO control
  2455      * character.  A character is considered to be an ISO control
  2456      * character if its code is in the range {@code '\u005Cu0000'}
  2457      * through {@code '\u005Cu001F'} or in the range
  2458      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
  2459      *
  2460      * @param   codePoint the character (Unicode code point) to be tested.
  2461      * @return  {@code true} if the character is an ISO control character;
  2462      *          {@code false} otherwise.
  2463      * @see     Character#isSpaceChar(int)
  2464      * @see     Character#isWhitespace(int)
  2465      * @since   1.5
  2466      */
  2467     public static boolean isISOControl(int codePoint) {
  2468         // Optimized form of:
  2469         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
  2470         //     (codePoint >= 0x7F && codePoint <= 0x9F);
  2471         return codePoint <= 0x9F &&
  2472             (codePoint >= 0x7F || (codePoint >>> 5 == 0));
  2473     }
  2474 
  2475     /**
  2476      * Determines the character representation for a specific digit in
  2477      * the specified radix. If the value of {@code radix} is not a
  2478      * valid radix, or the value of {@code digit} is not a valid
  2479      * digit in the specified radix, the null character
  2480      * ({@code '\u005Cu0000'}) is returned.
  2481      * <p>
  2482      * The {@code radix} argument is valid if it is greater than or
  2483      * equal to {@code MIN_RADIX} and less than or equal to
  2484      * {@code MAX_RADIX}. The {@code digit} argument is valid if
  2485      * {@code 0 <= digit < radix}.
  2486      * <p>
  2487      * If the digit is less than 10, then
  2488      * {@code '0' + digit} is returned. Otherwise, the value
  2489      * {@code 'a' + digit - 10} is returned.
  2490      *
  2491      * @param   digit   the number to convert to a character.
  2492      * @param   radix   the radix.
  2493      * @return  the {@code char} representation of the specified digit
  2494      *          in the specified radix.
  2495      * @see     Character#MIN_RADIX
  2496      * @see     Character#MAX_RADIX
  2497      * @see     Character#digit(char, int)
  2498      */
  2499     public static char forDigit(int digit, int radix) {
  2500         if ((digit >= radix) || (digit < 0)) {
  2501             return '\0';
  2502         }
  2503         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
  2504             return '\0';
  2505         }
  2506         if (digit < 10) {
  2507             return (char)('0' + digit);
  2508         }
  2509         return (char)('a' - 10 + digit);
  2510     }
  2511 
  2512     /**
  2513      * Compares two {@code Character} objects numerically.
  2514      *
  2515      * @param   anotherCharacter   the {@code Character} to be compared.
  2516 
  2517      * @return  the value {@code 0} if the argument {@code Character}
  2518      *          is equal to this {@code Character}; a value less than
  2519      *          {@code 0} if this {@code Character} is numerically less
  2520      *          than the {@code Character} argument; and a value greater than
  2521      *          {@code 0} if this {@code Character} is numerically greater
  2522      *          than the {@code Character} argument (unsigned comparison).
  2523      *          Note that this is strictly a numerical comparison; it is not
  2524      *          locale-dependent.
  2525      * @since   1.2
  2526      */
  2527     public int compareTo(Character anotherCharacter) {
  2528         return compare(this.value, anotherCharacter.value);
  2529     }
  2530 
  2531     /**
  2532      * Compares two {@code char} values numerically.
  2533      * The value returned is identical to what would be returned by:
  2534      * <pre>
  2535      *    Character.valueOf(x).compareTo(Character.valueOf(y))
  2536      * </pre>
  2537      *
  2538      * @param  x the first {@code char} to compare
  2539      * @param  y the second {@code char} to compare
  2540      * @return the value {@code 0} if {@code x == y};
  2541      *         a value less than {@code 0} if {@code x < y}; and
  2542      *         a value greater than {@code 0} if {@code x > y}
  2543      * @since 1.7
  2544      */
  2545     public static int compare(char x, char y) {
  2546         return x - y;
  2547     }
  2548 
  2549 
    /**
     * The number of bits used to represent a {@code char} value in unsigned
     * binary form, constant {@code 16}.
     *
     * @since 1.5
     */
    public static final int SIZE = 16;
  2557 
  2558     /**
  2559      * Returns the value obtained by reversing the order of the bytes in the
  2560      * specified <tt>char</tt> value.
  2561      *
  2562      * @return the value obtained by reversing (or, equivalently, swapping)
  2563      *     the bytes in the specified <tt>char</tt> value.
  2564      * @since 1.5
  2565      */
  2566     public static char reverseBytes(char ch) {
  2567         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
  2568     }
  2569 
    static {
        // Last step of class initialization: run initValueOf(), which
        // installs the JavaScript-side valueOf function.
        initValueOf();
    }
    // Runs the JavaScript snippet below. It assigns a valueOf function on the
    // object returned by vm.java_lang_Character(false); that function returns
    // the result of calling _value() on its receiver.
    // NOTE(review): presumably this lets JavaScript coerce a Character to its
    // primitive value - confirm against the VM's class/prototype layout.
    @JavaScriptBody(args = {}, body = 
        "vm.java_lang_Character(false)." +
        "valueOf = function() { return this._value(); };"
    )
    private native static void initValueOf();
  2579     
  2580 }