hg/bck2brwsr: rt/emul/mini/src/main/java/java/lang/Character.java@cd1cc103a03c

     1 /*

     2  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.

     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

     4  *

     5  * This code is free software; you can redistribute it and/or modify it

     6  * under the terms of the GNU General Public License version 2 only, as

     7  * published by the Free Software Foundation.  Oracle designates this

     8  * particular file as subject to the "Classpath" exception as provided

     9  * by Oracle in the LICENSE file that accompanied this code.

    10  *

    11  * This code is distributed in the hope that it will be useful, but WITHOUT

    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

    14  * version 2 for more details (a copy is included in the LICENSE file that

    15  * accompanied this code).

    16  *

    17  * You should have received a copy of the GNU General Public License version

    18  * 2 along with this work; if not, write to the Free Software Foundation,

    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

    20  *

    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA

    22  * or visit www.oracle.com if you need additional information or have any

    23  * questions.

    24  */

    26 package java.lang;

    28 import org.apidesign.bck2brwsr.core.JavaScriptBody;

    30 /**

    31  * The {@code Character} class wraps a value of the primitive

    32  * type {@code char} in an object. An object of type

    33  * {@code Character} contains a single field whose type is

    34  * {@code char}.

    35  * <p>

    36  * In addition, this class provides several methods for determining

    37  * a character's category (lowercase letter, digit, etc.) and for converting

    38  * characters from uppercase to lowercase and vice versa.

    39  * <p>

    40  * Character information is based on the Unicode Standard, version 6.0.0.

    41  * <p>

    42  * The methods and data of class {@code Character} are defined by

    43  * the information in the <i>UnicodeData</i> file that is part of the

    44  * Unicode Character Database maintained by the Unicode

    45  * Consortium. This file specifies various properties including name

    46  * and general category for every defined Unicode code point or

    47  * character range.

    48  * <p>

    49  * The file and its description are available from the Unicode Consortium at:

    50  * <ul>

    51  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>

    52  * </ul>

    53  *

    54  * <h4><a name="unicode">Unicode Character Representations</a></h4>

    55  *

    56  * <p>The {@code char} data type (and therefore the value that a

    57  * {@code Character} object encapsulates) are based on the

    58  * original Unicode specification, which defined characters as

    59  * fixed-width 16-bit entities. The Unicode Standard has since been

    60  * changed to allow for characters whose representation requires more

    61  * than 16 bits.  The range of legal <em>code point</em>s is now

    62  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.

    63  * (Refer to the <a

    64  * href="http://www.unicode.org/reports/tr27/#notation"><i>

    65  * definition</i></a> of the U+<i>n</i> notation in the Unicode

    66  * Standard.)

    67  *

    68  * <p><a name="BMP">The set of characters from U+0000 to U+FFFF is

    69  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.

    70  * <a name="supplementary">Characters</a> whose code points are greater

    71  * than U+FFFF are called <em>supplementary character</em>s.  The Java

    72  * platform uses the UTF-16 representation in {@code char} arrays and

    73  * in the {@code String} and {@code StringBuffer} classes. In

    74  * this representation, supplementary characters are represented as a pair

    75  * of {@code char} values, the first from the <em>high-surrogates</em>

    76  * range, (&#92;uD800-&#92;uDBFF), the second from the

    77  * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).

    78  *

    79  * <p>A {@code char} value, therefore, represents Basic

    80  * Multilingual Plane (BMP) code points, including the surrogate

    81  * code points, or code units of the UTF-16 encoding. An

    82  * {@code int} value represents all Unicode code points,

    83  * including supplementary code points. The lower (least significant)

    84  * 21 bits of {@code int} are used to represent Unicode code

    85  * points and the upper (most significant) 11 bits must be zero.

    86  * Unless otherwise specified, the behavior with respect to

    87  * supplementary characters and surrogate {@code char} values is

    88  * as follows:

    89  *

    90  * <ul>

    91  * <li>The methods that only accept a {@code char} value cannot support

    92  * supplementary characters. They treat {@code char} values from the

    93  * surrogate ranges as undefined characters. For example,

    94  * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though

    95  * this specific value if followed by any low-surrogate value in a string

    96  * would represent a letter.

    97  *

    98  * <li>The methods that accept an {@code int} value support all

    99  * Unicode characters, including supplementary characters. For

   100  * example, {@code Character.isLetter(0x2F81A)} returns

   101  * {@code true} because the code point value represents a letter

   102  * (a CJK ideograph).

   103  * </ul>

   104  *

   105  * <p>In the Java SE API documentation, <em>Unicode code point</em> is

   106  * used for character values in the range between U+0000 and U+10FFFF,

   107  * and <em>Unicode code unit</em> is used for 16-bit

   108  * {@code char} values that are code units of the <em>UTF-16</em>

   109  * encoding. For more information on Unicode terminology, refer to the

   110  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.

   111  *

   112  * @author  Lee Boynton

   113  * @author  Guy Steele

   114  * @author  Akira Tanaka

   115  * @author  Martin Buchholz

   116  * @author  Ulf Zibis

   117  * @since   1.0

   118  */

   119 public final

   120 class Character implements java.io.Serializable, Comparable<Character> {

   121     /**

   122      * The minimum radix available for conversion to and from strings.

   123      * The constant value of this field is the smallest value permitted

   124      * for the radix argument in radix-conversion methods such as the

   125      * {@code digit} method, the {@code forDigit} method, and the

   126      * {@code toString} method of class {@code Integer}.

   127      *

   128      * @see     Character#digit(char, int)

   129      * @see     Character#forDigit(int, int)

   130      * @see     Integer#toString(int, int)

   131      * @see     Integer#valueOf(String)

   132      */

   133     public static final int MIN_RADIX = 2;

   135     /**

   136      * The maximum radix available for conversion to and from strings.

   137      * The constant value of this field is the largest value permitted

   138      * for the radix argument in radix-conversion methods such as the

   139      * {@code digit} method, the {@code forDigit} method, and the

   140      * {@code toString} method of class {@code Integer}.

   141      *

   142      * @see     Character#digit(char, int)

   143      * @see     Character#forDigit(int, int)

   144      * @see     Integer#toString(int, int)

   145      * @see     Integer#valueOf(String)

   146      */

   147     public static final int MAX_RADIX = 36;

   149     /**

   150      * The constant value of this field is the smallest value of type

   151      * {@code char}, {@code '\u005Cu0000'}.

   152      *

   153      * @since   1.0.2

   154      */

   155     public static final char MIN_VALUE = '\u0000';

   157     /**

   158      * The constant value of this field is the largest value of type

   159      * {@code char}, {@code '\u005CuFFFF'}.

   160      *

   161      * @since   1.0.2

   162      */

   163     public static final char MAX_VALUE = '\uFFFF';

   165     /**

   166      * The {@code Class} instance representing the primitive type

   167      * {@code char}.

   168      *

   169      * @since   1.1

   170      */

   171     public static final Class<Character> TYPE = Class.getPrimitiveClass("char");

   173     /*

   174      * Normative general types

   175      */

   177     /*

   178      * General character types

   179      */

   181     /**

   182      * General category "Cn" in the Unicode specification.

   183      * @since   1.1

   184      */

   185     public static final byte UNASSIGNED = 0;

   187     /**

   188      * General category "Lu" in the Unicode specification.

   189      * @since   1.1

   190      */

   191     public static final byte UPPERCASE_LETTER = 1;

   193     /**

   194      * General category "Ll" in the Unicode specification.

   195      * @since   1.1

   196      */

   197     public static final byte LOWERCASE_LETTER = 2;

   199     /**

   200      * General category "Lt" in the Unicode specification.

   201      * @since   1.1

   202      */

   203     public static final byte TITLECASE_LETTER = 3;

   205     /**

   206      * General category "Lm" in the Unicode specification.

   207      * @since   1.1

   208      */

   209     public static final byte MODIFIER_LETTER = 4;

   211     /**

   212      * General category "Lo" in the Unicode specification.

   213      * @since   1.1

   214      */

   215     public static final byte OTHER_LETTER = 5;

   217     /**

   218      * General category "Mn" in the Unicode specification.

   219      * @since   1.1

   220      */

   221     public static final byte NON_SPACING_MARK = 6;

   223     /**

   224      * General category "Me" in the Unicode specification.

   225      * @since   1.1

   226      */

   227     public static final byte ENCLOSING_MARK = 7;

   229     /**

   230      * General category "Mc" in the Unicode specification.

   231      * @since   1.1

   232      */

   233     public static final byte COMBINING_SPACING_MARK = 8;

   235     /**

   236      * General category "Nd" in the Unicode specification.

   237      * @since   1.1

   238      */

   239     public static final byte DECIMAL_DIGIT_NUMBER        = 9;

   241     /**

   242      * General category "Nl" in the Unicode specification.

   243      * @since   1.1

   244      */

   245     public static final byte LETTER_NUMBER = 10;

   247     /**

   248      * General category "No" in the Unicode specification.

   249      * @since   1.1

   250      */

   251     public static final byte OTHER_NUMBER = 11;

   253     /**

   254      * General category "Zs" in the Unicode specification.

   255      * @since   1.1

   256      */

   257     public static final byte SPACE_SEPARATOR = 12;

   259     /**

   260      * General category "Zl" in the Unicode specification.

   261      * @since   1.1

   262      */

   263     public static final byte LINE_SEPARATOR = 13;

   265     /**

   266      * General category "Zp" in the Unicode specification.

   267      * @since   1.1

   268      */

   269     public static final byte PARAGRAPH_SEPARATOR = 14;

   271     /**

   272      * General category "Cc" in the Unicode specification.

   273      * @since   1.1

   274      */

   275     public static final byte CONTROL = 15;

   277     /**

   278      * General category "Cf" in the Unicode specification.

   279      * @since   1.1

   280      */

   281     public static final byte FORMAT = 16;

   283     /**

   284      * General category "Co" in the Unicode specification.

   285      * @since   1.1

   286      */

   287     public static final byte PRIVATE_USE = 18;

   289     /**

   290      * General category "Cs" in the Unicode specification.

   291      * @since   1.1

   292      */

   293     public static final byte SURROGATE = 19;

   295     /**

   296      * General category "Pd" in the Unicode specification.

   297      * @since   1.1

   298      */

   299     public static final byte DASH_PUNCTUATION = 20;

   301     /**

   302      * General category "Ps" in the Unicode specification.

   303      * @since   1.1

   304      */

   305     public static final byte START_PUNCTUATION = 21;

   307     /**

   308      * General category "Pe" in the Unicode specification.

   309      * @since   1.1

   310      */

   311     public static final byte END_PUNCTUATION = 22;

   313     /**

   314      * General category "Pc" in the Unicode specification.

   315      * @since   1.1

   316      */

   317     public static final byte CONNECTOR_PUNCTUATION = 23;

   319     /**

   320      * General category "Po" in the Unicode specification.

   321      * @since   1.1

   322      */

   323     public static final byte OTHER_PUNCTUATION = 24;

   325     /**

   326      * General category "Sm" in the Unicode specification.

   327      * @since   1.1

   328      */

   329     public static final byte MATH_SYMBOL = 25;

   331     /**

   332      * General category "Sc" in the Unicode specification.

   333      * @since   1.1

   334      */

   335     public static final byte CURRENCY_SYMBOL = 26;

   337     /**

   338      * General category "Sk" in the Unicode specification.

   339      * @since   1.1

   340      */

   341     public static final byte MODIFIER_SYMBOL = 27;

   343     /**

   344      * General category "So" in the Unicode specification.

   345      * @since   1.1

   346      */

   347     public static final byte OTHER_SYMBOL = 28;

   349     /**

   350      * General category "Pi" in the Unicode specification.

   351      * @since   1.4

   352      */

   353     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;

   355     /**

   356      * General category "Pf" in the Unicode specification.

   357      * @since   1.4

   358      */

   359     public static final byte FINAL_QUOTE_PUNCTUATION = 30;

   361     /**

   362      * Error flag. Use int (code point) to avoid confusion with U+FFFF.

   363      */

   364     static final int ERROR = 0xFFFFFFFF;

   367     /**

   368      * Undefined bidirectional character type. Undefined {@code char}

   369      * values have undefined directionality in the Unicode specification.

   370      * @since 1.4

   371      */

   372     public static final byte DIRECTIONALITY_UNDEFINED = -1;

   374     /**

   375      * Strong bidirectional character type "L" in the Unicode specification.

   376      * @since 1.4

   377      */

   378     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;

   380     /**

   381      * Strong bidirectional character type "R" in the Unicode specification.

   382      * @since 1.4

   383      */

   384     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;

   386     /**

   387      * Strong bidirectional character type "AL" in the Unicode specification.

   388      * @since 1.4

   389      */

   390     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;

   392     /**

   393      * Weak bidirectional character type "EN" in the Unicode specification.

   394      * @since 1.4

   395      */

   396     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;

   398     /**

   399      * Weak bidirectional character type "ES" in the Unicode specification.

   400      * @since 1.4

   401      */

   402     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;

   404     /**

   405      * Weak bidirectional character type "ET" in the Unicode specification.

   406      * @since 1.4

   407      */

   408     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;

   410     /**

   411      * Weak bidirectional character type "AN" in the Unicode specification.

   412      * @since 1.4

   413      */

   414     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;

   416     /**

   417      * Weak bidirectional character type "CS" in the Unicode specification.

   418      * @since 1.4

   419      */

   420     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;

   422     /**

   423      * Weak bidirectional character type "NSM" in the Unicode specification.

   424      * @since 1.4

   425      */

   426     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;

   428     /**

   429      * Weak bidirectional character type "BN" in the Unicode specification.

   430      * @since 1.4

   431      */

   432     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;

   434     /**

   435      * Neutral bidirectional character type "B" in the Unicode specification.

   436      * @since 1.4

   437      */

   438     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;

   440     /**

   441      * Neutral bidirectional character type "S" in the Unicode specification.

   442      * @since 1.4

   443      */

   444     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;

   446     /**

   447      * Neutral bidirectional character type "WS" in the Unicode specification.

   448      * @since 1.4

   449      */

   450     public static final byte DIRECTIONALITY_WHITESPACE = 12;

   452     /**

   453      * Neutral bidirectional character type "ON" in the Unicode specification.

   454      * @since 1.4

   455      */

   456     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;

   458     /**

   459      * Strong bidirectional character type "LRE" in the Unicode specification.

   460      * @since 1.4

   461      */

   462     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;

   464     /**

   465      * Strong bidirectional character type "LRO" in the Unicode specification.

   466      * @since 1.4

   467      */

   468     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;

   470     /**

   471      * Strong bidirectional character type "RLE" in the Unicode specification.

   472      * @since 1.4

   473      */

   474     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;

   476     /**

   477      * Strong bidirectional character type "RLO" in the Unicode specification.

   478      * @since 1.4

   479      */

   480     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;

   482     /**

   483      * Weak bidirectional character type "PDF" in the Unicode specification.

   484      * @since 1.4

   485      */

   486     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;

   488     /**

   489      * The minimum value of a

   490      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">

   491      * Unicode high-surrogate code unit</a>

   492      * in the UTF-16 encoding, constant {@code '\u005CuD800'}.

   493      * A high-surrogate is also known as a <i>leading-surrogate</i>.

   494      *

   495      * @since 1.5

   496      */

   497     public static final char MIN_HIGH_SURROGATE = '\uD800';

   499     /**

   500      * The maximum value of a

   501      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">

   502      * Unicode high-surrogate code unit</a>

   503      * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.

   504      * A high-surrogate is also known as a <i>leading-surrogate</i>.

   505      *

   506      * @since 1.5

   507      */

   508     public static final char MAX_HIGH_SURROGATE = '\uDBFF';

   510     /**

   511      * The minimum value of a

   512      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">

   513      * Unicode low-surrogate code unit</a>

   514      * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.

   515      * A low-surrogate is also known as a <i>trailing-surrogate</i>.

   516      *

   517      * @since 1.5

   518      */

   519     public static final char MIN_LOW_SURROGATE  = '\uDC00';

   521     /**

   522      * The maximum value of a

   523      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">

   524      * Unicode low-surrogate code unit</a>

   525      * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.

   526      * A low-surrogate is also known as a <i>trailing-surrogate</i>.

   527      *

   528      * @since 1.5

   529      */

   530     public static final char MAX_LOW_SURROGATE  = '\uDFFF';

   532     /**

   533      * The minimum value of a Unicode surrogate code unit in the

   534      * UTF-16 encoding, constant {@code '\u005CuD800'}.

   535      *

   536      * @since 1.5

   537      */

   538     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;

   540     /**

   541      * The maximum value of a Unicode surrogate code unit in the

   542      * UTF-16 encoding, constant {@code '\u005CuDFFF'}.

   543      *

   544      * @since 1.5

   545      */

   546     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;

   548     /**

   549      * The minimum value of a

   550      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">

   551      * Unicode supplementary code point</a>, constant {@code U+10000}.

   552      *

   553      * @since 1.5

   554      */

   555     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;

   557     /**

   558      * The minimum value of a

   559      * <a href="http://www.unicode.org/glossary/#code_point">

   560      * Unicode code point</a>, constant {@code U+0000}.

   561      *

   562      * @since 1.5

   563      */

   564     public static final int MIN_CODE_POINT = 0x000000;

   566     /**

   567      * The maximum value of a

   568      * <a href="http://www.unicode.org/glossary/#code_point">

   569      * Unicode code point</a>, constant {@code U+10FFFF}.

   570      *

   571      * @since 1.5

   572      */

   573     public static final int MAX_CODE_POINT = 0X10FFFF;

   575     public static boolean isAlphabetic(int ch) {

   576         throw new UnsupportedOperationException("isAlphabetic: " + (char)ch);

   577     }

   579     public static boolean isIdeographic(int ch) {

   580         throw new UnsupportedOperationException("isIdeographic: " + (char)ch);

   581     }

   583     public static boolean isLowerCase(int ch) {

   584         throw new UnsupportedOperationException("isLowerCase: " + (char)ch);

   585     }

   587     public static boolean isUpperCase(int ch) {

   588         throw new UnsupportedOperationException("isUpperCase: " + (char)ch);

   589     }

   591     public static boolean isMirrored(int ch) {

   592         throw new UnsupportedOperationException("isMirrored: " + (char)ch);

   593     }

   595     public static boolean isIdentifierIgnorable(int ch) {

   596         throw new UnsupportedOperationException("isIdentifierIgnorable: " + (char)ch);

   597     }

   599     public static boolean isUnicodeIdentifierPart(int ch) {

   600         throw new UnsupportedOperationException("isUnicodeIdentifierPart: " + (char)ch);

   601     }

   603     public static boolean isUnicodeIdentifierStart(int ch) {

   604         throw new UnsupportedOperationException("isUnicodeIdentifierStart: " + (char)ch);

   605     }

   607     public static char toUpperCase(int ch) {

   608         throw new UnsupportedOperationException("toUpperCase: " + (char)ch);

   609     }

   611     public static int toLowerCase(int ch) {

   612         throw new UnsupportedOperationException("toLowerCase: " + (char)ch);

   613     }

   616     /**

   617      * Instances of this class represent particular subsets of the Unicode

   618      * character set.  The only family of subsets defined in the

   619      * {@code Character} class is {@link Character.UnicodeBlock}.

   620      * Other portions of the Java API may define other subsets for their

   621      * own purposes.

   622      *

   623      * @since 1.2

   624      */

   625     public static class Subset  {

   627         private String name;

   629         /**

   630          * Constructs a new {@code Subset} instance.

   631          *

   632          * @param  name  The name of this subset

   633          * @exception NullPointerException if name is {@code null}

   634          */

   635         protected Subset(String name) {

   636             if (name == null) {

   637                 throw new NullPointerException("name");

   638             }

   639             this.name = name;

   640         }

   642         /**

   643          * Compares two {@code Subset} objects for equality.

   644          * This method returns {@code true} if and only if

   645          * {@code this} and the argument refer to the same

   646          * object; since this method is {@code final}, this

   647          * guarantee holds for all subclasses.

   648          */

   649         public final boolean equals(Object obj) {

   650             return (this == obj);

   651         }

   653         /**

   654          * Returns the standard hash code as defined by the

   655          * {@link Object#hashCode} method.  This method

   656          * is {@code final} in order to ensure that the

   657          * {@code equals} and {@code hashCode} methods will

   658          * be consistent in all subclasses.

   659          */

   660         public final int hashCode() {

   661             return super.hashCode();

   662         }

   664         /**

   665          * Returns the name of this subset.

   666          */

   667         public final String toString() {

   668             return name;

   669         }

   670     }

   672     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt

   673     // for the latest specification of Unicode Blocks.

   676     /**

   677      * The value of the {@code Character}.

   678      *

   679      * @serial

   680      */

   681     private final char value;

   683     /** use serialVersionUID from JDK 1.0.2 for interoperability */

   684     private static final long serialVersionUID = 3786198910865385080L;

   /**
    * Creates a {@code Character} object wrapping the given
    * {@code char}.
    *
    * @param  value   the {@code char} that the new object stores
    */
   public Character(char value) {
       this.value = value;
   }

   697     private static class CharacterCache {

   698         private CharacterCache(){}

   700         static final Character cache[] = new Character[127 + 1];

   702         static {

   703             for (int i = 0; i < cache.length; i++)

   704                 cache[i] = new Character((char)i);

   705         }

   706     }

   708     /**

   709      * Returns a <tt>Character</tt> instance representing the specified

   710      * <tt>char</tt> value.

   711      * If a new <tt>Character</tt> instance is not required, this method

   712      * should generally be used in preference to the constructor

   713      * {@link #Character(char)}, as this method is likely to yield

   714      * significantly better space and time performance by caching

   715      * frequently requested values.

   716      *

   717      * This method will always cache values in the range {@code

   718      * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may

   719      * cache other values outside of this range.

   720      *

   721      * @param  c a char value.

   722      * @return a <tt>Character</tt> instance representing <tt>c</tt>.

   723      * @since  1.5

   724      */

   725     public static Character valueOf(char c) {

   726         if (c <= 127) { // must cache

   727             return CharacterCache.cache[(int)c];

   728         }

   729         return new Character(c);

   730     }

   732     /**

   733      * Returns the value of this {@code Character} object.

   734      * @return  the primitive {@code char} value represented by

   735      *          this object.

   736      */

   737     public char charValue() {

   738         return value;

   739     }

   741     /**

   742      * Returns a hash code for this {@code Character}; equal to the result

   743      * of invoking {@code charValue()}.

   744      *

   745      * @return a hash code value for this {@code Character}

   746      */

   747     public int hashCode() {

   748         return (int)value;

   749     }

   751     /**

   752      * Compares this object against the specified object.

   753      * The result is {@code true} if and only if the argument is not

   754      * {@code null} and is a {@code Character} object that

   755      * represents the same {@code char} value as this object.

   756      *

   757      * @param   obj   the object to compare with.

   758      * @return  {@code true} if the objects are the same;

   759      *          {@code false} otherwise.

   760      */

   761     public boolean equals(Object obj) {

   762         if (obj instanceof Character) {

   763             return value == ((Character)obj).charValue();

   764         }

   765         return false;

   766     }

   768     /**

   769      * Returns a {@code String} object representing this

   770      * {@code Character}'s value.  The result is a string of

   771      * length 1 whose sole component is the primitive

   772      * {@code char} value represented by this

   773      * {@code Character} object.

   774      *

   775      * @return  a string representation of this object.

   776      */

   777     public String toString() {

   778         char buf[] = {value};

   779         return String.valueOf(buf);

   780     }

   782     /**

   783      * Returns a {@code String} object representing the

   784      * specified {@code char}.  The result is a string of length

   785      * 1 consisting solely of the specified {@code char}.

   786      *

   787      * @param c the {@code char} to be converted

   788      * @return the string representation of the specified {@code char}

   789      * @since 1.4

   790      */

   791     public static String toString(char c) {

   792         return String.valueOf(c);

   793     }

   795     /**

   796      * Determines whether the specified code point is a valid

   797      * <a href="http://www.unicode.org/glossary/#code_point">

   798      * Unicode code point value</a>.

   799      *

   800      * @param  codePoint the Unicode code point to be tested

   801      * @return {@code true} if the specified code point value is between

   802      *         {@link #MIN_CODE_POINT} and

   803      *         {@link #MAX_CODE_POINT} inclusive;

   804      *         {@code false} otherwise.

   805      * @since  1.5

   806      */

   807     public static boolean isValidCodePoint(int codePoint) {

   808         // Optimized form of:

   809         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT

   810         int plane = codePoint >>> 16;

   811         return plane < ((MAX_CODE_POINT + 1) >>> 16);

   812     }

   814     /**

   815      * Determines whether the specified character (Unicode code point)

   816      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.

   817      * Such code points can be represented using a single {@code char}.

   818      *

   819      * @param  codePoint the character (Unicode code point) to be tested

   820      * @return {@code true} if the specified code point is between

   821      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;

   822      *         {@code false} otherwise.

   823      * @since  1.7

   824      */

   825     public static boolean isBmpCodePoint(int codePoint) {

   826         return codePoint >>> 16 == 0;

   827         // Optimized form of:

   828         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE

   829         // We consistently use logical shift (>>>) to facilitate

   830         // additional runtime optimizations.

   831     }

   833     /**

   834      * Determines whether the specified character (Unicode code point)

   835      * is in the <a href="#supplementary">supplementary character</a> range.

   836      *

   837      * @param  codePoint the character (Unicode code point) to be tested

   838      * @return {@code true} if the specified code point is between

   839      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and

   840      *         {@link #MAX_CODE_POINT} inclusive;

   841      *         {@code false} otherwise.

   842      * @since  1.5

   843      */

   844     public static boolean isSupplementaryCodePoint(int codePoint) {

   845         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT

   846             && codePoint <  MAX_CODE_POINT + 1;

   847     }

   849     /**

   850      * Determines if the given {@code char} value is a

   851      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">

   852      * Unicode high-surrogate code unit</a>

   853      * (also known as <i>leading-surrogate code unit</i>).

   854      *

   855      * <p>Such values do not represent characters by themselves,

   856      * but are used in the representation of

   857      * <a href="#supplementary">supplementary characters</a>

   858      * in the UTF-16 encoding.

   859      *

   860      * @param  ch the {@code char} value to be tested.

   861      * @return {@code true} if the {@code char} value is between

   862      *         {@link #MIN_HIGH_SURROGATE} and

   863      *         {@link #MAX_HIGH_SURROGATE} inclusive;

   864      *         {@code false} otherwise.

   865      * @see    Character#isLowSurrogate(char)

   866      * @see    Character.UnicodeBlock#of(int)

   867      * @since  1.5

   868      */

   869     public static boolean isHighSurrogate(char ch) {

   870         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE

   871         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);

   872     }

   874     /**

   875      * Determines if the given {@code char} value is a

   876      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">

   877      * Unicode low-surrogate code unit</a>

   878      * (also known as <i>trailing-surrogate code unit</i>).

   879      *

   880      * <p>Such values do not represent characters by themselves,

   881      * but are used in the representation of

   882      * <a href="#supplementary">supplementary characters</a>

   883      * in the UTF-16 encoding.

   884      *

   885      * @param  ch the {@code char} value to be tested.

   886      * @return {@code true} if the {@code char} value is between

   887      *         {@link #MIN_LOW_SURROGATE} and

   888      *         {@link #MAX_LOW_SURROGATE} inclusive;

   889      *         {@code false} otherwise.

   890      * @see    Character#isHighSurrogate(char)

   891      * @since  1.5

   892      */

   893     public static boolean isLowSurrogate(char ch) {

   894         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);

   895     }

   897     /**

   898      * Determines if the given {@code char} value is a Unicode

   899      * <i>surrogate code unit</i>.

   900      *

   901      * <p>Such values do not represent characters by themselves,

   902      * but are used in the representation of

   903      * <a href="#supplementary">supplementary characters</a>

   904      * in the UTF-16 encoding.

   905      *

   906      * <p>A char value is a surrogate code unit if and only if it is either

   907      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or

   908      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.

   909      *

   910      * @param  ch the {@code char} value to be tested.

   911      * @return {@code true} if the {@code char} value is between

   912      *         {@link #MIN_SURROGATE} and

   913      *         {@link #MAX_SURROGATE} inclusive;

   914      *         {@code false} otherwise.

   915      * @since  1.7

   916      */

   917     public static boolean isSurrogate(char ch) {

   918         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);

   919     }

   921     /**

   922      * Determines whether the specified pair of {@code char}

   923      * values is a valid

   924      * <a href="http://www.unicode.org/glossary/#surrogate_pair">

   925      * Unicode surrogate pair</a>.

   927      * <p>This method is equivalent to the expression:

   928      * <blockquote><pre>

   929      * isHighSurrogate(high) && isLowSurrogate(low)

   930      * </pre></blockquote>

   931      *

   932      * @param  high the high-surrogate code value to be tested

   933      * @param  low the low-surrogate code value to be tested

   934      * @return {@code true} if the specified high and

   935      * low-surrogate code values represent a valid surrogate pair;

   936      * {@code false} otherwise.

   937      * @since  1.5

   938      */

   939     public static boolean isSurrogatePair(char high, char low) {

   940         return isHighSurrogate(high) && isLowSurrogate(low);

   941     }

   943     /**

   944      * Determines the number of {@code char} values needed to

   945      * represent the specified character (Unicode code point). If the

   946      * specified character is equal to or greater than 0x10000, then

   947      * the method returns 2. Otherwise, the method returns 1.

   948      *

   949      * <p>This method doesn't validate the specified character to be a

   950      * valid Unicode code point. The caller must validate the

   951      * character value using {@link #isValidCodePoint(int) isValidCodePoint}

   952      * if necessary.

   953      *

   954      * @param   codePoint the character (Unicode code point) to be tested.

   955      * @return  2 if the character is a valid supplementary character; 1 otherwise.

   956      * @see     Character#isSupplementaryCodePoint(int)

   957      * @since   1.5

   958      */

   959     public static int charCount(int codePoint) {

   960         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;

   961     }

   963     /**

   964      * Converts the specified surrogate pair to its supplementary code

   965      * point value. This method does not validate the specified

   966      * surrogate pair. The caller must validate it using {@link

   967      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.

   968      *

   969      * @param  high the high-surrogate code unit

   970      * @param  low the low-surrogate code unit

   971      * @return the supplementary code point composed from the

   972      *         specified surrogate pair.

   973      * @since  1.5

   974      */

   975     public static int toCodePoint(char high, char low) {

   976         // Optimized form of:

   977         // return ((high - MIN_HIGH_SURROGATE) << 10)

   978         //         + (low - MIN_LOW_SURROGATE)

   979         //         + MIN_SUPPLEMENTARY_CODE_POINT;

   980         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT

   981                                        - (MIN_HIGH_SURROGATE << 10)

   982                                        - MIN_LOW_SURROGATE);

   983     }

   985     /**

   986      * Returns the code point at the given index of the

   987      * {@code CharSequence}. If the {@code char} value at

   988      * the given index in the {@code CharSequence} is in the

   989      * high-surrogate range, the following index is less than the

   990      * length of the {@code CharSequence}, and the

   991      * {@code char} value at the following index is in the

   992      * low-surrogate range, then the supplementary code point

   993      * corresponding to this surrogate pair is returned. Otherwise,

   994      * the {@code char} value at the given index is returned.

   995      *

   996      * @param seq a sequence of {@code char} values (Unicode code

   997      * units)

   998      * @param index the index to the {@code char} values (Unicode

   999      * code units) in {@code seq} to be converted

  1000      * @return the Unicode code point at the given index

  1001      * @exception NullPointerException if {@code seq} is null.

  1002      * @exception IndexOutOfBoundsException if the value

  1003      * {@code index} is negative or not less than

  1004      * {@link CharSequence#length() seq.length()}.

  1005      * @since  1.5

  1006      */

  1007     public static int codePointAt(CharSequence seq, int index) {

  1008         char c1 = seq.charAt(index++);

  1009         if (isHighSurrogate(c1)) {

  1010             if (index < seq.length()) {

  1011                 char c2 = seq.charAt(index);

  1012                 if (isLowSurrogate(c2)) {

  1013                     return toCodePoint(c1, c2);

  1014                 }

  1015             }

  1016         }

  1017         return c1;

  1018     }

  1020     /**

  1021      * Returns the code point at the given index of the

  1022      * {@code char} array. If the {@code char} value at

  1023      * the given index in the {@code char} array is in the

  1024      * high-surrogate range, the following index is less than the

  1025      * length of the {@code char} array, and the

  1026      * {@code char} value at the following index is in the

  1027      * low-surrogate range, then the supplementary code point

  1028      * corresponding to this surrogate pair is returned. Otherwise,

  1029      * the {@code char} value at the given index is returned.

  1030      *

  1031      * @param a the {@code char} array

  1032      * @param index the index to the {@code char} values (Unicode

  1033      * code units) in the {@code char} array to be converted

  1034      * @return the Unicode code point at the given index

  1035      * @exception NullPointerException if {@code a} is null.

  1036      * @exception IndexOutOfBoundsException if the value

  1037      * {@code index} is negative or not less than

  1038      * the length of the {@code char} array.

  1039      * @since  1.5

  1040      */

  1041     public static int codePointAt(char[] a, int index) {

  1042         return codePointAtImpl(a, index, a.length);

  1043     }

  1045     /**

  1046      * Returns the code point at the given index of the

  1047      * {@code char} array, where only array elements with

  1048      * {@code index} less than {@code limit} can be used. If

  1049      * the {@code char} value at the given index in the

  1050      * {@code char} array is in the high-surrogate range, the

  1051      * following index is less than the {@code limit}, and the

  1052      * {@code char} value at the following index is in the

  1053      * low-surrogate range, then the supplementary code point

  1054      * corresponding to this surrogate pair is returned. Otherwise,

  1055      * the {@code char} value at the given index is returned.

  1056      *

  1057      * @param a the {@code char} array

  1058      * @param index the index to the {@code char} values (Unicode

  1059      * code units) in the {@code char} array to be converted

  1060      * @param limit the index after the last array element that

  1061      * can be used in the {@code char} array

  1062      * @return the Unicode code point at the given index

  1063      * @exception NullPointerException if {@code a} is null.

  1064      * @exception IndexOutOfBoundsException if the {@code index}

  1065      * argument is negative or not less than the {@code limit}

  1066      * argument, or if the {@code limit} argument is negative or

  1067      * greater than the length of the {@code char} array.

  1068      * @since  1.5

  1069      */

  1070     public static int codePointAt(char[] a, int index, int limit) {

  1071         if (index >= limit || limit < 0 || limit > a.length) {

  1072             throw new IndexOutOfBoundsException();

  1073         }

  1074         return codePointAtImpl(a, index, limit);

  1075     }

  1077     // throws ArrayIndexOutOfBoundsException if index is out of bounds

  1078     static int codePointAtImpl(char[] a, int index, int limit) {

  1079         char c1 = a[index++];

  1080         if (isHighSurrogate(c1)) {

  1081             if (index < limit) {

  1082                 char c2 = a[index];

  1083                 if (isLowSurrogate(c2)) {

  1084                     return toCodePoint(c1, c2);

  1085                 }

  1086             }

  1087         }

  1088         return c1;

  1089     }

  1091     /**

  1092      * Returns the code point preceding the given index of the

  1093      * {@code CharSequence}. If the {@code char} value at

  1094      * {@code (index - 1)} in the {@code CharSequence} is in

  1095      * the low-surrogate range, {@code (index - 2)} is not

  1096      * negative, and the {@code char} value at {@code (index - 2)}

  1097      * in the {@code CharSequence} is in the

  1098      * high-surrogate range, then the supplementary code point

  1099      * corresponding to this surrogate pair is returned. Otherwise,

  1100      * the {@code char} value at {@code (index - 1)} is

  1101      * returned.

  1102      *

  1103      * @param seq the {@code CharSequence} instance

  1104      * @param index the index following the code point that should be returned

  1105      * @return the Unicode code point value before the given index.

  1106      * @exception NullPointerException if {@code seq} is null.

  1107      * @exception IndexOutOfBoundsException if the {@code index}

  1108      * argument is less than 1 or greater than {@link

  1109      * CharSequence#length() seq.length()}.

  1110      * @since  1.5

  1111      */

  1112     public static int codePointBefore(CharSequence seq, int index) {

  1113         char c2 = seq.charAt(--index);

  1114         if (isLowSurrogate(c2)) {

  1115             if (index > 0) {

  1116                 char c1 = seq.charAt(--index);

  1117                 if (isHighSurrogate(c1)) {

  1118                     return toCodePoint(c1, c2);

  1119                 }

  1120             }

  1121         }

  1122         return c2;

  1123     }

  1125     /**

  1126      * Returns the code point preceding the given index of the

  1127      * {@code char} array. If the {@code char} value at

  1128      * {@code (index - 1)} in the {@code char} array is in

  1129      * the low-surrogate range, {@code (index - 2)} is not

  1130      * negative, and the {@code char} value at {@code (index - 2)}

  1131      * in the {@code char} array is in the

  1132      * high-surrogate range, then the supplementary code point

  1133      * corresponding to this surrogate pair is returned. Otherwise,

  1134      * the {@code char} value at {@code (index - 1)} is

  1135      * returned.

  1136      *

  1137      * @param a the {@code char} array

  1138      * @param index the index following the code point that should be returned

  1139      * @return the Unicode code point value before the given index.

  1140      * @exception NullPointerException if {@code a} is null.

  1141      * @exception IndexOutOfBoundsException if the {@code index}

  1142      * argument is less than 1 or greater than the length of the

  1143      * {@code char} array

  1144      * @since  1.5

  1145      */

  1146     public static int codePointBefore(char[] a, int index) {

  1147         return codePointBeforeImpl(a, index, 0);

  1148     }

  1150     /**

  1151      * Returns the code point preceding the given index of the

  1152      * {@code char} array, where only array elements with

  1153      * {@code index} greater than or equal to {@code start}

  1154      * can be used. If the {@code char} value at {@code (index - 1)}

  1155      * in the {@code char} array is in the

  1156      * low-surrogate range, {@code (index - 2)} is not less than

  1157      * {@code start}, and the {@code char} value at

  1158      * {@code (index - 2)} in the {@code char} array is in

  1159      * the high-surrogate range, then the supplementary code point

  1160      * corresponding to this surrogate pair is returned. Otherwise,

  1161      * the {@code char} value at {@code (index - 1)} is

  1162      * returned.

  1163      *

  1164      * @param a the {@code char} array

  1165      * @param index the index following the code point that should be returned

  1166      * @param start the index of the first array element in the

  1167      * {@code char} array

  1168      * @return the Unicode code point value before the given index.

  1169      * @exception NullPointerException if {@code a} is null.

  1170      * @exception IndexOutOfBoundsException if the {@code index}

  1171      * argument is not greater than the {@code start} argument or

  1172      * is greater than the length of the {@code char} array, or

  1173      * if the {@code start} argument is negative or not less than

  1174      * the length of the {@code char} array.

  1175      * @since  1.5

  1176      */

  1177     public static int codePointBefore(char[] a, int index, int start) {

  1178         if (index <= start || start < 0 || start >= a.length) {

  1179             throw new IndexOutOfBoundsException();

  1180         }

  1181         return codePointBeforeImpl(a, index, start);

  1182     }

  1184     // throws ArrayIndexOutOfBoundsException if index-1 is out of bounds

  1185     static int codePointBeforeImpl(char[] a, int index, int start) {

  1186         char c2 = a[--index];

  1187         if (isLowSurrogate(c2)) {

  1188             if (index > start) {

  1189                 char c1 = a[--index];

  1190                 if (isHighSurrogate(c1)) {

  1191                     return toCodePoint(c1, c2);

  1192                 }

  1193             }

  1194         }

  1195         return c2;

  1196     }

  1198     /**

  1199      * Returns the leading surrogate (a

  1200      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">

  1201      * high surrogate code unit</a>) of the

  1202      * <a href="http://www.unicode.org/glossary/#surrogate_pair">

  1203      * surrogate pair</a>

  1204      * representing the specified supplementary character (Unicode

  1205      * code point) in the UTF-16 encoding.  If the specified character

  1206      * is not a

  1207      * <a href="Character.html#supplementary">supplementary character</a>,

  1208      * an unspecified {@code char} is returned.

  1209      *

  1210      * <p>If

  1211      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}

  1212      * is {@code true}, then

  1213      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and

  1214      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}

  1215      * are also always {@code true}.

  1216      *

  1217      * @param   codePoint a supplementary character (Unicode code point)

  1218      * @return  the leading surrogate code unit used to represent the

  1219      *          character in the UTF-16 encoding

  1220      * @since   1.7

  1221      */

  1222     public static char highSurrogate(int codePoint) {

  1223         return (char) ((codePoint >>> 10)

  1224             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));

  1225     }

  1227     /**

  1228      * Returns the trailing surrogate (a

  1229      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">

  1230      * low surrogate code unit</a>) of the

  1231      * <a href="http://www.unicode.org/glossary/#surrogate_pair">

  1232      * surrogate pair</a>

  1233      * representing the specified supplementary character (Unicode

  1234      * code point) in the UTF-16 encoding.  If the specified character

  1235      * is not a

  1236      * <a href="Character.html#supplementary">supplementary character</a>,

  1237      * an unspecified {@code char} is returned.

  1238      *

  1239      * <p>If

  1240      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}

  1241      * is {@code true}, then

  1242      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and

  1243      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}

  1244      * are also always {@code true}.

  1245      *

  1246      * @param   codePoint a supplementary character (Unicode code point)

  1247      * @return  the trailing surrogate code unit used to represent the

  1248      *          character in the UTF-16 encoding

  1249      * @since   1.7

  1250      */

  1251     public static char lowSurrogate(int codePoint) {

  1252         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);

  1253     }

  1255     /**

  1256      * Converts the specified character (Unicode code point) to its

  1257      * UTF-16 representation. If the specified code point is a BMP

  1258      * (Basic Multilingual Plane or Plane 0) value, the same value is

  1259      * stored in {@code dst[dstIndex]}, and 1 is returned. If the

  1260      * specified code point is a supplementary character, its

  1261      * surrogate values are stored in {@code dst[dstIndex]}

  1262      * (high-surrogate) and {@code dst[dstIndex+1]}

  1263      * (low-surrogate), and 2 is returned.

  1264      *

  1265      * @param  codePoint the character (Unicode code point) to be converted.

  1266      * @param  dst an array of {@code char} in which the

  1267      * {@code codePoint}'s UTF-16 value is stored.

  1268      * @param dstIndex the start index into the {@code dst}

  1269      * array where the converted value is stored.

  1270      * @return 1 if the code point is a BMP code point, 2 if the

  1271      * code point is a supplementary code point.

  1272      * @exception IllegalArgumentException if the specified

  1273      * {@code codePoint} is not a valid Unicode code point.

  1274      * @exception NullPointerException if the specified {@code dst} is null.

  1275      * @exception IndexOutOfBoundsException if {@code dstIndex}

  1276      * is negative or not less than {@code dst.length}, or if

  1277      * {@code dst} at {@code dstIndex} doesn't have enough

  1278      * array element(s) to store the resulting {@code char}

  1279      * value(s). (If {@code dstIndex} is equal to

  1280      * {@code dst.length-1} and the specified

  1281      * {@code codePoint} is a supplementary character, the

  1282      * high-surrogate value is not stored in

  1283      * {@code dst[dstIndex]}.)

  1284      * @since  1.5

  1285      */

  1286     public static int toChars(int codePoint, char[] dst, int dstIndex) {

  1287         if (isBmpCodePoint(codePoint)) {

  1288             dst[dstIndex] = (char) codePoint;

  1289             return 1;

  1290         } else if (isValidCodePoint(codePoint)) {

  1291             toSurrogates(codePoint, dst, dstIndex);

  1292             return 2;

  1293         } else {

  1294             throw new IllegalArgumentException();

  1295         }

  1296     }

  1298     /**

  1299      * Converts the specified character (Unicode code point) to its

  1300      * UTF-16 representation stored in a {@code char} array. If

  1301      * the specified code point is a BMP (Basic Multilingual Plane or

  1302      * Plane 0) value, the resulting {@code char} array has

  1303      * the same value as {@code codePoint}. If the specified code

  1304      * point is a supplementary code point, the resulting

  1305      * {@code char} array has the corresponding surrogate pair.

  1306      *

  1307      * @param  codePoint a Unicode code point

  1308      * @return a {@code char} array having

  1309      *         {@code codePoint}'s UTF-16 representation.

  1310      * @exception IllegalArgumentException if the specified

  1311      * {@code codePoint} is not a valid Unicode code point.

  1312      * @since  1.5

  1313      */

  1314     public static char[] toChars(int codePoint) {

  1315         if (isBmpCodePoint(codePoint)) {

  1316             return new char[] { (char) codePoint };

  1317         } else if (isValidCodePoint(codePoint)) {

  1318             char[] result = new char[2];

  1319             toSurrogates(codePoint, result, 0);

  1320             return result;

  1321         } else {

  1322             throw new IllegalArgumentException();

  1323         }

  1324     }

  1326     static void toSurrogates(int codePoint, char[] dst, int index) {

  1327         // We write elements "backwards" to guarantee all-or-nothing

  1328         dst[index+1] = lowSurrogate(codePoint);

  1329         dst[index] = highSurrogate(codePoint);

  1330     }

  1332     /**

  1333      * Returns the number of Unicode code points in the text range of

  1334      * the specified char sequence. The text range begins at the

  1335      * specified {@code beginIndex} and extends to the

  1336      * {@code char} at index {@code endIndex - 1}. Thus the

  1337      * length (in {@code char}s) of the text range is

  1338      * {@code endIndex-beginIndex}. Unpaired surrogates within

  1339      * the text range count as one code point each.

  1340      *

  1341      * @param seq the char sequence

  1342      * @param beginIndex the index to the first {@code char} of

  1343      * the text range.

  1344      * @param endIndex the index after the last {@code char} of

  1345      * the text range.

  1346      * @return the number of Unicode code points in the specified text

  1347      * range

  1348      * @exception NullPointerException if {@code seq} is null.

  1349      * @exception IndexOutOfBoundsException if the

  1350      * {@code beginIndex} is negative, or {@code endIndex}

  1351      * is larger than the length of the given sequence, or

  1352      * {@code beginIndex} is larger than {@code endIndex}.

  1353      * @since  1.5

  1354      */

  1355     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {

  1356         int length = seq.length();

  1357         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {

  1358             throw new IndexOutOfBoundsException();

  1359         }

  1360         int n = endIndex - beginIndex;

  1361         for (int i = beginIndex; i < endIndex; ) {

  1362             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&

  1363                 isLowSurrogate(seq.charAt(i))) {

  1364                 n--;

  1365                 i++;

  1366             }

  1367         }

  1368         return n;

  1369     }

  1371     /**

  1372      * Returns the number of Unicode code points in a subarray of the

  1373      * {@code char} array argument. The {@code offset}

  1374      * argument is the index of the first {@code char} of the

  1375      * subarray and the {@code count} argument specifies the

  1376      * length of the subarray in {@code char}s. Unpaired

  1377      * surrogates within the subarray count as one code point each.

  1378      *

  1379      * @param a the {@code char} array

  1380      * @param offset the index of the first {@code char} in the

  1381      * given {@code char} array

  1382      * @param count the length of the subarray in {@code char}s

  1383      * @return the number of Unicode code points in the specified subarray

  1384      * @exception NullPointerException if {@code a} is null.

  1385      * @exception IndexOutOfBoundsException if {@code offset} or

  1386      * {@code count} is negative, or if {@code offset +

  1387      * count} is larger than the length of the given array.

  1388      * @since  1.5

  1389      */

  1390     public static int codePointCount(char[] a, int offset, int count) {

  1391         if (count > a.length - offset || offset < 0 || count < 0) {

  1392             throw new IndexOutOfBoundsException();

  1393         }

  1394         return codePointCountImpl(a, offset, count);

  1395     }

  1397     static int codePointCountImpl(char[] a, int offset, int count) {

  1398         int endIndex = offset + count;

  1399         int n = count;

  1400         for (int i = offset; i < endIndex; ) {

  1401             if (isHighSurrogate(a[i++]) && i < endIndex &&

  1402                 isLowSurrogate(a[i])) {

  1403                 n--;

  1404                 i++;

  1405             }

  1406         }

  1407         return n;

  1408     }

  1410     /**

  1411      * Returns the index within the given char sequence that is offset

  1412      * from the given {@code index} by {@code codePointOffset}

  1413      * code points. Unpaired surrogates within the text range given by

  1414      * {@code index} and {@code codePointOffset} count as

  1415      * one code point each.

  1416      *

  1417      * @param seq the char sequence

  1418      * @param index the index to be offset

  1419      * @param codePointOffset the offset in code points

  1420      * @return the index within the char sequence

  1421      * @exception NullPointerException if {@code seq} is null.

  1422      * @exception IndexOutOfBoundsException if {@code index}

  1423      *   is negative or larger than the length of the char sequence,

  1424      *   or if {@code codePointOffset} is positive and the

  1425      *   subsequence starting with {@code index} has fewer than

  1426      *   {@code codePointOffset} code points, or if

  1427      *   {@code codePointOffset} is negative and the subsequence

  1428      *   before {@code index} has fewer than the absolute value

  1429      *   of {@code codePointOffset} code points.

  1430      * @since 1.5

  1431      */

  1432     public static int offsetByCodePoints(CharSequence seq, int index,

  1433                                          int codePointOffset) {

  1434         int length = seq.length();

  1435         if (index < 0 || index > length) {

  1436             throw new IndexOutOfBoundsException();

  1437         }

  1439         int x = index;

  1440         if (codePointOffset >= 0) {

  1441             int i;

  1442             for (i = 0; x < length && i < codePointOffset; i++) {

  1443                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&

  1444                     isLowSurrogate(seq.charAt(x))) {

  1445                     x++;

  1446                 }

  1447             }

  1448             if (i < codePointOffset) {

  1449                 throw new IndexOutOfBoundsException();

  1450             }

  1451         } else {

  1452             int i;

  1453             for (i = codePointOffset; x > 0 && i < 0; i++) {

  1454                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&

  1455                     isHighSurrogate(seq.charAt(x-1))) {

  1456                     x--;

  1457                 }

  1458             }

  1459             if (i < 0) {

  1460                 throw new IndexOutOfBoundsException();

  1461             }

  1462         }

  1463         return x;

  1464     }

  1466     /**

  1467      * Returns the index within the given {@code char} subarray

  1468      * that is offset from the given {@code index} by

  1469      * {@code codePointOffset} code points. The

  1470      * {@code start} and {@code count} arguments specify a

  1471      * subarray of the {@code char} array. Unpaired surrogates

  1472      * within the text range given by {@code index} and

  1473      * {@code codePointOffset} count as one code point each.

  1474      *

  1475      * @param a the {@code char} array

  1476      * @param start the index of the first {@code char} of the

  1477      * subarray

  1478      * @param count the length of the subarray in {@code char}s

  1479      * @param index the index to be offset

  1480      * @param codePointOffset the offset in code points

  1481      * @return the index within the subarray

  1482      * @exception NullPointerException if {@code a} is null.

  1483      * @exception IndexOutOfBoundsException

  1484      *   if {@code start} or {@code count} is negative,

  1485      *   or if {@code start + count} is larger than the length of

  1486      *   the given array,

  1487      *   or if {@code index} is less than {@code start} or

     *   larger than {@code start + count},

  1489      *   or if {@code codePointOffset} is positive and the text range

  1490      *   starting with {@code index} and ending with {@code start + count - 1}

  1491      *   has fewer than {@code codePointOffset} code

  1492      *   points,

  1493      *   or if {@code codePointOffset} is negative and the text range

  1494      *   starting with {@code start} and ending with {@code index - 1}

  1495      *   has fewer than the absolute value of

  1496      *   {@code codePointOffset} code points.

  1497      * @since 1.5

  1498      */

  1499     public static int offsetByCodePoints(char[] a, int start, int count,

  1500                                          int index, int codePointOffset) {

  1501         if (count > a.length-start || start < 0 || count < 0

  1502             || index < start || index > start+count) {

  1503             throw new IndexOutOfBoundsException();

  1504         }

  1505         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);

  1506     }

  1508     static int offsetByCodePointsImpl(char[]a, int start, int count,

  1509                                       int index, int codePointOffset) {

  1510         int x = index;

  1511         if (codePointOffset >= 0) {

  1512             int limit = start + count;

  1513             int i;

  1514             for (i = 0; x < limit && i < codePointOffset; i++) {

  1515                 if (isHighSurrogate(a[x++]) && x < limit &&

  1516                     isLowSurrogate(a[x])) {

  1517                     x++;

  1518                 }

  1519             }

  1520             if (i < codePointOffset) {

  1521                 throw new IndexOutOfBoundsException();

  1522             }

  1523         } else {

  1524             int i;

  1525             for (i = codePointOffset; x > start && i < 0; i++) {

  1526                 if (isLowSurrogate(a[--x]) && x > start &&

  1527                     isHighSurrogate(a[x-1])) {

  1528                     x--;

  1529                 }

  1530             }

  1531             if (i < 0) {

  1532                 throw new IndexOutOfBoundsException();

  1533             }

  1534         }

  1535         return x;

  1536     }

  1538     /**

  1539      * Determines if the specified character is a lowercase character.

  1540      * <p>

  1541      * A character is lowercase if its general category type, provided

  1542      * by {@code Character.getType(ch)}, is

  1543      * {@code LOWERCASE_LETTER}, or it has contributory property

  1544      * Other_Lowercase as defined by the Unicode Standard.

  1545      * <p>

  1546      * The following are examples of lowercase characters:

  1547      * <p><blockquote><pre>

  1548      * a b c d e f g h i j k l m n o p q r s t u v w x y z

  1549      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'

  1550      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'

  1551      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'

  1552      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'

  1553      * </pre></blockquote>

  1554      * <p> Many other Unicode characters are lowercase too.

  1555      *

  1556      * <p><b>Note:</b> This method cannot handle <a

  1557      * href="#supplementary"> supplementary characters</a>. To support

  1558      * all Unicode characters, including supplementary characters, use

  1559      * the {@link #isLowerCase(int)} method.

  1560      *

  1561      * @param   ch   the character to be tested.

  1562      * @return  {@code true} if the character is lowercase;

  1563      *          {@code false} otherwise.

  1564      * @see     Character#isLowerCase(char)

  1565      * @see     Character#isTitleCase(char)

  1566      * @see     Character#toLowerCase(char)

  1567      * @see     Character#getType(char)

  1568      */

  1569     public static boolean isLowerCase(char ch) {

  1570         return ch == toLowerCase(ch);

  1571     }

  1573     /**

  1574      * Determines if the specified character is an uppercase character.

  1575      * <p>

  1576      * A character is uppercase if its general category type, provided by

     * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},

  1578      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.

  1579      * <p>

  1580      * The following are examples of uppercase characters:

  1581      * <p><blockquote><pre>

  1582      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z

  1583      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'

  1584      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'

  1585      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'

  1586      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'

  1587      * </pre></blockquote>

  1588      * <p> Many other Unicode characters are uppercase too.<p>

  1589      *

  1590      * <p><b>Note:</b> This method cannot handle <a

  1591      * href="#supplementary"> supplementary characters</a>. To support

  1592      * all Unicode characters, including supplementary characters, use

  1593      * the {@link #isUpperCase(int)} method.

  1594      *

  1595      * @param   ch   the character to be tested.

  1596      * @return  {@code true} if the character is uppercase;

  1597      *          {@code false} otherwise.

  1598      * @see     Character#isLowerCase(char)

  1599      * @see     Character#isTitleCase(char)

  1600      * @see     Character#toUpperCase(char)

  1601      * @see     Character#getType(char)

  1602      * @since   1.0

  1603      */

  1604     public static boolean isUpperCase(char ch) {

  1605         return ch == toUpperCase(ch);

  1606     }

  1608     /**

  1609      * Determines if the specified character is a titlecase character.

  1610      * <p>

  1611      * A character is a titlecase character if its general

  1612      * category type, provided by {@code Character.getType(ch)},

  1613      * is {@code TITLECASE_LETTER}.

  1614      * <p>

  1615      * Some characters look like pairs of Latin letters. For example, there

  1616      * is an uppercase letter that looks like "LJ" and has a corresponding

  1617      * lowercase letter that looks like "lj". A third form, which looks like "Lj",

  1618      * is the appropriate form to use when rendering a word in lowercase

  1619      * with initial capitals, as for a book title.

  1620      * <p>

  1621      * These are some of the Unicode characters for which this method returns

  1622      * {@code true}:

  1623      * <ul>

  1624      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}

  1625      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}

  1626      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}

  1627      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}

  1628      * </ul>

  1629      * <p> Many other Unicode characters are titlecase too.<p>

  1630      *

  1631      * <p><b>Note:</b> This method cannot handle <a

  1632      * href="#supplementary"> supplementary characters</a>. To support

  1633      * all Unicode characters, including supplementary characters, use

  1634      * the {@link #isTitleCase(int)} method.

  1635      *

  1636      * @param   ch   the character to be tested.

  1637      * @return  {@code true} if the character is titlecase;

  1638      *          {@code false} otherwise.

  1639      * @see     Character#isLowerCase(char)

  1640      * @see     Character#isUpperCase(char)

  1641      * @see     Character#toTitleCase(char)

  1642      * @see     Character#getType(char)

  1643      * @since   1.0.2

  1644      */

  1645     public static boolean isTitleCase(char ch) {

  1646         return isTitleCase((int)ch);

  1647     }

  1649     /**

  1650      * Determines if the specified character (Unicode code point) is a titlecase character.

  1651      * <p>

  1652      * A character is a titlecase character if its general

  1653      * category type, provided by {@link Character#getType(int) getType(codePoint)},

  1654      * is {@code TITLECASE_LETTER}.

  1655      * <p>

  1656      * Some characters look like pairs of Latin letters. For example, there

  1657      * is an uppercase letter that looks like "LJ" and has a corresponding

  1658      * lowercase letter that looks like "lj". A third form, which looks like "Lj",

  1659      * is the appropriate form to use when rendering a word in lowercase

  1660      * with initial capitals, as for a book title.

  1661      * <p>

  1662      * These are some of the Unicode characters for which this method returns

  1663      * {@code true}:

  1664      * <ul>

  1665      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}

  1666      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}

  1667      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}

  1668      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}

  1669      * </ul>

  1670      * <p> Many other Unicode characters are titlecase too.<p>

  1671      *

  1672      * @param   codePoint the character (Unicode code point) to be tested.

  1673      * @return  {@code true} if the character is titlecase;

  1674      *          {@code false} otherwise.

  1675      * @see     Character#isLowerCase(int)

  1676      * @see     Character#isUpperCase(int)

  1677      * @see     Character#toTitleCase(int)

  1678      * @see     Character#getType(int)

  1679      * @since   1.5

  1680      */

  1681     public static boolean isTitleCase(int codePoint) {

  1682         return getType(codePoint) == Character.TITLECASE_LETTER;

  1683     }

  1685     /**

  1686      * Determines if the specified character is a digit.

  1687      * <p>

  1688      * A character is a digit if its general category type, provided

  1689      * by {@code Character.getType(ch)}, is

  1690      * {@code DECIMAL_DIGIT_NUMBER}.

  1691      * <p>

  1692      * Some Unicode character ranges that contain digits:

  1693      * <ul>

  1694      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},

  1695      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})

  1696      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},

  1697      *     Arabic-Indic digits

  1698      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},

  1699      *     Extended Arabic-Indic digits

  1700      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},

  1701      *     Devanagari digits

  1702      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},

  1703      *     Fullwidth digits

  1704      * </ul>

  1705      *

  1706      * Many other character ranges contain digits as well.

  1707      *

  1708      * <p><b>Note:</b> This method cannot handle <a

  1709      * href="#supplementary"> supplementary characters</a>. To support

  1710      * all Unicode characters, including supplementary characters, use

  1711      * the {@link #isDigit(int)} method.

  1712      *

  1713      * @param   ch   the character to be tested.

  1714      * @return  {@code true} if the character is a digit;

  1715      *          {@code false} otherwise.

  1716      * @see     Character#digit(char, int)

  1717      * @see     Character#forDigit(int, int)

  1718      * @see     Character#getType(char)

  1719      */

  1720     public static boolean isDigit(char ch) {

  1721         return String.valueOf(ch).matches("\\d");

  1722     }

  1724     /**

  1725      * Determines if the specified character (Unicode code point) is a digit.

  1726      * <p>

  1727      * A character is a digit if its general category type, provided

  1728      * by {@link Character#getType(int) getType(codePoint)}, is

  1729      * {@code DECIMAL_DIGIT_NUMBER}.

  1730      * <p>

  1731      * Some Unicode character ranges that contain digits:

  1732      * <ul>

  1733      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},

  1734      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})

  1735      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},

  1736      *     Arabic-Indic digits

  1737      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},

  1738      *     Extended Arabic-Indic digits

  1739      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},

  1740      *     Devanagari digits

  1741      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},

  1742      *     Fullwidth digits

  1743      * </ul>

  1744      *

  1745      * Many other character ranges contain digits as well.

  1746      *

  1747      * @param   codePoint the character (Unicode code point) to be tested.

  1748      * @return  {@code true} if the character is a digit;

  1749      *          {@code false} otherwise.

  1750      * @see     Character#forDigit(int, int)

  1751      * @see     Character#getType(int)

  1752      * @since   1.5

  1753      */

  1754     public static boolean isDigit(int codePoint) {

  1755         return fromCodeChars(codePoint).matches("\\d");

  1756     }

  1758     @JavaScriptBody(args = "c", body = "return String.fromCharCode(c);")

  1759     private native static String fromCodeChars(int codePoint);

  1761     /**

  1762      * Determines if a character is defined in Unicode.

  1763      * <p>

  1764      * A character is defined if at least one of the following is true:

  1765      * <ul>

  1766      * <li>It has an entry in the UnicodeData file.

  1767      * <li>It has a value in a range defined by the UnicodeData file.

  1768      * </ul>

  1769      *

  1770      * <p><b>Note:</b> This method cannot handle <a

  1771      * href="#supplementary"> supplementary characters</a>. To support

  1772      * all Unicode characters, including supplementary characters, use

  1773      * the {@link #isDefined(int)} method.

  1774      *

  1775      * @param   ch   the character to be tested

  1776      * @return  {@code true} if the character has a defined meaning

  1777      *          in Unicode; {@code false} otherwise.

  1778      * @see     Character#isDigit(char)

  1779      * @see     Character#isLetter(char)

  1780      * @see     Character#isLetterOrDigit(char)

  1781      * @see     Character#isLowerCase(char)

  1782      * @see     Character#isTitleCase(char)

  1783      * @see     Character#isUpperCase(char)

  1784      * @since   1.0.2

  1785      */

  1786     public static boolean isDefined(char ch) {

  1787         return isDefined((int)ch);

  1788     }

  1790     /**

  1791      * Determines if a character (Unicode code point) is defined in Unicode.

  1792      * <p>

  1793      * A character is defined if at least one of the following is true:

  1794      * <ul>

  1795      * <li>It has an entry in the UnicodeData file.

  1796      * <li>It has a value in a range defined by the UnicodeData file.

  1797      * </ul>

  1798      *

  1799      * @param   codePoint the character (Unicode code point) to be tested.

  1800      * @return  {@code true} if the character has a defined meaning

  1801      *          in Unicode; {@code false} otherwise.

  1802      * @see     Character#isDigit(int)

  1803      * @see     Character#isLetter(int)

  1804      * @see     Character#isLetterOrDigit(int)

  1805      * @see     Character#isLowerCase(int)

  1806      * @see     Character#isTitleCase(int)

  1807      * @see     Character#isUpperCase(int)

  1808      * @since   1.5

  1809      */

  1810     public static boolean isDefined(int codePoint) {

  1811         return getType(codePoint) != Character.UNASSIGNED;

  1812     }

  1814     /**

  1815      * Determines if the specified character is a letter.

  1816      * <p>

  1817      * A character is considered to be a letter if its general

  1818      * category type, provided by {@code Character.getType(ch)},

  1819      * is any of the following:

  1820      * <ul>

  1821      * <li> {@code UPPERCASE_LETTER}

  1822      * <li> {@code LOWERCASE_LETTER}

  1823      * <li> {@code TITLECASE_LETTER}

  1824      * <li> {@code MODIFIER_LETTER}

  1825      * <li> {@code OTHER_LETTER}

  1826      * </ul>

  1827      *

  1828      * Not all letters have case. Many characters are

  1829      * letters but are neither uppercase nor lowercase nor titlecase.

  1830      *

  1831      * <p><b>Note:</b> This method cannot handle <a

  1832      * href="#supplementary"> supplementary characters</a>. To support

  1833      * all Unicode characters, including supplementary characters, use

  1834      * the {@link #isLetter(int)} method.

  1835      *

  1836      * @param   ch   the character to be tested.

  1837      * @return  {@code true} if the character is a letter;

  1838      *          {@code false} otherwise.

  1839      * @see     Character#isDigit(char)

  1840      * @see     Character#isJavaIdentifierStart(char)

  1841      * @see     Character#isJavaLetter(char)

  1842      * @see     Character#isJavaLetterOrDigit(char)

  1843      * @see     Character#isLetterOrDigit(char)

  1844      * @see     Character#isLowerCase(char)

  1845      * @see     Character#isTitleCase(char)

  1846      * @see     Character#isUnicodeIdentifierStart(char)

  1847      * @see     Character#isUpperCase(char)

  1848      */

  1849     public static boolean isLetter(char ch) {

  1850         return String.valueOf(ch).matches("\\w") && !isDigit(ch);

  1851     }

  1853     /**

  1854      * Determines if the specified character (Unicode code point) is a letter.

  1855      * <p>

  1856      * A character is considered to be a letter if its general

  1857      * category type, provided by {@link Character#getType(int) getType(codePoint)},

  1858      * is any of the following:

  1859      * <ul>

  1860      * <li> {@code UPPERCASE_LETTER}

  1861      * <li> {@code LOWERCASE_LETTER}

  1862      * <li> {@code TITLECASE_LETTER}

  1863      * <li> {@code MODIFIER_LETTER}

  1864      * <li> {@code OTHER_LETTER}

  1865      * </ul>

  1866      *

  1867      * Not all letters have case. Many characters are

  1868      * letters but are neither uppercase nor lowercase nor titlecase.

  1869      *

  1870      * @param   codePoint the character (Unicode code point) to be tested.

  1871      * @return  {@code true} if the character is a letter;

  1872      *          {@code false} otherwise.

  1873      * @see     Character#isDigit(int)

  1874      * @see     Character#isJavaIdentifierStart(int)

  1875      * @see     Character#isLetterOrDigit(int)

  1876      * @see     Character#isLowerCase(int)

  1877      * @see     Character#isTitleCase(int)

  1878      * @see     Character#isUnicodeIdentifierStart(int)

  1879      * @see     Character#isUpperCase(int)

  1880      * @since   1.5

  1881      */

  1882     public static boolean isLetter(int codePoint) {

  1883         return fromCodeChars(codePoint).matches("\\w") && !isDigit(codePoint);

  1884     }

  1886     /**

  1887      * Determines if the specified character is a letter or digit.

  1888      * <p>

  1889      * A character is considered to be a letter or digit if either

  1890      * {@code Character.isLetter(char ch)} or

  1891      * {@code Character.isDigit(char ch)} returns

  1892      * {@code true} for the character.

  1893      *

  1894      * <p><b>Note:</b> This method cannot handle <a

  1895      * href="#supplementary"> supplementary characters</a>. To support

  1896      * all Unicode characters, including supplementary characters, use

  1897      * the {@link #isLetterOrDigit(int)} method.

  1898      *

  1899      * @param   ch   the character to be tested.

  1900      * @return  {@code true} if the character is a letter or digit;

  1901      *          {@code false} otherwise.

  1902      * @see     Character#isDigit(char)

  1903      * @see     Character#isJavaIdentifierPart(char)

  1904      * @see     Character#isJavaLetter(char)

  1905      * @see     Character#isJavaLetterOrDigit(char)

  1906      * @see     Character#isLetter(char)

  1907      * @see     Character#isUnicodeIdentifierPart(char)

  1908      * @since   1.0.2

  1909      */

  1910     public static boolean isLetterOrDigit(char ch) {

  1911         return String.valueOf(ch).matches("\\w");

  1912     }

  1914     /**

  1915      * Determines if the specified character (Unicode code point) is a letter or digit.

  1916      * <p>

  1917      * A character is considered to be a letter or digit if either

  1918      * {@link #isLetter(int) isLetter(codePoint)} or

  1919      * {@link #isDigit(int) isDigit(codePoint)} returns

  1920      * {@code true} for the character.

  1921      *

  1922      * @param   codePoint the character (Unicode code point) to be tested.

  1923      * @return  {@code true} if the character is a letter or digit;

  1924      *          {@code false} otherwise.

  1925      * @see     Character#isDigit(int)

  1926      * @see     Character#isJavaIdentifierPart(int)

  1927      * @see     Character#isLetter(int)

  1928      * @see     Character#isUnicodeIdentifierPart(int)

  1929      * @since   1.5

  1930      */

  1931     public static boolean isLetterOrDigit(int codePoint) {

  1932         return fromCodeChars(codePoint).matches("\\w");

  1933     }

  1935     public static int getType(int x) {

  1936         throw new UnsupportedOperationException("getType: " + (char)x);

  1937     }

  1939     /**

  1940      * Determines if the specified character is

  1941      * permissible as the first character in a Java identifier.

  1942      * <p>

  1943      * A character may start a Java identifier if and only if

  1944      * one of the following conditions is true:

  1945      * <ul>

  1946      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}

  1947      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}

  1948      * <li> {@code ch} is a currency symbol (such as {@code '$'})

  1949      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).

  1950      * </ul>

  1951      *

  1952      * <p><b>Note:</b> This method cannot handle <a

  1953      * href="#supplementary"> supplementary characters</a>. To support

  1954      * all Unicode characters, including supplementary characters, use

  1955      * the {@link #isJavaIdentifierStart(int)} method.

  1956      *

  1957      * @param   ch the character to be tested.

  1958      * @return  {@code true} if the character may start a Java identifier;

  1959      *          {@code false} otherwise.

  1960      * @see     Character#isJavaIdentifierPart(char)

  1961      * @see     Character#isLetter(char)

  1962      * @see     Character#isUnicodeIdentifierStart(char)

  1963      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)

  1964      * @since   1.1

  1965      */

  1966     public static boolean isJavaIdentifierStart(char ch) {

  1967         return isJavaIdentifierStart((int)ch);

  1968     }

  1970     /**

  1971      * Determines if the character (Unicode code point) is

  1972      * permissible as the first character in a Java identifier.

  1973      * <p>

  1974      * A character may start a Java identifier if and only if

  1975      * one of the following conditions is true:

  1976      * <ul>

  1977      * <li> {@link #isLetter(int) isLetter(codePoint)}

  1978      *      returns {@code true}

  1979      * <li> {@link #getType(int) getType(codePoint)}

  1980      *      returns {@code LETTER_NUMBER}

  1981      * <li> the referenced character is a currency symbol (such as {@code '$'})

  1982      * <li> the referenced character is a connecting punctuation character

  1983      *      (such as {@code '_'}).

  1984      * </ul>

  1985      *

  1986      * @param   codePoint the character (Unicode code point) to be tested.

  1987      * @return  {@code true} if the character may start a Java identifier;

  1988      *          {@code false} otherwise.

  1989      * @see     Character#isJavaIdentifierPart(int)

  1990      * @see     Character#isLetter(int)

  1991      * @see     Character#isUnicodeIdentifierStart(int)

  1992      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)

  1993      * @since   1.5

  1994      */

  1995     public static boolean isJavaIdentifierStart(int codePoint) {

  1996         return

  1997             ('A' <= codePoint && codePoint <= 'Z') ||

  1998             ('a' <= codePoint && codePoint <= 'z') ||

  1999             codePoint == '$';

  2000     }

  2002     /**

  2003      * Determines if the specified character may be part of a Java

  2004      * identifier as other than the first character.

  2005      * <p>

  2006      * A character may be part of a Java identifier if any of the following

  2007      * are true:

  2008      * <ul>

  2009      * <li>  it is a letter

  2010      * <li>  it is a currency symbol (such as {@code '$'})

  2011      * <li>  it is a connecting punctuation character (such as {@code '_'})

  2012      * <li>  it is a digit

  2013      * <li>  it is a numeric letter (such as a Roman numeral character)

  2014      * <li>  it is a combining mark

  2015      * <li>  it is a non-spacing mark

  2016      * <li> {@code isIdentifierIgnorable} returns

  2017      * {@code true} for the character

  2018      * </ul>

  2019      *

  2020      * <p><b>Note:</b> This method cannot handle <a

  2021      * href="#supplementary"> supplementary characters</a>. To support

  2022      * all Unicode characters, including supplementary characters, use

  2023      * the {@link #isJavaIdentifierPart(int)} method.

  2024      *

  2025      * @param   ch      the character to be tested.

  2026      * @return {@code true} if the character may be part of a

  2027      *          Java identifier; {@code false} otherwise.

  2028      * @see     Character#isIdentifierIgnorable(char)

  2029      * @see     Character#isJavaIdentifierStart(char)

  2030      * @see     Character#isLetterOrDigit(char)

  2031      * @see     Character#isUnicodeIdentifierPart(char)

  2032      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)

  2033      * @since   1.1

  2034      */

  2035     public static boolean isJavaIdentifierPart(char ch) {

  2036         return isJavaIdentifierPart((int)ch);

  2037     }

  2039     /**

  2040      * Determines if the character (Unicode code point) may be part of a Java

  2041      * identifier as other than the first character.

  2042      * <p>

  2043      * A character may be part of a Java identifier if any of the following

  2044      * are true:

  2045      * <ul>

  2046      * <li>  it is a letter

  2047      * <li>  it is a currency symbol (such as {@code '$'})

  2048      * <li>  it is a connecting punctuation character (such as {@code '_'})

  2049      * <li>  it is a digit

  2050      * <li>  it is a numeric letter (such as a Roman numeral character)

  2051      * <li>  it is a combining mark

  2052      * <li>  it is a non-spacing mark

  2053      * <li> {@link #isIdentifierIgnorable(int)

  2054      * isIdentifierIgnorable(codePoint)} returns {@code true} for

  2055      * the character

  2056      * </ul>

  2057      *

  2058      * @param   codePoint the character (Unicode code point) to be tested.

  2059      * @return {@code true} if the character may be part of a

  2060      *          Java identifier; {@code false} otherwise.

  2061      * @see     Character#isIdentifierIgnorable(int)

  2062      * @see     Character#isJavaIdentifierStart(int)

  2063      * @see     Character#isLetterOrDigit(int)

  2064      * @see     Character#isUnicodeIdentifierPart(int)

  2065      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)

  2066      * @since   1.5

  2067      */

  2068     public static boolean isJavaIdentifierPart(int codePoint) {

  2069         return isJavaIdentifierStart(codePoint) ||

  2070             ('0' <= codePoint && codePoint <= '9') || codePoint == '$';

  2071     }

  2073     /**

  2074      * Converts the character argument to lowercase using case

  2075      * mapping information from the UnicodeData file.

  2076      * <p>

  2077      * Note that

  2078      * {@code Character.isLowerCase(Character.toLowerCase(ch))}

  2079      * does not always return {@code true} for some ranges of

  2080      * characters, particularly those that are symbols or ideographs.

  2081      *

  2082      * <p>In general, {@link String#toLowerCase()} should be used to map

  2083      * characters to lowercase. {@code String} case mapping methods

  2084      * have several benefits over {@code Character} case mapping methods.

  2085      * {@code String} case mapping methods can perform locale-sensitive

  2086      * mappings, context-sensitive mappings, and 1:M character mappings, whereas

  2087      * the {@code Character} case mapping methods cannot.

  2088      *

  2089      * <p><b>Note:</b> This method cannot handle <a

  2090      * href="#supplementary"> supplementary characters</a>. To support

  2091      * all Unicode characters, including supplementary characters, use

  2092      * the {@link #toLowerCase(int)} method.

  2093      *

  2094      * @param   ch   the character to be converted.

  2095      * @return  the lowercase equivalent of the character, if any;

  2096      *          otherwise, the character itself.

  2097      * @see     Character#isLowerCase(char)

  2098      * @see     String#toLowerCase()

  2099      */

  2100     public static char toLowerCase(char ch) {

  2101         return String.valueOf(ch).toLowerCase().charAt(0);

  2102     }

  2104     /**

  2105      * Converts the character argument to uppercase using case mapping

  2106      * information from the UnicodeData file.

  2107      * <p>

  2108      * Note that

  2109      * {@code Character.isUpperCase(Character.toUpperCase(ch))}

  2110      * does not always return {@code true} for some ranges of

  2111      * characters, particularly those that are symbols or ideographs.

  2112      *

  2113      * <p>In general, {@link String#toUpperCase()} should be used to map

  2114      * characters to uppercase. {@code String} case mapping methods

  2115      * have several benefits over {@code Character} case mapping methods.

  2116      * {@code String} case mapping methods can perform locale-sensitive

  2117      * mappings, context-sensitive mappings, and 1:M character mappings, whereas

  2118      * the {@code Character} case mapping methods cannot.

  2119      *

  2120      * <p><b>Note:</b> This method cannot handle <a

  2121      * href="#supplementary"> supplementary characters</a>. To support

  2122      * all Unicode characters, including supplementary characters, use

  2123      * the {@link #toUpperCase(int)} method.

  2124      *

  2125      * @param   ch   the character to be converted.

  2126      * @return  the uppercase equivalent of the character, if any;

  2127      *          otherwise, the character itself.

  2128      * @see     Character#isUpperCase(char)

  2129      * @see     String#toUpperCase()

  2130      */

  2131     public static char toUpperCase(char ch) {

  2132         return String.valueOf(ch).toUpperCase().charAt(0);

  2133     }

  2135     /**

  2136      * Returns the numeric value of the character {@code ch} in the

  2137      * specified radix.

  2138      * <p>

  2139      * If the radix is not in the range {@code MIN_RADIX} &le;

  2140      * {@code radix} &le; {@code MAX_RADIX} or if the

  2141      * value of {@code ch} is not a valid digit in the specified

  2142      * radix, {@code -1} is returned. A character is a valid digit

  2143      * if at least one of the following is true:

  2144      * <ul>

  2145      * <li>The method {@code isDigit} is {@code true} of the character

  2146      *     and the Unicode decimal digit value of the character (or its

  2147      *     single-character decomposition) is less than the specified radix.

  2148      *     In this case the decimal digit value is returned.

  2149      * <li>The character is one of the uppercase Latin letters

  2150      *     {@code 'A'} through {@code 'Z'} and its code is less than

  2151      *     {@code radix + 'A' - 10}.

  2152      *     In this case, {@code ch - 'A' + 10}

  2153      *     is returned.

  2154      * <li>The character is one of the lowercase Latin letters

  2155      *     {@code 'a'} through {@code 'z'} and its code is less than

  2156      *     {@code radix + 'a' - 10}.

  2157      *     In this case, {@code ch - 'a' + 10}

  2158      *     is returned.

  2159      * <li>The character is one of the fullwidth uppercase Latin letters A

  2160      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})

  2161      *     and its code is less than

  2162      *     {@code radix + '\u005CuFF21' - 10}.

  2163      *     In this case, {@code ch - '\u005CuFF21' + 10}

  2164      *     is returned.

  2165      * <li>The character is one of the fullwidth lowercase Latin letters a

  2166      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})

  2167      *     and its code is less than

  2168      *     {@code radix + '\u005CuFF41' - 10}.

  2169      *     In this case, {@code ch - '\u005CuFF41' + 10}

  2170      *     is returned.

  2171      * </ul>

  2172      *

  2173      * <p><b>Note:</b> This method cannot handle <a

  2174      * href="#supplementary"> supplementary characters</a>. To support

  2175      * all Unicode characters, including supplementary characters, use

  2176      * the {@link #digit(int, int)} method.

  2177      *

  2178      * @param   ch      the character to be converted.

  2179      * @param   radix   the radix.

  2180      * @return  the numeric value represented by the character in the

  2181      *          specified radix.

  2182      * @see     Character#forDigit(int, int)

  2183      * @see     Character#isDigit(char)

  2184      */

  2185     public static int digit(char ch, int radix) {

  2186         return digit((int)ch, radix);

  2187     }

  2189     /**

  2190      * Returns the numeric value of the specified character (Unicode

  2191      * code point) in the specified radix.

  2192      *

  2193      * <p>If the radix is not in the range {@code MIN_RADIX} &le;

  2194      * {@code radix} &le; {@code MAX_RADIX} or if the

  2195      * character is not a valid digit in the specified

  2196      * radix, {@code -1} is returned. A character is a valid digit

  2197      * if at least one of the following is true:

  2198      * <ul>

  2199      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character

  2200      *     and the Unicode decimal digit value of the character (or its

  2201      *     single-character decomposition) is less than the specified radix.

  2202      *     In this case the decimal digit value is returned.

  2203      * <li>The character is one of the uppercase Latin letters

  2204      *     {@code 'A'} through {@code 'Z'} and its code is less than

  2205      *     {@code radix + 'A' - 10}.

  2206      *     In this case, {@code codePoint - 'A' + 10}

  2207      *     is returned.

  2208      * <li>The character is one of the lowercase Latin letters

  2209      *     {@code 'a'} through {@code 'z'} and its code is less than

  2210      *     {@code radix + 'a' - 10}.

  2211      *     In this case, {@code codePoint - 'a' + 10}

  2212      *     is returned.

  2213      * <li>The character is one of the fullwidth uppercase Latin letters A

  2214      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})

  2215      *     and its code is less than

  2216      *     {@code radix + '\u005CuFF21' - 10}.

  2217      *     In this case,

  2218      *     {@code codePoint - '\u005CuFF21' + 10}

  2219      *     is returned.

  2220      * <li>The character is one of the fullwidth lowercase Latin letters a

  2221      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})

  2222      *     and its code is less than

  2223      *     {@code radix + '\u005CuFF41'- 10}.

  2224      *     In this case,

  2225      *     {@code codePoint - '\u005CuFF41' + 10}

  2226      *     is returned.

  2227      * </ul>

  2228      *

  2229      * @param   codePoint the character (Unicode code point) to be converted.

  2230      * @param   radix   the radix.

  2231      * @return  the numeric value represented by the character in the

  2232      *          specified radix.

  2233      * @see     Character#forDigit(int, int)

  2234      * @see     Character#isDigit(int)

  2235      * @since   1.5

  2236      */

  2237     @JavaScriptBody(args = { "codePoint", "radix" }, body=

  2238         "var x = parseInt(String.fromCharCode(codePoint), radix);\n"

  2239       + "return isNaN(x) ? -1 : x;"

  2240     )

  2241     public static int digit(int codePoint, int radix) {

  2242         throw new UnsupportedOperationException();

  2243     }

  2245     /**

  2246      * Returns the {@code int} value that the specified Unicode

  2247      * character represents. For example, the character

  2248      * {@code '\u005Cu216C'} (the roman numeral fifty) will return

  2249      * an int with a value of 50.

  2250      * <p>

  2251      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through

  2252      * {@code '\u005Cu005A'}), lowercase

  2253      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and

  2254      * full width variant ({@code '\u005CuFF21'} through

  2255      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through

  2256      * {@code '\u005CuFF5A'}) forms have numeric values from 10

  2257      * through 35. This is independent of the Unicode specification,

  2258      * which does not assign numeric values to these {@code char}

  2259      * values.

  2260      * <p>

  2261      * If the character does not have a numeric value, then -1 is returned.

  2262      * If the character has a numeric value that cannot be represented as a

  2263      * nonnegative integer (for example, a fractional value), then -2

  2264      * is returned.

  2265      *

  2266      * <p><b>Note:</b> This method cannot handle <a

  2267      * href="#supplementary"> supplementary characters</a>. To support

  2268      * all Unicode characters, including supplementary characters, use

  2269      * the {@link #getNumericValue(int)} method.

  2270      *

  2271      * @param   ch      the character to be converted.

  2272      * @return  the numeric value of the character, as a nonnegative {@code int}

  2273      *           value; -2 if the character has a numeric value that is not a

  2274      *          nonnegative integer; -1 if the character has no numeric value.

  2275      * @see     Character#forDigit(int, int)

  2276      * @see     Character#isDigit(char)

  2277      * @since   1.1

  2278      */

  2279     public static int getNumericValue(char ch) {

  2280         return getNumericValue((int)ch);

  2281     }

  2283     /**

  2284      * Returns the {@code int} value that the specified

  2285      * character (Unicode code point) represents. For example, the character

  2286      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return

  2287      * an {@code int} with a value of 50.

  2288      * <p>

  2289      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through

  2290      * {@code '\u005Cu005A'}), lowercase

  2291      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and

  2292      * full width variant ({@code '\u005CuFF21'} through

  2293      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through

  2294      * {@code '\u005CuFF5A'}) forms have numeric values from 10

  2295      * through 35. This is independent of the Unicode specification,

  2296      * which does not assign numeric values to these {@code char}

  2297      * values.

  2298      * <p>

  2299      * If the character does not have a numeric value, then -1 is returned.

  2300      * If the character has a numeric value that cannot be represented as a

  2301      * nonnegative integer (for example, a fractional value), then -2

  2302      * is returned.

  2303      *

  2304      * @param   codePoint the character (Unicode code point) to be converted.

  2305      * @return  the numeric value of the character, as a nonnegative {@code int}

  2306      *          value; -2 if the character has a numeric value that is not a

  2307      *          nonnegative integer; -1 if the character has no numeric value.

  2308      * @see     Character#forDigit(int, int)

  2309      * @see     Character#isDigit(int)

  2310      * @since   1.5

  2311      */

  2312     public static int getNumericValue(int codePoint) {

  2313         throw new UnsupportedOperationException();

  2314     }

  2316     /**

  2317      * Determines if the specified character is ISO-LATIN-1 white space.

  2318      * This method returns {@code true} for the following five

  2319      * characters only:

  2320      * <table>

  2321      * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>

  2322      *     <td>{@code HORIZONTAL TABULATION}</td></tr>

  2323      * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>

  2324      *     <td>{@code NEW LINE}</td></tr>

  2325      * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>

  2326      *     <td>{@code FORM FEED}</td></tr>

  2327      * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>

  2328      *     <td>{@code CARRIAGE RETURN}</td></tr>

  2329      * <tr><td>{@code '&nbsp;'}</td>  <td>{@code U+0020}</td>

  2330      *     <td>{@code SPACE}</td></tr>

  2331      * </table>

  2332      *

  2333      * @param      ch   the character to be tested.

  2334      * @return     {@code true} if the character is ISO-LATIN-1 white

  2335      *             space; {@code false} otherwise.

  2336      * @see        Character#isSpaceChar(char)

  2337      * @see        Character#isWhitespace(char)

  2338      * @deprecated Replaced by isWhitespace(char).

  2339      */

  2340     @Deprecated

  2341     public static boolean isSpace(char ch) {

  2342         return isSpaceChar(ch);

  2343     }

  2345     public static boolean isSpaceChar(int ch) {

  2346         return (ch <= 0x0020) &&

  2347             (((((1L << 0x0009) |

  2348             (1L << 0x000A) |

  2349             (1L << 0x000C) |

  2350             (1L << 0x000D) |

  2351             (1L << 0x0020)) >> ch) & 1L) != 0);

  2352     }

  2355     /**

  2356      * Determines if the specified character is white space according to Java.

  2357      * A character is a Java whitespace character if and only if it satisfies

  2358      * one of the following criteria:

  2359      * <ul>

  2360      * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},

  2361      *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})

  2362      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},

  2363      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).

  2364      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.

  2365      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.

  2366      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.

  2367      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.

  2368      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.

  2369      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.

  2370      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.

  2371      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.

  2372      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.

  2373      * </ul>

  2374      *

  2375      * <p><b>Note:</b> This method cannot handle <a

  2376      * href="#supplementary"> supplementary characters</a>. To support

  2377      * all Unicode characters, including supplementary characters, use

  2378      * the {@link #isWhitespace(int)} method.

  2379      *

  2380      * @param   ch the character to be tested.

  2381      * @return  {@code true} if the character is a Java whitespace

  2382      *          character; {@code false} otherwise.

  2383      * @see     Character#isSpaceChar(char)

  2384      * @since   1.1

  2385      */

  2386     public static boolean isWhitespace(char ch) {

  2387         return isWhitespace((int)ch);

  2388     }

  2390     /**

  2391      * Determines if the specified character (Unicode code point) is

  2392      * white space according to Java.  A character is a Java

  2393      * whitespace character if and only if it satisfies one of the

  2394      * following criteria:

  2395      * <ul>

  2396      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},

  2397      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})

  2398      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},

  2399      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).

  2400      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.

  2401      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.

  2402      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.

  2403      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.

  2404      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.

  2405      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.

  2406      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.

  2407      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.

  2408      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.

  2409      * </ul>

  2410      * <p>

  2411      *

  2412      * @param   codePoint the character (Unicode code point) to be tested.

  2413      * @return  {@code true} if the character is a Java whitespace

  2414      *          character; {@code false} otherwise.

  2415      * @see     Character#isSpaceChar(int)

  2416      * @since   1.5

  2417      */

  2418     public static boolean isWhitespace(int codePoint) {

  2419         // values up to 128: [9,10,11,12,13,28,29,30,31,32]

  2420         if (9 <= codePoint && 13 >= codePoint) {

  2421             return true;

  2422         }

  2423         if (28 <= codePoint && 32 >= codePoint) {

  2424             return true;

  2425         }

  2426         return false;

  2427     }

  2429     /**

  2430      * Determines if the specified character is an ISO control

  2431      * character.  A character is considered to be an ISO control

  2432      * character if its code is in the range {@code '\u005Cu0000'}

  2433      * through {@code '\u005Cu001F'} or in the range

  2434      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.

  2435      *

  2436      * <p><b>Note:</b> This method cannot handle <a

  2437      * href="#supplementary"> supplementary characters</a>. To support

  2438      * all Unicode characters, including supplementary characters, use

  2439      * the {@link #isISOControl(int)} method.

  2440      *

  2441      * @param   ch      the character to be tested.

  2442      * @return  {@code true} if the character is an ISO control character;

  2443      *          {@code false} otherwise.

  2444      *

  2445      * @see     Character#isSpaceChar(char)

  2446      * @see     Character#isWhitespace(char)

  2447      * @since   1.1

  2448      */

  2449     public static boolean isISOControl(char ch) {

  2450         return isISOControl((int)ch);

  2451     }

  2453     /**

  2454      * Determines if the referenced character (Unicode code point) is an ISO control

  2455      * character.  A character is considered to be an ISO control

  2456      * character if its code is in the range {@code '\u005Cu0000'}

  2457      * through {@code '\u005Cu001F'} or in the range

  2458      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.

  2459      *

  2460      * @param   codePoint the character (Unicode code point) to be tested.

  2461      * @return  {@code true} if the character is an ISO control character;

  2462      *          {@code false} otherwise.

  2463      * @see     Character#isSpaceChar(int)

  2464      * @see     Character#isWhitespace(int)

  2465      * @since   1.5

  2466      */

  2467     public static boolean isISOControl(int codePoint) {

  2468         // Optimized form of:

  2469         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||

  2470         //     (codePoint >= 0x7F && codePoint <= 0x9F);

  2471         return codePoint <= 0x9F &&

  2472             (codePoint >= 0x7F || (codePoint >>> 5 == 0));

  2473     }

  2475     /**

  2476      * Determines the character representation for a specific digit in

  2477      * the specified radix. If the value of {@code radix} is not a

  2478      * valid radix, or the value of {@code digit} is not a valid

  2479      * digit in the specified radix, the null character

  2480      * ({@code '\u005Cu0000'}) is returned.

  2481      * <p>

  2482      * The {@code radix} argument is valid if it is greater than or

  2483      * equal to {@code MIN_RADIX} and less than or equal to

  2484      * {@code MAX_RADIX}. The {@code digit} argument is valid if

  2485      * {@code 0 <= digit < radix}.

  2486      * <p>

  2487      * If the digit is less than 10, then

  2488      * {@code '0' + digit} is returned. Otherwise, the value

  2489      * {@code 'a' + digit - 10} is returned.

  2490      *

  2491      * @param   digit   the number to convert to a character.

  2492      * @param   radix   the radix.

  2493      * @return  the {@code char} representation of the specified digit

  2494      *          in the specified radix.

  2495      * @see     Character#MIN_RADIX

  2496      * @see     Character#MAX_RADIX

  2497      * @see     Character#digit(char, int)

  2498      */

  2499     public static char forDigit(int digit, int radix) {

  2500         if ((digit >= radix) || (digit < 0)) {

  2501             return '\0';

  2502         }

  2503         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {

  2504             return '\0';

  2505         }

  2506         if (digit < 10) {

  2507             return (char)('0' + digit);

  2508         }

  2509         return (char)('a' - 10 + digit);

  2510     }

  2512     /**

  2513      * Compares two {@code Character} objects numerically.

  2514      *

  2515      * @param   anotherCharacter   the {@code Character} to be compared.

  2517      * @return  the value {@code 0} if the argument {@code Character}

  2518      *          is equal to this {@code Character}; a value less than

  2519      *          {@code 0} if this {@code Character} is numerically less

  2520      *          than the {@code Character} argument; and a value greater than

  2521      *          {@code 0} if this {@code Character} is numerically greater

  2522      *          than the {@code Character} argument (unsigned comparison).

  2523      *          Note that this is strictly a numerical comparison; it is not

  2524      *          locale-dependent.

  2525      * @since   1.2

  2526      */

  2527     public int compareTo(Character anotherCharacter) {

  2528         return compare(this.value, anotherCharacter.value);

  2529     }

  2531     /**

  2532      * Compares two {@code char} values numerically.

  2533      * The value returned is identical to what would be returned by:

  2534      * <pre>

  2535      *    Character.valueOf(x).compareTo(Character.valueOf(y))

  2536      * </pre>

  2537      *

  2538      * @param  x the first {@code char} to compare

  2539      * @param  y the second {@code char} to compare

  2540      * @return the value {@code 0} if {@code x == y};

  2541      *         a value less than {@code 0} if {@code x < y}; and

  2542      *         a value greater than {@code 0} if {@code x > y}

  2543      * @since 1.7

  2544      */

  2545     public static int compare(char x, char y) {

  2546         return x - y;

  2547     }

  2550     /**

  2551      * The number of bits used to represent a <tt>char</tt> value in unsigned

  2552      * binary form, constant {@code 16}.

  2553      *

  2554      * @since 1.5

  2555      */

  2556     public static final int SIZE = 16;

  2558     /**

  2559      * Returns the value obtained by reversing the order of the bytes in the

  2560      * specified <tt>char</tt> value.

  2561      *

  2562      * @return the value obtained by reversing (or, equivalently, swapping)

  2563      *     the bytes in the specified <tt>char</tt> value.

  2564      * @since 1.5

  2565      */

  2566     public static char reverseBytes(char ch) {

  2567         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));

  2568     }

  2570     static {

  2571         // as last step of initialization, initialize valueOf method

  2572         initValueOf();

  2573     }

  2574     @JavaScriptBody(args = {}, body =

  2575         "vm.java_lang_Character(false)." +

  2576         "valueOf = function() { return this._value(); };"

  2577     )

  2578     private native static void initValueOf();

  2580 }

author	Jaroslav Tulach <jaroslav.tulach@apidesign.org>
	Tue, 17 Jan 2017 07:04:06 +0100
changeset 1985	cd1cc103a03c
parent 1384	12a395b571c8
permissions	-rw-r--r--