1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/rt/emul/mini/src/main/java/java/lang/Character.java	Tue Feb 26 16:54:16 2013 +0100
     1.3 @@ -0,0 +1,2519 @@
     1.4 +/*
     1.5 + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.  Oracle designates this
    1.11 + * particular file as subject to the "Classpath" exception as provided
    1.12 + * by Oracle in the LICENSE file that accompanied this code.
    1.13 + *
    1.14 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.16 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.17 + * version 2 for more details (a copy is included in the LICENSE file that
    1.18 + * accompanied this code).
    1.19 + *
    1.20 + * You should have received a copy of the GNU General Public License version
    1.21 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.22 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.23 + *
    1.24 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    1.25 + * or visit www.oracle.com if you need additional information or have any
    1.26 + * questions.
    1.27 + */
    1.28 +
    1.29 +package java.lang;
    1.30 +
    1.31 +import org.apidesign.bck2brwsr.core.JavaScriptBody;
    1.32 +
    1.33 +/**
    1.34 + * The {@code Character} class wraps a value of the primitive
    1.35 + * type {@code char} in an object. An object of type
    1.36 + * {@code Character} contains a single field whose type is
    1.37 + * {@code char}.
    1.38 + * <p>
    1.39 + * In addition, this class provides several methods for determining
    1.40 + * a character's category (lowercase letter, digit, etc.) and for converting
    1.41 + * characters from uppercase to lowercase and vice versa.
    1.42 + * <p>
    1.43 + * Character information is based on the Unicode Standard, version 6.0.0.
    1.44 + * <p>
    1.45 + * The methods and data of class {@code Character} are defined by
    1.46 + * the information in the <i>UnicodeData</i> file that is part of the
    1.47 + * Unicode Character Database maintained by the Unicode
    1.48 + * Consortium. This file specifies various properties including name
    1.49 + * and general category for every defined Unicode code point or
    1.50 + * character range.
    1.51 + * <p>
    1.52 + * The file and its description are available from the Unicode Consortium at:
    1.53 + * <ul>
    1.54 + * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
    1.55 + * </ul>
    1.56 + *
    1.57 + * <h4><a name="unicode">Unicode Character Representations</a></h4>
    1.58 + *
    1.59 + * <p>The {@code char} data type (and therefore the value that a
    1.60 + * {@code Character} object encapsulates) are based on the
    1.61 + * original Unicode specification, which defined characters as
    1.62 + * fixed-width 16-bit entities. The Unicode Standard has since been
    1.63 + * changed to allow for characters whose representation requires more
    1.64 + * than 16 bits.  The range of legal <em>code point</em>s is now
    1.65 + * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
    1.66 + * (Refer to the <a
    1.67 + * href="http://www.unicode.org/reports/tr27/#notation"><i>
    1.68 + * definition</i></a> of the U+<i>n</i> notation in the Unicode
    1.69 + * Standard.)
    1.70 + *
    1.71 + * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
    1.72 + * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
    1.73 + * <a name="supplementary">Characters</a> whose code points are greater
    1.74 + * than U+FFFF are called <em>supplementary character</em>s.  The Java
    1.75 + * platform uses the UTF-16 representation in {@code char} arrays and
    1.76 + * in the {@code String} and {@code StringBuffer} classes. In
    1.77 + * this representation, supplementary characters are represented as a pair
    1.78 + * of {@code char} values, the first from the <em>high-surrogates</em>
    1.79 + * range, (&#92;uD800-&#92;uDBFF), the second from the
    1.80 + * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
    1.81 + *
    1.82 + * <p>A {@code char} value, therefore, represents Basic
    1.83 + * Multilingual Plane (BMP) code points, including the surrogate
    1.84 + * code points, or code units of the UTF-16 encoding. An
    1.85 + * {@code int} value represents all Unicode code points,
    1.86 + * including supplementary code points. The lower (least significant)
    1.87 + * 21 bits of {@code int} are used to represent Unicode code
    1.88 + * points and the upper (most significant) 11 bits must be zero.
    1.89 + * Unless otherwise specified, the behavior with respect to
    1.90 + * supplementary characters and surrogate {@code char} values is
    1.91 + * as follows:
    1.92 + *
    1.93 + * <ul>
    1.94 + * <li>The methods that only accept a {@code char} value cannot support
    1.95 + * supplementary characters. They treat {@code char} values from the
    1.96 + * surrogate ranges as undefined characters. For example,
    1.97 + * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
    1.98 + * this specific value if followed by any low-surrogate value in a string
    1.99 + * would represent a letter.
   1.100 + *
   1.101 + * <li>The methods that accept an {@code int} value support all
   1.102 + * Unicode characters, including supplementary characters. For
   1.103 + * example, {@code Character.isLetter(0x2F81A)} returns
   1.104 + * {@code true} because the code point value represents a letter
   1.105 + * (a CJK ideograph).
   1.106 + * </ul>
   1.107 + *
   1.108 + * <p>In the Java SE API documentation, <em>Unicode code point</em> is
   1.109 + * used for character values in the range between U+0000 and U+10FFFF,
   1.110 + * and <em>Unicode code unit</em> is used for 16-bit
   1.111 + * {@code char} values that are code units of the <em>UTF-16</em>
   1.112 + * encoding. For more information on Unicode terminology, refer to the
   1.113 + * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
   1.114 + *
   1.115 + * @author  Lee Boynton
   1.116 + * @author  Guy Steele
   1.117 + * @author  Akira Tanaka
   1.118 + * @author  Martin Buchholz
   1.119 + * @author  Ulf Zibis
   1.120 + * @since   1.0
   1.121 + */
   1.122 +public final
   1.123 +class Character implements java.io.Serializable, Comparable<Character> {
   1.124 +    /**
   1.125 +     * The minimum radix available for conversion to and from strings.
   1.126 +     * The constant value of this field ({@code 2}) is the smallest value
   1.127 +     * permitted for the radix argument in radix-conversion methods such as
   1.128 +     * the {@code digit} method, the {@code forDigit} method, and the
   1.129 +     * {@code toString} method of class {@code Integer}.
   1.130 +     *
   1.131 +     * @see     Character#digit(char, int)
   1.132 +     * @see     Character#forDigit(int, int)
   1.133 +     * @see     Integer#toString(int, int)
   1.134 +     * @see     Integer#valueOf(String)
   1.135 +     */
   1.136 +    public static final int MIN_RADIX = 2;
   1.137 +
   1.138 +    /**
   1.139 +     * The maximum radix available for conversion to and from strings.
   1.140 +     * The constant value of this field ({@code 36}) is the largest value
   1.141 +     * permitted for the radix argument in radix-conversion methods such as
   1.142 +     * the {@code digit} method, the {@code forDigit} method, and the
   1.143 +     * {@code toString} method of class {@code Integer}.
   1.144 +     *
   1.145 +     * @see     Character#digit(char, int)
   1.146 +     * @see     Character#forDigit(int, int)
   1.147 +     * @see     Integer#toString(int, int)
   1.148 +     * @see     Integer#valueOf(String)
   1.149 +     */
   1.150 +    public static final int MAX_RADIX = 36;
   1.151 +
   1.152 +    /**
   1.153 +     * The constant value of this field is the smallest value of type
   1.154 +     * {@code char}, {@code '\u005Cu0000'}.
   1.155 +     *
   1.156 +     * @since   1.0.2
   1.157 +     */
   1.158 +    public static final char MIN_VALUE = '\u0000';
   1.159 +
   1.160 +    /**
   1.161 +     * The constant value of this field is the largest value of type
   1.162 +     * {@code char}, {@code '\u005CuFFFF'}.
   1.163 +     *
   1.164 +     * @since   1.0.2
   1.165 +     */
   1.166 +    public static final char MAX_VALUE = '\uFFFF';
   1.167 +
   1.168 +    /**
   1.169 +     * The {@code Class} instance representing the primitive type
   1.170 +     * {@code char}, looked up by the name {@code "char"}.
   1.171 +     *
   1.172 +     * @since   1.1
   1.173 +     */
   1.174 +    public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
   1.175 +
   1.176 +    /*
   1.177 +     * Normative general types
   1.178 +     */
   1.179 +
   1.180 +    /*
   1.181 +     * General character types: one constant per Unicode general category.
   1.182 +     */
   1.183 +
   1.184 +    /**
   1.185 +     * General category "Cn" in the Unicode specification.
   1.186 +     * @since   1.1
   1.187 +     */
   1.188 +    public static final byte UNASSIGNED = 0;
   1.189 +
   1.190 +    /**
   1.191 +     * General category "Lu" in the Unicode specification.
   1.192 +     * @since   1.1
   1.193 +     */
   1.194 +    public static final byte UPPERCASE_LETTER = 1;
   1.195 +
   1.196 +    /**
   1.197 +     * General category "Ll" in the Unicode specification.
   1.198 +     * @since   1.1
   1.199 +     */
   1.200 +    public static final byte LOWERCASE_LETTER = 2;
   1.201 +
   1.202 +    /**
   1.203 +     * General category "Lt" in the Unicode specification.
   1.204 +     * @since   1.1
   1.205 +     */
   1.206 +    public static final byte TITLECASE_LETTER = 3;
   1.207 +
   1.208 +    /**
   1.209 +     * General category "Lm" in the Unicode specification.
   1.210 +     * @since   1.1
   1.211 +     */
   1.212 +    public static final byte MODIFIER_LETTER = 4;
   1.213 +
   1.214 +    /**
   1.215 +     * General category "Lo" in the Unicode specification.
   1.216 +     * @since   1.1
   1.217 +     */
   1.218 +    public static final byte OTHER_LETTER = 5;
   1.219 +
   1.220 +    /**
   1.221 +     * General category "Mn" in the Unicode specification.
   1.222 +     * @since   1.1
   1.223 +     */
   1.224 +    public static final byte NON_SPACING_MARK = 6;
   1.225 +
   1.226 +    /**
   1.227 +     * General category "Me" in the Unicode specification.
   1.228 +     * @since   1.1
   1.229 +     */
   1.230 +    public static final byte ENCLOSING_MARK = 7;
   1.231 +
   1.232 +    /**
   1.233 +     * General category "Mc" in the Unicode specification.
   1.234 +     * @since   1.1
   1.235 +     */
   1.236 +    public static final byte COMBINING_SPACING_MARK = 8;
   1.237 +
   1.238 +    /**
   1.239 +     * General category "Nd" in the Unicode specification.
   1.240 +     * @since   1.1
   1.241 +     */
   1.242 +    public static final byte DECIMAL_DIGIT_NUMBER        = 9;
   1.243 +
   1.244 +    /**
   1.245 +     * General category "Nl" in the Unicode specification.
   1.246 +     * @since   1.1
   1.247 +     */
   1.248 +    public static final byte LETTER_NUMBER = 10;
   1.249 +
   1.250 +    /**
   1.251 +     * General category "No" in the Unicode specification.
   1.252 +     * @since   1.1
   1.253 +     */
   1.254 +    public static final byte OTHER_NUMBER = 11;
   1.255 +
   1.256 +    /**
   1.257 +     * General category "Zs" in the Unicode specification.
   1.258 +     * @since   1.1
   1.259 +     */
   1.260 +    public static final byte SPACE_SEPARATOR = 12;
   1.261 +
   1.262 +    /**
   1.263 +     * General category "Zl" in the Unicode specification.
   1.264 +     * @since   1.1
   1.265 +     */
   1.266 +    public static final byte LINE_SEPARATOR = 13;
   1.267 +
   1.268 +    /**
   1.269 +     * General category "Zp" in the Unicode specification.
   1.270 +     * @since   1.1
   1.271 +     */
   1.272 +    public static final byte PARAGRAPH_SEPARATOR = 14;
   1.273 +
   1.274 +    /**
   1.275 +     * General category "Cc" in the Unicode specification.
   1.276 +     * @since   1.1
   1.277 +     */
   1.278 +    public static final byte CONTROL = 15;
   1.279 +
   1.280 +    /**
   1.281 +     * General category "Cf" in the Unicode specification.
   1.282 +     * @since   1.1
   1.283 +     */
   1.284 +    public static final byte FORMAT = 16;
   1.285 +
   1.286 +    /**
   1.287 +     * General category "Co" in the Unicode specification.
   1.288 +     * @since   1.1
   1.289 +     */
   1.290 +    public static final byte PRIVATE_USE = 18; // NOTE: 17 is not used by any category constant in this list
   1.291 +
   1.292 +    /**
   1.293 +     * General category "Cs" in the Unicode specification.
   1.294 +     * @since   1.1
   1.295 +     */
   1.296 +    public static final byte SURROGATE = 19;
   1.297 +
   1.298 +    /**
   1.299 +     * General category "Pd" in the Unicode specification.
   1.300 +     * @since   1.1
   1.301 +     */
   1.302 +    public static final byte DASH_PUNCTUATION = 20;
   1.303 +
   1.304 +    /**
   1.305 +     * General category "Ps" in the Unicode specification.
   1.306 +     * @since   1.1
   1.307 +     */
   1.308 +    public static final byte START_PUNCTUATION = 21;
   1.309 +
   1.310 +    /**
   1.311 +     * General category "Pe" in the Unicode specification.
   1.312 +     * @since   1.1
   1.313 +     */
   1.314 +    public static final byte END_PUNCTUATION = 22;
   1.315 +
   1.316 +    /**
   1.317 +     * General category "Pc" in the Unicode specification.
   1.318 +     * @since   1.1
   1.319 +     */
   1.320 +    public static final byte CONNECTOR_PUNCTUATION = 23;
   1.321 +
   1.322 +    /**
   1.323 +     * General category "Po" in the Unicode specification.
   1.324 +     * @since   1.1
   1.325 +     */
   1.326 +    public static final byte OTHER_PUNCTUATION = 24;
   1.327 +
   1.328 +    /**
   1.329 +     * General category "Sm" in the Unicode specification.
   1.330 +     * @since   1.1
   1.331 +     */
   1.332 +    public static final byte MATH_SYMBOL = 25;
   1.333 +
   1.334 +    /**
   1.335 +     * General category "Sc" in the Unicode specification.
   1.336 +     * @since   1.1
   1.337 +     */
   1.338 +    public static final byte CURRENCY_SYMBOL = 26;
   1.339 +
   1.340 +    /**
   1.341 +     * General category "Sk" in the Unicode specification.
   1.342 +     * @since   1.1
   1.343 +     */
   1.344 +    public static final byte MODIFIER_SYMBOL = 27;
   1.345 +
   1.346 +    /**
   1.347 +     * General category "So" in the Unicode specification.
   1.348 +     * @since   1.1
   1.349 +     */
   1.350 +    public static final byte OTHER_SYMBOL = 28;
   1.351 +
   1.352 +    /**
   1.353 +     * General category "Pi" in the Unicode specification.
   1.354 +     * @since   1.4
   1.355 +     */
   1.356 +    public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
   1.357 +
   1.358 +    /**
   1.359 +     * General category "Pf" in the Unicode specification.
   1.360 +     * @since   1.4
   1.361 +     */
   1.362 +    public static final byte FINAL_QUOTE_PUNCTUATION = 30;
   1.363 +
   1.364 +    /**
   1.365 +     * Error flag ({@code -1} as an int). Use int (code point) to avoid confusion with U+FFFF.
   1.366 +     */
   1.367 +    static final int ERROR = 0xFFFFFFFF;
   1.368 +
   1.369 +
   1.370 +    /**
   1.371 +     * Undefined bidirectional character type. Undefined {@code char}
   1.372 +     * values have undefined directionality in the Unicode specification.
   1.373 +     * @since 1.4
   1.374 +     */
   1.375 +    public static final byte DIRECTIONALITY_UNDEFINED = -1;
   1.376 +
   1.377 +    /**
   1.378 +     * Strong bidirectional character type "L" in the Unicode specification.
   1.379 +     * @since 1.4
   1.380 +     */
   1.381 +    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
   1.382 +
   1.383 +    /**
   1.384 +     * Strong bidirectional character type "R" in the Unicode specification.
   1.385 +     * @since 1.4
   1.386 +     */
   1.387 +    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
   1.388 +
   1.389 +    /**
   1.390 +     * Strong bidirectional character type "AL" in the Unicode specification.
   1.391 +     * @since 1.4
   1.392 +     */
   1.393 +    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
   1.394 +
   1.395 +    /**
   1.396 +     * Weak bidirectional character type "EN" in the Unicode specification.
   1.397 +     * @since 1.4
   1.398 +     */
   1.399 +    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
   1.400 +
   1.401 +    /**
   1.402 +     * Weak bidirectional character type "ES" in the Unicode specification.
   1.403 +     * @since 1.4
   1.404 +     */
   1.405 +    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
   1.406 +
   1.407 +    /**
   1.408 +     * Weak bidirectional character type "ET" in the Unicode specification.
   1.409 +     * @since 1.4
   1.410 +     */
   1.411 +    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
   1.412 +
   1.413 +    /**
   1.414 +     * Weak bidirectional character type "AN" in the Unicode specification.
   1.415 +     * @since 1.4
   1.416 +     */
   1.417 +    public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
   1.418 +
   1.419 +    /**
   1.420 +     * Weak bidirectional character type "CS" in the Unicode specification.
   1.421 +     * @since 1.4
   1.422 +     */
   1.423 +    public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
   1.424 +
   1.425 +    /**
   1.426 +     * Weak bidirectional character type "NSM" in the Unicode specification.
   1.427 +     * @since 1.4
   1.428 +     */
   1.429 +    public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
   1.430 +
   1.431 +    /**
   1.432 +     * Weak bidirectional character type "BN" in the Unicode specification.
   1.433 +     * @since 1.4
   1.434 +     */
   1.435 +    public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
   1.436 +
   1.437 +    /**
   1.438 +     * Neutral bidirectional character type "B" in the Unicode specification.
   1.439 +     * @since 1.4
   1.440 +     */
   1.441 +    public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
   1.442 +
   1.443 +    /**
   1.444 +     * Neutral bidirectional character type "S" in the Unicode specification.
   1.445 +     * @since 1.4
   1.446 +     */
   1.447 +    public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
   1.448 +
   1.449 +    /**
   1.450 +     * Neutral bidirectional character type "WS" in the Unicode specification.
   1.451 +     * @since 1.4
   1.452 +     */
   1.453 +    public static final byte DIRECTIONALITY_WHITESPACE = 12;
   1.454 +
   1.455 +    /**
   1.456 +     * Neutral bidirectional character type "ON" in the Unicode specification.
   1.457 +     * @since 1.4
   1.458 +     */
   1.459 +    public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
   1.460 +
   1.461 +    /**
   1.462 +     * Strong bidirectional character type "LRE" in the Unicode specification.
   1.463 +     * @since 1.4
   1.464 +     */
   1.465 +    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
   1.466 +
   1.467 +    /**
   1.468 +     * Strong bidirectional character type "LRO" in the Unicode specification.
   1.469 +     * @since 1.4
   1.470 +     */
   1.471 +    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
   1.472 +
   1.473 +    /**
   1.474 +     * Strong bidirectional character type "RLE" in the Unicode specification.
   1.475 +     * @since 1.4
   1.476 +     */
   1.477 +    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
   1.478 +
   1.479 +    /**
   1.480 +     * Strong bidirectional character type "RLO" in the Unicode specification.
   1.481 +     * @since 1.4
   1.482 +     */
   1.483 +    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
   1.484 +
   1.485 +    /**
   1.486 +     * Weak bidirectional character type "PDF" in the Unicode specification.
   1.487 +     * @since 1.4
   1.488 +     */
   1.489 +    public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
   1.490 +
   1.491 +    /**
   1.492 +     * The minimum value of a
   1.493 +     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
   1.494 +     * Unicode high-surrogate code unit</a>
   1.495 +     * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
   1.496 +     * A high-surrogate is also known as a <i>leading-surrogate</i>.
   1.497 +     *
   1.498 +     * @since 1.5
   1.499 +     */
   1.500 +    public static final char MIN_HIGH_SURROGATE = '\uD800';
   1.501 +
   1.502 +    /**
   1.503 +     * The maximum value of a
   1.504 +     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
   1.505 +     * Unicode high-surrogate code unit</a>
   1.506 +     * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
   1.507 +     * A high-surrogate is also known as a <i>leading-surrogate</i>.
   1.508 +     *
   1.509 +     * @since 1.5
   1.510 +     */
   1.511 +    public static final char MAX_HIGH_SURROGATE = '\uDBFF';
   1.512 +
   1.513 +    /**
   1.514 +     * The minimum value of a
   1.515 +     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
   1.516 +     * Unicode low-surrogate code unit</a>
   1.517 +     * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
   1.518 +     * A low-surrogate is also known as a <i>trailing-surrogate</i>.
   1.519 +     *
   1.520 +     * @since 1.5
   1.521 +     */
   1.522 +    public static final char MIN_LOW_SURROGATE  = '\uDC00';
   1.523 +
   1.524 +    /**
   1.525 +     * The maximum value of a
   1.526 +     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
   1.527 +     * Unicode low-surrogate code unit</a>
   1.528 +     * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
   1.529 +     * A low-surrogate is also known as a <i>trailing-surrogate</i>.
   1.530 +     *
   1.531 +     * @since 1.5
   1.532 +     */
   1.533 +    public static final char MAX_LOW_SURROGATE  = '\uDFFF';
   1.534 +
   1.535 +    /**
   1.536 +     * The minimum value of a Unicode surrogate code unit in the
   1.537 +     * UTF-16 encoding, constant {@code '\u005CuD800'}.
   1.538 +     * Declared as an alias of {@link #MIN_HIGH_SURROGATE}.
   1.539 +     * @since 1.5
   1.540 +     */
   1.541 +    public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
   1.542 +
   1.543 +    /**
   1.544 +     * The maximum value of a Unicode surrogate code unit in the
   1.545 +     * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
   1.546 +     * Declared as an alias of {@link #MAX_LOW_SURROGATE}.
   1.547 +     * @since 1.5
   1.548 +     */
   1.549 +    public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
   1.550 +
   1.551 +    /**
   1.552 +     * The minimum value of a
   1.553 +     * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
   1.554 +     * Unicode supplementary code point</a>, constant {@code U+10000}.
   1.555 +     *
   1.556 +     * @since 1.5
   1.557 +     */
   1.558 +    public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
   1.559 +
   1.560 +    /**
   1.561 +     * The minimum value of a
   1.562 +     * <a href="http://www.unicode.org/glossary/#code_point">
   1.563 +     * Unicode code point</a>, constant {@code U+0000}.
   1.564 +     *
   1.565 +     * @since 1.5
   1.566 +     */
   1.567 +    public static final int MIN_CODE_POINT = 0x000000;
   1.568 +
   1.569 +    /**
   1.570 +     * The maximum value of a
   1.571 +     * <a href="http://www.unicode.org/glossary/#code_point">
   1.572 +     * Unicode code point</a>, constant {@code U+10FFFF}.
   1.573 +     *
   1.574 +     * @since 1.5
   1.575 +     */
   1.576 +    public static final int MAX_CODE_POINT = 0X10FFFF;
   1.577 +
   1.578 +
   1.579 +    /**
   1.580 +     * Instances of this class represent particular subsets of the Unicode
   1.581 +     * character set.  The only family of subsets defined in the
   1.582 +     * {@code Character} class is {@link Character.UnicodeBlock}.
   1.583 +     * Other portions of the Java API may define other subsets for their
   1.584 +     * own purposes.
   1.585 +     *
   1.586 +     * @since 1.2
   1.587 +     */
   1.588 +    public static class Subset {
   1.589 +        /** Name of this subset; never {@code null} and fixed at construction. */
   1.590 +        private final String name;
   1.591 +
   1.592 +        /**
   1.593 +         * Constructs a new {@code Subset} instance.
   1.594 +         *
   1.595 +         * @param  name  The name of this subset
   1.596 +         * @exception NullPointerException if name is {@code null}
   1.597 +         */
   1.598 +        protected Subset(String name) {
   1.599 +            if (name == null) {
   1.600 +                throw new NullPointerException("name");
   1.601 +            }
   1.602 +            this.name = name;
   1.603 +        }
   1.604 +
   1.605 +        /**
   1.606 +         * Compares two {@code Subset} objects for equality. Returns
   1.607 +         * {@code true} if and only if {@code this} and the argument refer to
   1.608 +         * the same object; since this method is {@code final}, this guarantee
   1.609 +         * holds for all subclasses.
   1.610 +         *
   1.611 +         * @param obj the object to compare with this subset
   1.612 +         */
   1.613 +        @Override
   1.614 +        public final boolean equals(Object obj) {
   1.615 +            return (this == obj);
   1.616 +        }
   1.617 +
   1.618 +        /**
   1.619 +         * Returns the standard hash code as defined by {@link Object#hashCode}.
   1.620 +         * This method is {@code final} in order to ensure that the {@code equals}
   1.621 +         * and {@code hashCode} methods will be consistent in all subclasses.
   1.622 +         */
   1.623 +        @Override
   1.624 +        public final int hashCode() {
   1.625 +            return super.hashCode();
   1.626 +        }
   1.627 +
   1.628 +        /** Returns the name of this subset, exactly as given to the constructor. */
   1.629 +        @Override
   1.630 +        public final String toString() {
   1.631 +            return name;
   1.632 +        }
   1.633 +    }
   1.634 +
   1.635 +    // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
   1.636 +    // for the latest specification of Unicode Blocks.
   1.637 +
   1.638 +
   1.639 +    /**
   1.640 +     * The wrapped {@code char} value; assigned once, by the constructor.
   1.641 +     *
   1.642 +     * @serial
   1.643 +     */
   1.644 +    private final char value;
   1.645 +
   1.646 +    /** use serialVersionUID from JDK 1.0.2 for interoperability */
   1.647 +    private static final long serialVersionUID = 3786198910865385080L;
   1.648 +
   1.649 +    /**
   1.650 +     * Constructs a newly allocated {@code Character} object that
   1.651 +     * represents the specified {@code char} value. When a new instance is
   1.652 +     * not required, {@link #valueOf(char)} can return a cached one instead.
   1.653 +     * @param  value   the value to be represented by the
   1.654 +     *                  {@code Character} object.
   1.655 +     */
   1.656 +    public Character(char value) {
   1.657 +        this.value = value;
   1.658 +    }
   1.659 +
   1.660 +    private static class CharacterCache {
   1.661 +        /** Preallocated instances for the char values 0 through 127. */
   1.662 +        static final Character[] cache = new Character[127 + 1];
   1.663 +        static {
   1.664 +            for (char ch = 0; ch < cache.length; ch++) {
   1.665 +                cache[ch] = new Character(ch);
   1.666 +            }
   1.667 +        }
   1.668 +        private CharacterCache() {}  // holder class; not instantiable from outside
   1.669 +    }
   1.670 +
   1.671 +    /**
   1.672 +     * Returns a <tt>Character</tt> instance representing the specified
   1.673 +     * <tt>char</tt> value.
   1.674 +     * If a new <tt>Character</tt> instance is not required, this method
   1.675 +     * should generally be used in preference to the constructor
   1.676 +     * {@link #Character(char)}, as this method is likely to yield
   1.677 +     * significantly better space and time performance by caching
   1.678 +     * frequently requested values.
   1.679 +     *
   1.680 +     * This method will always cache values in the range {@code
   1.681 +     * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
   1.682 +     * cache other values outside of this range.
   1.683 +     *
   1.684 +     * @param  c a char value.
   1.685 +     * @return a <tt>Character</tt> instance representing <tt>c</tt>.
   1.686 +     * @since  1.5
   1.687 +     */
   1.688 +    public static Character valueOf(char c) {
   1.689 +        if (c <= 127) { // must cache
   1.690 +            return CharacterCache.cache[(int)c];
   1.691 +        }
   1.692 +        return new Character(c);
   1.693 +    }
   1.694 +
   1.695 +    /**
   1.696 +     * Unwraps this {@code Character} object.
   1.697 +     * @return  the primitive {@code char} value held by
   1.698 +     *          this object.
   1.699 +     */
   1.700 +    public char charValue() {
   1.701 +        return this.value;
   1.702 +    }
   1.703 +
   1.704 +    /**
   1.705 +     * Returns a hash code for this {@code Character}; equal to the result
   1.706 +     * of invoking {@code charValue()}, widened to {@code int}.
   1.707 +     * @return a hash code value for this {@code Character}
   1.708 +     */
   1.709 +    @Override
   1.710 +    public int hashCode() {
   1.711 +        return value;
   1.712 +    }
   1.713 +
   1.714 +    /**
   1.715 +     * Compares this object against the specified object.
   1.716 +     * The result is {@code true} if and only if the argument is not
   1.717 +     * {@code null} and is a {@code Character} object that
   1.718 +     * represents the same {@code char} value as this object.
   1.719 +     *
   1.720 +     * @param   obj   the object to compare with.
   1.721 +     * @return  {@code true} if the objects are the same;
   1.722 +     *          {@code false} otherwise.
   1.723 +     */
   1.724 +    public boolean equals(Object obj) {
   1.725 +        if (obj instanceof Character) {
   1.726 +            return value == ((Character)obj).charValue();
   1.727 +        }
   1.728 +        return false;
   1.729 +    }
   1.730 +
   1.731 +    /**
   1.732 +     * Returns a {@code String} object representing this
   1.733 +     * {@code Character}'s value.  The result is a string of
   1.734 +     * length 1 whose sole component is the primitive
   1.735 +     * {@code char} value represented by this
   1.736 +     * {@code Character} object.
   1.737 +     *
   1.738 +     * @return  a string representation of this object.
   1.739 +     */
   1.740 +    @Override
   1.741 +    public String toString() {
   1.742 +        return String.valueOf(new char[] {value});
   1.743 +    }
   1.744 +
   1.745 +    /**
   1.746 +     * Converts the specified {@code char} to a {@code String} by
   1.747 +     * delegating to {@code String.valueOf(char)}.  The result is a string
   1.748 +     * of length 1 consisting solely of the specified {@code char}.
   1.749 +     *
   1.750 +     * @param c the {@code char} to be converted
   1.751 +     * @return the string representation of the specified {@code char}
   1.752 +     * @since 1.4
   1.753 +     */
   1.754 +    public static String toString(char c) {
   1.755 +        return String.valueOf(c);
   1.756 +    }
   1.757 +
   1.758 +    /**
   1.759 +     * Tells whether the given value is a valid
   1.760 +     * <a href="http://www.unicode.org/glossary/#code_point">
   1.761 +     * Unicode code point value</a>.
   1.762 +     *
   1.763 +     * @param  codePoint the Unicode code point to be tested
   1.764 +     * @return {@code true} if {@code codePoint} lies between
   1.765 +     *         {@link #MIN_CODE_POINT} and
   1.766 +     *         {@link #MAX_CODE_POINT} inclusive;
   1.767 +     *         {@code false} otherwise.
   1.768 +     * @since  1.5
   1.769 +     */
   1.770 +    public static boolean isValidCodePoint(int codePoint) {
   1.771 +        // Compares plane numbers; optimized form of:
   1.772 +        //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
   1.773 +        final int planeLimit = (MAX_CODE_POINT + 1) >>> 16;
   1.774 +        return (codePoint >>> 16) < planeLimit;
   1.775 +    }
   1.776 +
   1.777 +    /**
   1.778 +     * Tells whether the given character (Unicode code point) belongs
   1.779 +     * to the <a href="#BMP">Basic Multilingual Plane (BMP)</a>, i.e.
   1.780 +     * whether it can be represented by a single {@code char}.
   1.781 +     *
   1.782 +     * @param  codePoint the character (Unicode code point) to be tested
   1.783 +     * @return {@code true} if {@code codePoint} lies between
   1.784 +     *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
   1.785 +     *         {@code false} otherwise.
   1.786 +     * @since  1.7
   1.787 +     */
   1.788 +    public static boolean isBmpCodePoint(int codePoint) {
   1.789 +        // Optimized form of:
   1.790 +        //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
   1.791 +        // Logical shift (>>>) is used consistently to aid runtime optimizations.
   1.792 +        final int plane = codePoint >>> 16;
   1.793 +        return plane == 0;
   1.794 +    }
   1.795 +
   1.796 +    /**
   1.797 +     * Tells whether the given character (Unicode code point) lies in
   1.798 +     * the <a href="#supplementary">supplementary character</a> range.
   1.799 +     *
   1.800 +     * @param  codePoint the character (Unicode code point) to be tested
   1.801 +     * @return {@code true} if {@code codePoint} lies between
   1.802 +     *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
   1.803 +     *         {@link #MAX_CODE_POINT} inclusive; {@code false} otherwise.
   1.804 +     * @since  1.5
   1.805 +     */
   1.806 +    public static boolean isSupplementaryCodePoint(int codePoint) {
   1.807 +        final boolean aboveBmp = codePoint >= MIN_SUPPLEMENTARY_CODE_POINT;
   1.808 +        final boolean withinUnicode = codePoint < MAX_CODE_POINT + 1;
   1.809 +        return aboveBmp && withinUnicode;
   1.810 +    }
   1.811 +
   1.812 +    /**
   1.813 +     * Tells whether the given {@code char} value is a
   1.814 +     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
   1.815 +     * Unicode high-surrogate code unit</a>
   1.816 +     * (also called a <i>leading-surrogate code unit</i>).
   1.817 +     *
   1.818 +     * <p>A high surrogate is not a character on its own; it is only
   1.819 +     * used as the leading unit in the UTF-16 representation of a
   1.820 +     * <a href="#supplementary">supplementary character</a>.
   1.821 +     *
   1.822 +     * @param  ch the {@code char} value to be tested.
   1.823 +     * @return {@code true} if {@code ch} lies between
   1.824 +     *         {@link #MIN_HIGH_SURROGATE} and
   1.825 +     *         {@link #MAX_HIGH_SURROGATE} inclusive;
   1.826 +     *         {@code false} otherwise.
   1.827 +     * @see    Character#isLowSurrogate(char)
   1.828 +     * @see    Character.UnicodeBlock#of(int)
   1.829 +     * @since  1.5
   1.830 +     */
   1.831 +    public static boolean isHighSurrogate(char ch) {
   1.832 +        // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
   1.833 +        final int upperExclusive = MAX_HIGH_SURROGATE + 1;
   1.834 +        return MIN_HIGH_SURROGATE <= ch && ch < upperExclusive;
   1.835 +    }
   1.836 +
   1.837 +    /**
   1.838 +     * Tells whether the given {@code char} value is a
   1.839 +     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
   1.840 +     * Unicode low-surrogate code unit</a>
   1.841 +     * (also called a <i>trailing-surrogate code unit</i>).
   1.842 +     *
   1.843 +     * <p>A low surrogate is not a character on its own; it is only
   1.844 +     * used as the trailing unit in the UTF-16 representation of a
   1.845 +     * <a href="#supplementary">supplementary character</a>.
   1.846 +     *
   1.847 +     * @param  ch the {@code char} value to be tested.
   1.848 +     * @return {@code true} if {@code ch} lies between
   1.849 +     *         {@link #MIN_LOW_SURROGATE} and
   1.850 +     *         {@link #MAX_LOW_SURROGATE} inclusive;
   1.851 +     *         {@code false} otherwise.
   1.852 +     * @see    Character#isHighSurrogate(char)
   1.853 +     * @since  1.5
   1.854 +     */
   1.855 +    public static boolean isLowSurrogate(char ch) {
   1.856 +        final int upperExclusive = MAX_LOW_SURROGATE + 1;
   1.857 +        return MIN_LOW_SURROGATE <= ch && ch < upperExclusive;
   1.858 +    }
   1.859 +
   1.860 +    /**
   1.861 +     * Tells whether the given {@code char} value is a Unicode
   1.862 +     * <i>surrogate code unit</i>.
   1.863 +     *
   1.864 +     * <p>A surrogate code unit is not a character on its own; it is
   1.865 +     * only used in the UTF-16 representation of
   1.866 +     * <a href="#supplementary">supplementary characters</a>.
   1.867 +     *
   1.868 +     * <p>A char value is a surrogate code unit if and only if it is either
   1.869 +     * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
   1.870 +     * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
   1.871 +     *
   1.872 +     * @param  ch the {@code char} value to be tested.
   1.873 +     * @return {@code true} if {@code ch} lies between
   1.874 +     *         {@link #MIN_SURROGATE} and
   1.875 +     *         {@link #MAX_SURROGATE} inclusive;
   1.876 +     *         {@code false} otherwise.
   1.877 +     * @since  1.7
   1.878 +     */
   1.879 +    public static boolean isSurrogate(char ch) {
   1.880 +        final int upperExclusive = MAX_SURROGATE + 1;
   1.881 +        return MIN_SURROGATE <= ch && ch < upperExclusive;
   1.882 +    }
   1.883 +
   1.884 +    /**
   1.885 +     * Tells whether the given pair of {@code char} values forms a valid
   1.886 +     * <a href="http://www.unicode.org/glossary/#surrogate_pair">
   1.887 +     * Unicode surrogate pair</a>.
   1.888 +     *
   1.889 +     * <p>This method is equivalent to the expression:
   1.890 +     * <blockquote><pre>
   1.891 +     * isHighSurrogate(high) &amp;&amp; isLowSurrogate(low)
   1.892 +     * </pre></blockquote>
   1.893 +     *
   1.894 +     * @param  high the high-surrogate code value to be tested
   1.895 +     * @param  low the low-surrogate code value to be tested
   1.896 +     * @return {@code true} if {@code high} and {@code low} together
   1.897 +     *         represent a valid surrogate pair; {@code false} otherwise.
   1.898 +     * @since  1.5
   1.899 +     */
   1.900 +    public static boolean isSurrogatePair(char high, char low) {
   1.901 +        // Short-circuit: the low unit is inspected only after the high one passes.
   1.902 +        final boolean leadingOk = isHighSurrogate(high);
   1.903 +        return leadingOk && isLowSurrogate(low);
   1.904 +    }
   1.905 +
   1.906 +    /**
   1.907 +     * Returns how many {@code char} values are needed to represent the
   1.908 +     * given character (Unicode code point): 2 if it is equal to or
   1.909 +     * greater than 0x10000, and 1 otherwise.
   1.910 +     *
   1.911 +     * <p>The argument is not validated; use {@link #isValidCodePoint(int)
   1.912 +     * isValidCodePoint} first if a valid code point must be ensured.
   1.913 +     *
   1.914 +     * @param   codePoint the character (Unicode code point) to be tested.
   1.915 +     * @return  2 if {@code codePoint} is 0x10000 or greater; 1 otherwise.
   1.916 +     * @see     Character#isSupplementaryCodePoint(int)
   1.917 +     * @since   1.5
   1.918 +     */
   1.919 +    public static int charCount(int codePoint) {
   1.920 +        if (codePoint >= MIN_SUPPLEMENTARY_CODE_POINT) {
   1.921 +            return 2;
   1.922 +        }
   1.923 +        return 1;
   1.924 +    }
   1.925 +
   1.926 +    /**
   1.927 +     * Combines a surrogate pair into the supplementary code point it
   1.928 +     * encodes. The pair is not validated; use {@link
   1.929 +     * #isSurrogatePair(char, char) isSurrogatePair} first if necessary.
   1.930 +     *
   1.931 +     * @param  high the high-surrogate code unit
   1.932 +     * @param  low the low-surrogate code unit
   1.933 +     * @return the supplementary code point composed from the
   1.934 +     *         specified surrogate pair.
   1.935 +     * @since  1.5
   1.936 +     */
   1.937 +    public static int toCodePoint(char high, char low) {
   1.938 +        // Equivalent to, with all constant terms folded into one bias:
   1.939 +        //     ((high - MIN_HIGH_SURROGATE) << 10)
   1.940 +        //         + (low - MIN_LOW_SURROGATE)
   1.941 +        //         + MIN_SUPPLEMENTARY_CODE_POINT
   1.942 +        final int bias = MIN_SUPPLEMENTARY_CODE_POINT
   1.943 +                - (MIN_HIGH_SURROGATE << 10)
   1.944 +                - MIN_LOW_SURROGATE;
   1.945 +        return ((high << 10) + low) + bias;
   1.946 +    }
   1.947 +
   1.948 +    /**
   1.949 +     * Returns the code point at the given index of the
   1.950 +     * {@code CharSequence}. A supplementary code point is
   1.951 +     * returned when all of the following hold: the {@code char}
   1.952 +     * at the given index is in the high-surrogate range, the
   1.953 +     * following index is less than the length of the
   1.954 +     * {@code CharSequence}, and the {@code char} at the following
   1.955 +     * index is in the low-surrogate range. The result is then the
   1.956 +     * code point encoded by that surrogate pair. In every other
   1.957 +     * case the {@code char} value at the given index is returned.
   1.958 +     *
   1.959 +     * @param seq a sequence of {@code char} values (Unicode code
   1.960 +     * units)
   1.961 +     * @param index the index to the {@code char} values (Unicode
   1.962 +     * code units) in {@code seq} to be converted
   1.963 +     * @return the Unicode code point at the given index
   1.964 +     * @exception NullPointerException if {@code seq} is null.
   1.965 +     * @exception IndexOutOfBoundsException if the value
   1.966 +     * {@code index} is negative or not less than
   1.967 +     * {@link CharSequence#length() seq.length()}.
   1.968 +     * @since  1.5
   1.969 +     */
   1.970 +    public static int codePointAt(CharSequence seq, int index) {
   1.971 +        final char lead = seq.charAt(index);
   1.972 +        final int next = index + 1;
   1.973 +        if (!isHighSurrogate(lead) || next >= seq.length()) {
   1.974 +            return lead;
   1.975 +        }
   1.976 +        final char trail = seq.charAt(next);
   1.977 +        if (isLowSurrogate(trail)) {
   1.978 +            return toCodePoint(lead, trail);
   1.979 +        }
   1.980 +        return lead;
   1.981 +    }
   1.982 +
   1.983 +    /**
   1.984 +     * Returns the code point at the given index of the {@code char}
   1.985 +     * array. A supplementary code point is returned when the
   1.986 +     * {@code char} at the given index is in the high-surrogate
   1.987 +     * range, the following index is less than the length of the
   1.988 +     * array, and the {@code char} at the following index is in the
   1.989 +     * low-surrogate range; the result is then the code point
   1.990 +     * encoded by that surrogate pair. In every other case the
   1.991 +     * {@code char} value at the given index is returned.
   1.992 +     *
   1.993 +     * @param a the {@code char} array
   1.994 +     * @param index the index to the {@code char} values (Unicode
   1.995 +     * code units) in the {@code char} array to be converted
   1.996 +     * @return the Unicode code point at the given index
   1.997 +     * @exception NullPointerException if {@code a} is null.
   1.998 +     * @exception IndexOutOfBoundsException if the value
   1.999 +     * {@code index} is negative or not less than
  1.1000 +     * the length of the {@code char} array.
  1.1001 +     * @since  1.5
  1.1002 +     */
  1.1003 +    public static int codePointAt(char[] a, int index) {
  1.1004 +        final int limit = a.length;
  1.1005 +        return codePointAtImpl(a, index, limit);
  1.1006 +    }
  1.1007 +
  1.1008 +    /**
  1.1009 +     * Returns the code point at the given index of the {@code char}
  1.1010 +     * array, using only array elements whose index is less than
  1.1011 +     * {@code limit}. A supplementary code point is returned when the
  1.1012 +     * {@code char} at the given index is in the high-surrogate
  1.1013 +     * range, the following index is less than {@code limit}, and
  1.1014 +     * the {@code char} at the following index is in the
  1.1015 +     * low-surrogate range; the result is then the code point
  1.1016 +     * encoded by that surrogate pair. In every other case the
  1.1017 +     * {@code char} value at the given index is returned.
  1.1018 +     *
  1.1019 +     * @param a the {@code char} array
  1.1020 +     * @param index the index to the {@code char} values (Unicode
  1.1021 +     * code units) in the {@code char} array to be converted
  1.1022 +     * @param limit the index after the last array element that
  1.1023 +     * can be used in the {@code char} array
  1.1024 +     * @return the Unicode code point at the given index
  1.1025 +     * @exception NullPointerException if {@code a} is null.
  1.1026 +     * @exception IndexOutOfBoundsException if the {@code index}
  1.1027 +     * argument is negative or not less than the {@code limit}
  1.1028 +     * argument, or if the {@code limit} argument is negative or
  1.1029 +     * greater than the length of the {@code char} array.
  1.1030 +     * @since  1.5
  1.1031 +     */
  1.1032 +    public static int codePointAt(char[] a, int index, int limit) {
  1.1033 +        // Validate the window first; a negative index is left to the array access.
  1.1034 +        if (index < limit && limit >= 0 && limit <= a.length) {
  1.1035 +            return codePointAtImpl(a, index, limit);
  1.1036 +        }
  1.1037 +        throw new IndexOutOfBoundsException();
  1.1038 +    }
  1.1039 +
  1.1040 +    // throws ArrayIndexOutofBoundsException if index out of bounds
  1.1041 +    static int codePointAtImpl(char[] a, int index, int limit) {
  1.1042 +        char c1 = a[index++];
  1.1043 +        if (isHighSurrogate(c1)) {
  1.1044 +            if (index < limit) {
  1.1045 +                char c2 = a[index];
  1.1046 +                if (isLowSurrogate(c2)) {
  1.1047 +                    return toCodePoint(c1, c2);
  1.1048 +                }
  1.1049 +            }
  1.1050 +        }
  1.1051 +        return c1;
  1.1052 +    }
  1.1053 +
  1.1054 +    /**
  1.1055 +     * Returns the code point that precedes the given index of the
  1.1056 +     * {@code CharSequence}. A supplementary code point is returned
  1.1057 +     * when the {@code char} at {@code (index - 1)} is in the
  1.1058 +     * low-surrogate range, {@code (index - 2)} is not negative,
  1.1059 +     * and the {@code char} at {@code (index - 2)} is in the
  1.1060 +     * high-surrogate range; the result is then the code point
  1.1061 +     * encoded by that surrogate pair. In every other case the
  1.1062 +     * {@code char} value at {@code (index - 1)} is returned.
  1.1063 +     *
  1.1064 +     * @param seq the {@code CharSequence} instance
  1.1065 +     * @param index the index following the code point that should
  1.1066 +     * be returned
  1.1067 +     * @return the Unicode code point value before the given index.
  1.1068 +     * @exception NullPointerException if {@code seq} is null.
  1.1069 +     * @exception IndexOutOfBoundsException if the {@code index}
  1.1070 +     * argument is less than 1 or greater than {@link
  1.1071 +     * CharSequence#length() seq.length()}.
  1.1072 +     * @since  1.5
  1.1073 +     */
  1.1074 +    public static int codePointBefore(CharSequence seq, int index) {
  1.1075 +        final int trailPos = index - 1;
  1.1076 +        final char trail = seq.charAt(trailPos);
  1.1077 +        // A pair needs a low surrogate here and at least one char before it.
  1.1078 +        if (!isLowSurrogate(trail) || trailPos <= 0) {
  1.1079 +            return trail;
  1.1080 +        }
  1.1081 +        final char lead = seq.charAt(trailPos - 1);
  1.1082 +        if (isHighSurrogate(lead)) {
  1.1083 +            return toCodePoint(lead, trail);
  1.1084 +        }
  1.1085 +        return trail;
  1.1086 +    }
  1.1087 +
  1.1088 +    /**
  1.1089 +     * Returns the code point that precedes the given index of the
  1.1090 +     * {@code char} array. A supplementary code point is returned
  1.1091 +     * when the {@code char} at {@code (index - 1)} is in the
  1.1092 +     * low-surrogate range, {@code (index - 2)} is not negative,
  1.1093 +     * and the {@code char} at {@code (index - 2)} is in the
  1.1094 +     * high-surrogate range; the result is then the code point
  1.1095 +     * encoded by that surrogate pair. In every other case the
  1.1096 +     * {@code char} value at {@code (index - 1)} is returned.
  1.1097 +     *
  1.1098 +     * @param a the {@code char} array
  1.1099 +     * @param index the index following the code point that should
  1.1100 +     * be returned
  1.1101 +     * @return the Unicode code point value before the given index.
  1.1102 +     * @exception NullPointerException if {@code a} is null.
  1.1103 +     * @exception IndexOutOfBoundsException if the {@code index}
  1.1104 +     * argument is less than 1 or greater than the length of the
  1.1105 +     * {@code char} array
  1.1106 +     * @since  1.5
  1.1107 +     */
  1.1108 +    public static int codePointBefore(char[] a, int index) {
  1.1109 +        final int start = 0;
  1.1110 +        return codePointBeforeImpl(a, index, start);
  1.1111 +    }
  1.1112 +
  1.1113 +    /**
  1.1114 +     * Returns the code point that precedes the given index of the
  1.1115 +     * {@code char} array, using only array elements whose index is
  1.1116 +     * greater than or equal to {@code start}. A supplementary code
  1.1117 +     * point is returned when the {@code char} at {@code (index - 1)}
  1.1118 +     * is in the low-surrogate range, {@code (index - 2)} is not less
  1.1119 +     * than {@code start}, and the {@code char} at
  1.1120 +     * {@code (index - 2)} is in the high-surrogate range; the
  1.1121 +     * result is then the code point encoded by that surrogate
  1.1122 +     * pair. In every other case the {@code char} value at
  1.1123 +     * {@code (index - 1)} is returned.
  1.1124 +     *
  1.1125 +     * @param a the {@code char} array
  1.1126 +     * @param index the index following the code point that should
  1.1127 +     * be returned
  1.1128 +     * @param start the index of the first array element in the
  1.1129 +     * {@code char} array
  1.1130 +     * @return the Unicode code point value before the given index.
  1.1131 +     * @exception NullPointerException if {@code a} is null.
  1.1132 +     * @exception IndexOutOfBoundsException if the {@code index}
  1.1133 +     * argument is not greater than the {@code start} argument or
  1.1134 +     * is greater than the length of the {@code char} array, or
  1.1135 +     * if the {@code start} argument is negative or not less than
  1.1136 +     * the length of the {@code char} array.
  1.1137 +     * @since  1.5
  1.1138 +     */
  1.1139 +    public static int codePointBefore(char[] a, int index, int start) {
  1.1140 +        // Validate the window first; an index past the array end is left to the array access.
  1.1141 +        if (index > start && start >= 0 && start < a.length) {
  1.1142 +            return codePointBeforeImpl(a, index, start);
  1.1143 +        }
  1.1144 +        throw new IndexOutOfBoundsException();
  1.1145 +    }
  1.1146 +
  1.1147 +    // throws ArrayIndexOutofBoundsException if index-1 out of bounds
  1.1148 +    static int codePointBeforeImpl(char[] a, int index, int start) {
  1.1149 +        char c2 = a[--index];
  1.1150 +        if (isLowSurrogate(c2)) {
  1.1151 +            if (index > start) {
  1.1152 +                char c1 = a[--index];
  1.1153 +                if (isHighSurrogate(c1)) {
  1.1154 +                    return toCodePoint(c1, c2);
  1.1155 +                }
  1.1156 +            }
  1.1157 +        }
  1.1158 +        return c2;
  1.1159 +    }
  1.1160 +
  1.1161 +    /**
  1.1162 +     * Returns the leading surrogate (a
  1.1163 +     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
  1.1164 +     * high surrogate code unit</a>) of the
  1.1165 +     * <a href="http://www.unicode.org/glossary/#surrogate_pair">
  1.1166 +     * surrogate pair</a>
  1.1167 +     * that represents the given supplementary character (Unicode
  1.1168 +     * code point) in the UTF-16 encoding.  For an argument that is not a
  1.1169 +     * <a href="Character.html#supplementary">supplementary character</a>,
  1.1170 +     * an unspecified {@code char} is returned.
  1.1171 +     *
  1.1172 +     * <p>Whenever
  1.1173 +     * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
  1.1174 +     * is {@code true},
  1.1175 +     * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
  1.1176 +     * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
  1.1177 +     * are always {@code true} as well.
  1.1178 +     *
  1.1179 +     * @param   codePoint a supplementary character (Unicode code point)
  1.1180 +     * @return  the leading surrogate code unit used to represent the
  1.1181 +     *          character in the UTF-16 encoding
  1.1182 +     * @since   1.7
  1.1183 +     */
  1.1184 +    public static char highSurrogate(int codePoint) {
  1.1185 +        final int bias = MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10);
  1.1186 +        final int unit = (codePoint >>> 10) + bias;
  1.1187 +        return (char) unit;
  1.1188 +    }
  1.1189 +
  1.1190 +    /**
  1.1191 +     * Returns the trailing surrogate (a
  1.1192 +     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
  1.1193 +     * low surrogate code unit</a>) of the
  1.1194 +     * <a href="http://www.unicode.org/glossary/#surrogate_pair">
  1.1195 +     * surrogate pair</a>
  1.1196 +     * that represents the given supplementary character (Unicode
  1.1197 +     * code point) in the UTF-16 encoding.  For an argument that is not a
  1.1198 +     * <a href="Character.html#supplementary">supplementary character</a>,
  1.1199 +     * an unspecified {@code char} is returned.
  1.1200 +     *
  1.1201 +     * <p>Whenever
  1.1202 +     * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
  1.1203 +     * is {@code true},
  1.1204 +     * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
  1.1205 +     * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
  1.1206 +     * are always {@code true} as well.
  1.1207 +     *
  1.1208 +     * @param   codePoint a supplementary character (Unicode code point)
  1.1209 +     * @return  the trailing surrogate code unit used to represent the
  1.1210 +     *          character in the UTF-16 encoding
  1.1211 +     * @since   1.7
  1.1212 +     */
  1.1213 +    public static char lowSurrogate(int codePoint) {
  1.1214 +        final int lowTenBits = codePoint & 0x3ff;
  1.1215 +        return (char) (lowTenBits + MIN_LOW_SURROGATE);
  1.1216 +    }
  1.1217 +
  1.1218 +    /**
  1.1219 +     * Writes the UTF-16 representation of the given character
  1.1220 +     * (Unicode code point) into {@code dst}. For a BMP (Basic
  1.1221 +     * Multilingual Plane or Plane 0) code point, the same value is
  1.1222 +     * stored in {@code dst[dstIndex]} and 1 is returned. For a
  1.1223 +     * supplementary character, its high surrogate is stored in
  1.1224 +     * {@code dst[dstIndex]} and its low surrogate in
  1.1225 +     * {@code dst[dstIndex+1]}, and 2 is returned. Any other
  1.1226 +     * argument is rejected with an exception.
  1.1227 +     *
  1.1228 +     * @param  codePoint the character (Unicode code point) to be converted.
  1.1229 +     * @param  dst an array of {@code char} in which the
  1.1230 +     * {@code codePoint}'s UTF-16 value is stored.
  1.1231 +     * @param dstIndex the start index into the {@code dst}
  1.1232 +     * array where the converted value is stored.
  1.1233 +     * @return 1 if the code point is a BMP code point, 2 if the
  1.1234 +     * code point is a supplementary code point.
  1.1235 +     * @exception IllegalArgumentException if the specified
  1.1236 +     * {@code codePoint} is not a valid Unicode code point.
  1.1237 +     * @exception NullPointerException if the specified {@code dst} is null.
  1.1238 +     * @exception IndexOutOfBoundsException if {@code dstIndex}
  1.1239 +     * is negative or not less than {@code dst.length}, or if
  1.1240 +     * {@code dst} does not have enough elements starting at
  1.1241 +     * {@code dstIndex} to hold the resulting {@code char}
  1.1242 +     * value(s). (When {@code dstIndex} equals
  1.1243 +     * {@code dst.length-1} and {@code codePoint} is a
  1.1244 +     * supplementary character, the high-surrogate value is
  1.1245 +     * not stored in {@code dst[dstIndex]} before the exception
  1.1246 +     * is thrown.)
  1.1247 +     * @since  1.5
  1.1248 +     */
  1.1249 +    public static int toChars(int codePoint, char[] dst, int dstIndex) {
  1.1250 +        if (isBmpCodePoint(codePoint)) {
  1.1251 +            dst[dstIndex] = (char) codePoint;
  1.1252 +            return 1;
  1.1253 +        }
  1.1254 +        if (!isValidCodePoint(codePoint)) {
  1.1255 +            throw new IllegalArgumentException();
  1.1256 +        }
  1.1257 +        toSurrogates(codePoint, dst, dstIndex);
  1.1258 +        return 2;
  1.1259 +    }
  1.1260 +
  1.1261 +    /**
  1.1262 +     * Returns the UTF-16 representation of the given character
  1.1263 +     * (Unicode code point) as a newly allocated {@code char} array.
  1.1264 +     * For a BMP (Basic Multilingual Plane or Plane 0) code point
  1.1265 +     * the array has one element holding the same value as
  1.1266 +     * {@code codePoint}. For a supplementary code point the array
  1.1267 +     * has two elements holding the corresponding surrogate pair,
  1.1268 +     * high surrogate first.
  1.1269 +     *
  1.1270 +     * @param  codePoint a Unicode code point
  1.1271 +     * @return a {@code char} array having
  1.1272 +     *         {@code codePoint}'s UTF-16 representation.
  1.1273 +     * @exception IllegalArgumentException if the specified
  1.1274 +     * {@code codePoint} is not a valid Unicode code point.
  1.1275 +     * @since  1.5
  1.1276 +     */
  1.1277 +    public static char[] toChars(int codePoint) {
  1.1278 +        if (isBmpCodePoint(codePoint)) {
  1.1279 +            return new char[] { (char) codePoint };
  1.1280 +        }
  1.1281 +        if (!isValidCodePoint(codePoint)) {
  1.1282 +            throw new IllegalArgumentException();
  1.1283 +        }
  1.1284 +        final char[] pair = new char[2];
  1.1285 +        toSurrogates(codePoint, pair, 0);
  1.1286 +        return pair;
  1.1287 +    }
  1.1288 +
  1.1289 +    static void toSurrogates(int codePoint, char[] dst, int index) {
  1.1290 +        // Written "backwards" (index+1 first) so an overrun fails before dst is touched
  1.1291 +        dst[index+1] = lowSurrogate(codePoint);
  1.1292 +        dst[index] = highSurrogate(codePoint);
  1.1293 +    }
  1.1294 +
  1.1295 +    /**
  1.1296 +     * Counts the Unicode code points in a text range of the given
  1.1297 +     * char sequence. The range starts at {@code beginIndex} and
  1.1298 +     * ends with the {@code char} at index {@code endIndex - 1}, so
  1.1299 +     * its length in {@code char}s is {@code endIndex-beginIndex}.
  1.1300 +     * An unpaired surrogate inside the range counts as one code
  1.1301 +     * point.
  1.1302 +     *
  1.1303 +     * @param seq the char sequence
  1.1304 +     * @param beginIndex the index to the first {@code char} of
  1.1305 +     * the text range.
  1.1306 +     * @param endIndex the index after the last {@code char} of
  1.1307 +     * the text range.
  1.1308 +     * @return the number of Unicode code points in the specified text
  1.1309 +     * range
  1.1310 +     * @exception NullPointerException if {@code seq} is null.
  1.1311 +     * @exception IndexOutOfBoundsException if the
  1.1312 +     * {@code beginIndex} is negative, or {@code endIndex}
  1.1313 +     * is larger than the length of the given sequence, or
  1.1314 +     * {@code beginIndex} is larger than {@code endIndex}.
  1.1315 +     * @since  1.5
  1.1316 +     */
  1.1317 +    public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
  1.1318 +        final int length = seq.length();
  1.1319 +        if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
  1.1320 +            throw new IndexOutOfBoundsException();
  1.1321 +        }
  1.1322 +        int pairs = 0;
  1.1323 +        int pos = beginIndex;
  1.1324 +        while (pos < endIndex) {
  1.1325 +            final char unit = seq.charAt(pos++);
  1.1326 +            if (isHighSurrogate(unit) && pos < endIndex && isLowSurrogate(seq.charAt(pos))) {
  1.1327 +                pairs++;
  1.1328 +                pos++;
  1.1329 +            }
  1.1330 +        }
  1.1331 +        return (endIndex - beginIndex) - pairs;
  1.1332 +    }
  1.1333 +
  1.1334 +    /**
  1.1335 +     * Counts the Unicode code points in a subarray of the given
  1.1336 +     * {@code char} array. The {@code offset} argument is the
  1.1337 +     * index of the first {@code char} of the subarray, and the
  1.1338 +     * {@code count} argument is the length of the subarray in
  1.1339 +     * {@code char}s. An unpaired surrogate inside the subarray
  1.1340 +     * counts as one code point.
  1.1341 +     *
  1.1342 +     * @param a the {@code char} array
  1.1343 +     * @param offset the index of the first {@code char} in the
  1.1344 +     * given {@code char} array
  1.1345 +     * @param count the length of the subarray in {@code char}s
  1.1346 +     * @return the number of Unicode code points in the specified subarray
  1.1347 +     * @exception NullPointerException if {@code a} is null.
  1.1348 +     * @exception IndexOutOfBoundsException if {@code offset} or
  1.1349 +     * {@code count} is negative, or if {@code offset +
  1.1350 +     * count} is larger than the length of the given array.
  1.1351 +     * @since  1.5
  1.1352 +     */
  1.1353 +    public static int codePointCount(char[] a, int offset, int count) {
  1.1354 +        if (count <= a.length - offset && offset >= 0 && count >= 0) {
  1.1355 +            return codePointCountImpl(a, offset, count);
  1.1356 +        }
  1.1357 +        throw new IndexOutOfBoundsException();
  1.1358 +    }
  1.1359 +
  1.1360 +    static int codePointCountImpl(char[] a, int offset, int count) {
  1.1361 +        int endIndex = offset + count;
  1.1362 +        int n = count;
  1.1363 +        for (int i = offset; i < endIndex; ) {
  1.1364 +            if (isHighSurrogate(a[i++]) && i < endIndex &&
  1.1365 +                isLowSurrogate(a[i])) {
  1.1366 +                n--;
  1.1367 +                i++;
  1.1368 +            }
  1.1369 +        }
  1.1370 +        return n;
  1.1371 +    }
  1.1372 +
  1.1373 +    /**
  1.1374 +     * Returns the index within the given char sequence that is offset
  1.1375 +     * from the given {@code index} by {@code codePointOffset}
  1.1376 +     * code points. Unpaired surrogates within the text range given by
  1.1377 +     * {@code index} and {@code codePointOffset} count as
  1.1378 +     * one code point each.
  1.1379 +     *
  1.1380 +     * @param seq the char sequence
  1.1381 +     * @param index the index to be offset
  1.1382 +     * @param codePointOffset the offset in code points
  1.1383 +     * @return the index within the char sequence
  1.1384 +     * @exception NullPointerException if {@code seq} is null.
  1.1385 +     * @exception IndexOutOfBoundsException if {@code index}
  1.1386 +     *   is negative or larger than the length of the char sequence,
  1.1387 +     *   or if {@code codePointOffset} is positive and the
  1.1388 +     *   subsequence starting with {@code index} has fewer than
  1.1389 +     *   {@code codePointOffset} code points, or if
  1.1390 +     *   {@code codePointOffset} is negative and the subsequence
  1.1391 +     *   before {@code index} has fewer than the absolute value
  1.1392 +     *   of {@code codePointOffset} code points.
  1.1393 +     * @since 1.5
  1.1394 +     */
  1.1395 +    public static int offsetByCodePoints(CharSequence seq, int index,
  1.1396 +                                         int codePointOffset) {
  1.1397 +        int length = seq.length();
  1.1398 +        if (index < 0 || index > length) {
  1.1399 +            throw new IndexOutOfBoundsException();
  1.1400 +        }
  1.1401 +
  1.1402 +        int x = index;
  1.1403 +        if (codePointOffset >= 0) {
  1.1404 +            int i;
  1.1405 +            for (i = 0; x < length && i < codePointOffset; i++) {
  1.1406 +                if (isHighSurrogate(seq.charAt(x++)) && x < length &&
  1.1407 +                    isLowSurrogate(seq.charAt(x))) {
  1.1408 +                    x++;
  1.1409 +                }
  1.1410 +            }
  1.1411 +            if (i < codePointOffset) {
  1.1412 +                throw new IndexOutOfBoundsException();
  1.1413 +            }
  1.1414 +        } else {
  1.1415 +            int i;
  1.1416 +            for (i = codePointOffset; x > 0 && i < 0; i++) {
  1.1417 +                if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
  1.1418 +                    isHighSurrogate(seq.charAt(x-1))) {
  1.1419 +                    x--;
  1.1420 +                }
  1.1421 +            }
  1.1422 +            if (i < 0) {
  1.1423 +                throw new IndexOutOfBoundsException();
  1.1424 +            }
  1.1425 +        }
  1.1426 +        return x;
  1.1427 +    }
  1.1428 +
  1.1429 +    /**
  1.1430 +     * Returns the index within the given {@code char} subarray
  1.1431 +     * that is offset from the given {@code index} by
  1.1432 +     * {@code codePointOffset} code points. The
  1.1433 +     * {@code start} and {@code count} arguments specify a
  1.1434 +     * subarray of the {@code char} array. Unpaired surrogates
  1.1435 +     * within the text range given by {@code index} and
  1.1436 +     * {@code codePointOffset} count as one code point each.
  1.1437 +     *
  1.1438 +     * @param a the {@code char} array
  1.1439 +     * @param start the index of the first {@code char} of the
  1.1440 +     * subarray
  1.1441 +     * @param count the length of the subarray in {@code char}s
  1.1442 +     * @param index the index to be offset
  1.1443 +     * @param codePointOffset the offset in code points
  1.1444 +     * @return the index within the subarray
  1.1445 +     * @exception NullPointerException if {@code a} is null.
  1.1446 +     * @exception IndexOutOfBoundsException
  1.1447 +     *   if {@code start} or {@code count} is negative,
  1.1448 +     *   or if {@code start + count} is larger than the length of
  1.1449 +     *   the given array,
  1.1450 +     *   or if {@code index} is less than {@code start} or
  1.1451 +     *   larger than {@code start + count},
  1.1452 +     *   or if {@code codePointOffset} is positive and the text range
  1.1453 +     *   starting with {@code index} and ending with {@code start + count - 1}
  1.1454 +     *   has fewer than {@code codePointOffset} code
  1.1455 +     *   points,
  1.1456 +     *   or if {@code codePointOffset} is negative and the text range
  1.1457 +     *   starting with {@code start} and ending with {@code index - 1}
  1.1458 +     *   has fewer than the absolute value of
  1.1459 +     *   {@code codePointOffset} code points.
  1.1460 +     * @since 1.5
  1.1461 +     */
  1.1462 +    public static int offsetByCodePoints(char[] a, int start, int count,
  1.1463 +                                         int index, int codePointOffset) {
  1.1464 +        if (count > a.length-start || start < 0 || count < 0
  1.1465 +            || index < start || index > start+count) {
  1.1466 +            throw new IndexOutOfBoundsException();
  1.1467 +        }
  1.1468 +        return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
  1.1469 +    }
  1.1470 +
  1.1471 +    static int offsetByCodePointsImpl(char[]a, int start, int count,
  1.1472 +                                      int index, int codePointOffset) {
  1.1473 +        int x = index;
  1.1474 +        if (codePointOffset >= 0) {
  1.1475 +            int limit = start + count;
  1.1476 +            int i;
  1.1477 +            for (i = 0; x < limit && i < codePointOffset; i++) {
  1.1478 +                if (isHighSurrogate(a[x++]) && x < limit &&
  1.1479 +                    isLowSurrogate(a[x])) {
  1.1480 +                    x++;
  1.1481 +                }
  1.1482 +            }
  1.1483 +            if (i < codePointOffset) {
  1.1484 +                throw new IndexOutOfBoundsException();
  1.1485 +            }
  1.1486 +        } else {
  1.1487 +            int i;
  1.1488 +            for (i = codePointOffset; x > start && i < 0; i++) {
  1.1489 +                if (isLowSurrogate(a[--x]) && x > start &&
  1.1490 +                    isHighSurrogate(a[x-1])) {
  1.1491 +                    x--;
  1.1492 +                }
  1.1493 +            }
  1.1494 +            if (i < 0) {
  1.1495 +                throw new IndexOutOfBoundsException();
  1.1496 +            }
  1.1497 +        }
  1.1498 +        return x;
  1.1499 +    }
  1.1500 +
  1.1501 +    /**
  1.1502 +     * Determines if the specified character is a lowercase character.
  1.1503 +     * <p>
  1.1504 +     * A character is lowercase if its general category type, provided
  1.1505 +     * by {@code Character.getType(ch)}, is
  1.1506 +     * {@code LOWERCASE_LETTER}, or it has contributory property
  1.1507 +     * Other_Lowercase as defined by the Unicode Standard.
  1.1508 +     * <p>
  1.1509 +     * The following are examples of lowercase characters:
  1.1510 +     * <p><blockquote><pre>
  1.1511 +     * a b c d e f g h i j k l m n o p q r s t u v w x y z
  1.1512 +     * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
  1.1513 +     * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
  1.1514 +     * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
  1.1515 +     * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
  1.1516 +     * </pre></blockquote>
  1.1517 +     * <p> Many other Unicode characters are lowercase too.
  1.1518 +     *
  1.1519 +     * <p><b>Note:</b> This method cannot handle <a
  1.1520 +     * href="#supplementary"> supplementary characters</a>. To support
  1.1521 +     * all Unicode characters, including supplementary characters, use
  1.1522 +     * the {@link #isLowerCase(int)} method.
  1.1523 +     *
  1.1524 +     * @param   ch   the character to be tested.
  1.1525 +     * @return  {@code true} if the character is lowercase;
  1.1526 +     *          {@code false} otherwise.
  1.1527 +     * @see     Character#isLowerCase(char)
  1.1528 +     * @see     Character#isTitleCase(char)
  1.1529 +     * @see     Character#toLowerCase(char)
  1.1530 +     * @see     Character#getType(char)
  1.1531 +     */
  1.1532 +    public static boolean isLowerCase(char ch) {
  1.1533 +        return ch == toLowerCase(ch);
  1.1534 +    }
  1.1535 +
  1.1536 +    /**
  1.1537 +     * Determines if the specified character is an uppercase character.
  1.1538 +     * <p>
  1.1539 +     * A character is uppercase if its general category type, provided by
  1.1540 +     * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
  1.1541 +     * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
  1.1542 +     * <p>
  1.1543 +     * The following are examples of uppercase characters:
  1.1544 +     * <p><blockquote><pre>
  1.1545 +     * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
  1.1546 +     * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
  1.1547 +     * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
  1.1548 +     * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
  1.1549 +     * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
  1.1550 +     * </pre></blockquote>
  1.1551 +     * <p> Many other Unicode characters are uppercase too.<p>
  1.1552 +     *
  1.1553 +     * <p><b>Note:</b> This method cannot handle <a
  1.1554 +     * href="#supplementary"> supplementary characters</a>. To support
  1.1555 +     * all Unicode characters, including supplementary characters, use
  1.1556 +     * the {@link #isUpperCase(int)} method.
  1.1557 +     *
  1.1558 +     * @param   ch   the character to be tested.
  1.1559 +     * @return  {@code true} if the character is uppercase;
  1.1560 +     *          {@code false} otherwise.
  1.1561 +     * @see     Character#isLowerCase(char)
  1.1562 +     * @see     Character#isTitleCase(char)
  1.1563 +     * @see     Character#toUpperCase(char)
  1.1564 +     * @see     Character#getType(char)
  1.1565 +     * @since   1.0
  1.1566 +     */
  1.1567 +    public static boolean isUpperCase(char ch) {
  1.1568 +        return ch == toUpperCase(ch);
  1.1569 +    }
  1.1570 +
  1.1571 +    /**
  1.1572 +     * Determines if the specified character is a titlecase character.
  1.1573 +     * <p>
  1.1574 +     * A character is a titlecase character if its general
  1.1575 +     * category type, provided by {@code Character.getType(ch)},
  1.1576 +     * is {@code TITLECASE_LETTER}.
  1.1577 +     * <p>
  1.1578 +     * Some characters look like pairs of Latin letters. For example, there
  1.1579 +     * is an uppercase letter that looks like "LJ" and has a corresponding
  1.1580 +     * lowercase letter that looks like "lj". A third form, which looks like "Lj",
  1.1581 +     * is the appropriate form to use when rendering a word in lowercase
  1.1582 +     * with initial capitals, as for a book title.
  1.1583 +     * <p>
  1.1584 +     * These are some of the Unicode characters for which this method returns
  1.1585 +     * {@code true}:
  1.1586 +     * <ul>
  1.1587 +     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
  1.1588 +     * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
  1.1589 +     * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
  1.1590 +     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
  1.1591 +     * </ul>
  1.1592 +     * <p> Many other Unicode characters are titlecase too.<p>
  1.1593 +     *
  1.1594 +     * <p><b>Note:</b> This method cannot handle <a
  1.1595 +     * href="#supplementary"> supplementary characters</a>. To support
  1.1596 +     * all Unicode characters, including supplementary characters, use
  1.1597 +     * the {@link #isTitleCase(int)} method.
  1.1598 +     *
  1.1599 +     * @param   ch   the character to be tested.
  1.1600 +     * @return  {@code true} if the character is titlecase;
  1.1601 +     *          {@code false} otherwise.
  1.1602 +     * @see     Character#isLowerCase(char)
  1.1603 +     * @see     Character#isUpperCase(char)
  1.1604 +     * @see     Character#toTitleCase(char)
  1.1605 +     * @see     Character#getType(char)
  1.1606 +     * @since   1.0.2
  1.1607 +     */
  1.1608 +    public static boolean isTitleCase(char ch) {
  1.1609 +        return isTitleCase((int)ch);
  1.1610 +    }
  1.1611 +
  1.1612 +    /**
  1.1613 +     * Determines if the specified character (Unicode code point) is a titlecase character.
  1.1614 +     * <p>
  1.1615 +     * A character is a titlecase character if its general
  1.1616 +     * category type, provided by {@link Character#getType(int) getType(codePoint)},
  1.1617 +     * is {@code TITLECASE_LETTER}.
  1.1618 +     * <p>
  1.1619 +     * Some characters look like pairs of Latin letters. For example, there
  1.1620 +     * is an uppercase letter that looks like "LJ" and has a corresponding
  1.1621 +     * lowercase letter that looks like "lj". A third form, which looks like "Lj",
  1.1622 +     * is the appropriate form to use when rendering a word in lowercase
  1.1623 +     * with initial capitals, as for a book title.
  1.1624 +     * <p>
  1.1625 +     * These are some of the Unicode characters for which this method returns
  1.1626 +     * {@code true}:
  1.1627 +     * <ul>
  1.1628 +     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
  1.1629 +     * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
  1.1630 +     * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
  1.1631 +     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
  1.1632 +     * </ul>
  1.1633 +     * <p> Many other Unicode characters are titlecase too.<p>
  1.1634 +     *
  1.1635 +     * @param   codePoint the character (Unicode code point) to be tested.
  1.1636 +     * @return  {@code true} if the character is titlecase;
  1.1637 +     *          {@code false} otherwise.
  1.1638 +     * @see     Character#isLowerCase(int)
  1.1639 +     * @see     Character#isUpperCase(int)
  1.1640 +     * @see     Character#toTitleCase(int)
  1.1641 +     * @see     Character#getType(int)
  1.1642 +     * @since   1.5
  1.1643 +     */
  1.1644 +    public static boolean isTitleCase(int codePoint) {
  1.1645 +        return getType(codePoint) == Character.TITLECASE_LETTER;
  1.1646 +    }
  1.1647 +
  1.1648 +    /**
  1.1649 +     * Determines if the specified character is a digit.
  1.1650 +     * <p>
  1.1651 +     * A character is a digit if its general category type, provided
  1.1652 +     * by {@code Character.getType(ch)}, is
  1.1653 +     * {@code DECIMAL_DIGIT_NUMBER}.
  1.1654 +     * <p>
  1.1655 +     * Some Unicode character ranges that contain digits:
  1.1656 +     * <ul>
  1.1657 +     * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
  1.1658 +     *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
  1.1659 +     * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
  1.1660 +     *     Arabic-Indic digits
  1.1661 +     * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
  1.1662 +     *     Extended Arabic-Indic digits
  1.1663 +     * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
  1.1664 +     *     Devanagari digits
  1.1665 +     * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
  1.1666 +     *     Fullwidth digits
  1.1667 +     * </ul>
  1.1668 +     *
  1.1669 +     * Many other character ranges contain digits as well.
  1.1670 +     *
  1.1671 +     * <p><b>Note:</b> This method cannot handle <a
  1.1672 +     * href="#supplementary"> supplementary characters</a>. To support
  1.1673 +     * all Unicode characters, including supplementary characters, use
  1.1674 +     * the {@link #isDigit(int)} method.
  1.1675 +     *
  1.1676 +     * @param   ch   the character to be tested.
  1.1677 +     * @return  {@code true} if the character is a digit;
  1.1678 +     *          {@code false} otherwise.
  1.1679 +     * @see     Character#digit(char, int)
  1.1680 +     * @see     Character#forDigit(int, int)
  1.1681 +     * @see     Character#getType(char)
  1.1682 +     */
  1.1683 +    public static boolean isDigit(char ch) {
  1.1684 +        return String.valueOf(ch).matches("\\d");
  1.1685 +    }
  1.1686 +
  1.1687 +    /**
  1.1688 +     * Determines if the specified character (Unicode code point) is a digit.
  1.1689 +     * <p>
  1.1690 +     * A character is a digit if its general category type, provided
  1.1691 +     * by {@link Character#getType(int) getType(codePoint)}, is
  1.1692 +     * {@code DECIMAL_DIGIT_NUMBER}.
  1.1693 +     * <p>
  1.1694 +     * Some Unicode character ranges that contain digits:
  1.1695 +     * <ul>
  1.1696 +     * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
  1.1697 +     *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
  1.1698 +     * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
  1.1699 +     *     Arabic-Indic digits
  1.1700 +     * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
  1.1701 +     *     Extended Arabic-Indic digits
  1.1702 +     * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
  1.1703 +     *     Devanagari digits
  1.1704 +     * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
  1.1705 +     *     Fullwidth digits
  1.1706 +     * </ul>
  1.1707 +     *
  1.1708 +     * Many other character ranges contain digits as well.
  1.1709 +     *
  1.1710 +     * @param   codePoint the character (Unicode code point) to be tested.
  1.1711 +     * @return  {@code true} if the character is a digit;
  1.1712 +     *          {@code false} otherwise.
  1.1713 +     * @see     Character#forDigit(int, int)
  1.1714 +     * @see     Character#getType(int)
  1.1715 +     * @since   1.5
  1.1716 +     */
  1.1717 +    public static boolean isDigit(int codePoint) {
  1.1718 +        return fromCodeChars(codePoint).matches("\\d");
  1.1719 +    }
  1.1720 +    
  1.1721 +    @JavaScriptBody(args = "c", body = "return String.fromCharCode(c);")
  1.1722 +    private native static String fromCodeChars(int codePoint);
  1.1723 +
  1.1724 +    /**
  1.1725 +     * Determines if a character is defined in Unicode.
  1.1726 +     * <p>
  1.1727 +     * A character is defined if at least one of the following is true:
  1.1728 +     * <ul>
  1.1729 +     * <li>It has an entry in the UnicodeData file.
  1.1730 +     * <li>It has a value in a range defined by the UnicodeData file.
  1.1731 +     * </ul>
  1.1732 +     *
  1.1733 +     * <p><b>Note:</b> This method cannot handle <a
  1.1734 +     * href="#supplementary"> supplementary characters</a>. To support
  1.1735 +     * all Unicode characters, including supplementary characters, use
  1.1736 +     * the {@link #isDefined(int)} method.
  1.1737 +     *
  1.1738 +     * @param   ch   the character to be tested
  1.1739 +     * @return  {@code true} if the character has a defined meaning
  1.1740 +     *          in Unicode; {@code false} otherwise.
  1.1741 +     * @see     Character#isDigit(char)
  1.1742 +     * @see     Character#isLetter(char)
  1.1743 +     * @see     Character#isLetterOrDigit(char)
  1.1744 +     * @see     Character#isLowerCase(char)
  1.1745 +     * @see     Character#isTitleCase(char)
  1.1746 +     * @see     Character#isUpperCase(char)
  1.1747 +     * @since   1.0.2
  1.1748 +     */
  1.1749 +    public static boolean isDefined(char ch) {
  1.1750 +        return isDefined((int)ch);
  1.1751 +    }
  1.1752 +
  1.1753 +    /**
  1.1754 +     * Determines if a character (Unicode code point) is defined in Unicode.
  1.1755 +     * <p>
  1.1756 +     * A character is defined if at least one of the following is true:
  1.1757 +     * <ul>
  1.1758 +     * <li>It has an entry in the UnicodeData file.
  1.1759 +     * <li>It has a value in a range defined by the UnicodeData file.
  1.1760 +     * </ul>
  1.1761 +     *
  1.1762 +     * @param   codePoint the character (Unicode code point) to be tested.
  1.1763 +     * @return  {@code true} if the character has a defined meaning
  1.1764 +     *          in Unicode; {@code false} otherwise.
  1.1765 +     * @see     Character#isDigit(int)
  1.1766 +     * @see     Character#isLetter(int)
  1.1767 +     * @see     Character#isLetterOrDigit(int)
  1.1768 +     * @see     Character#isLowerCase(int)
  1.1769 +     * @see     Character#isTitleCase(int)
  1.1770 +     * @see     Character#isUpperCase(int)
  1.1771 +     * @since   1.5
  1.1772 +     */
  1.1773 +    public static boolean isDefined(int codePoint) {
  1.1774 +        return getType(codePoint) != Character.UNASSIGNED;
  1.1775 +    }
  1.1776 +
  1.1777 +    /**
  1.1778 +     * Determines if the specified character is a letter.
  1.1779 +     * <p>
  1.1780 +     * A character is considered to be a letter if its general
  1.1781 +     * category type, provided by {@code Character.getType(ch)},
  1.1782 +     * is any of the following:
  1.1783 +     * <ul>
  1.1784 +     * <li> {@code UPPERCASE_LETTER}
  1.1785 +     * <li> {@code LOWERCASE_LETTER}
  1.1786 +     * <li> {@code TITLECASE_LETTER}
  1.1787 +     * <li> {@code MODIFIER_LETTER}
  1.1788 +     * <li> {@code OTHER_LETTER}
  1.1789 +     * </ul>
  1.1790 +     *
  1.1791 +     * Not all letters have case. Many characters are
  1.1792 +     * letters but are neither uppercase nor lowercase nor titlecase.
  1.1793 +     *
  1.1794 +     * <p><b>Note:</b> This method cannot handle <a
  1.1795 +     * href="#supplementary"> supplementary characters</a>. To support
  1.1796 +     * all Unicode characters, including supplementary characters, use
  1.1797 +     * the {@link #isLetter(int)} method.
  1.1798 +     *
  1.1799 +     * @param   ch   the character to be tested.
  1.1800 +     * @return  {@code true} if the character is a letter;
  1.1801 +     *          {@code false} otherwise.
  1.1802 +     * @see     Character#isDigit(char)
  1.1803 +     * @see     Character#isJavaIdentifierStart(char)
  1.1804 +     * @see     Character#isJavaLetter(char)
  1.1805 +     * @see     Character#isJavaLetterOrDigit(char)
  1.1806 +     * @see     Character#isLetterOrDigit(char)
  1.1807 +     * @see     Character#isLowerCase(char)
  1.1808 +     * @see     Character#isTitleCase(char)
  1.1809 +     * @see     Character#isUnicodeIdentifierStart(char)
  1.1810 +     * @see     Character#isUpperCase(char)
  1.1811 +     */
  1.1812 +    public static boolean isLetter(char ch) {
  1.1813 +        return String.valueOf(ch).matches("\\w") && !isDigit(ch);
  1.1814 +    }
  1.1815 +
  1.1816 +    /**
  1.1817 +     * Determines if the specified character (Unicode code point) is a letter.
  1.1818 +     * <p>
  1.1819 +     * A character is considered to be a letter if its general
  1.1820 +     * category type, provided by {@link Character#getType(int) getType(codePoint)},
  1.1821 +     * is any of the following:
  1.1822 +     * <ul>
  1.1823 +     * <li> {@code UPPERCASE_LETTER}
  1.1824 +     * <li> {@code LOWERCASE_LETTER}
  1.1825 +     * <li> {@code TITLECASE_LETTER}
  1.1826 +     * <li> {@code MODIFIER_LETTER}
  1.1827 +     * <li> {@code OTHER_LETTER}
  1.1828 +     * </ul>
  1.1829 +     *
  1.1830 +     * Not all letters have case. Many characters are
  1.1831 +     * letters but are neither uppercase nor lowercase nor titlecase.
  1.1832 +     *
  1.1833 +     * @param   codePoint the character (Unicode code point) to be tested.
  1.1834 +     * @return  {@code true} if the character is a letter;
  1.1835 +     *          {@code false} otherwise.
  1.1836 +     * @see     Character#isDigit(int)
  1.1837 +     * @see     Character#isJavaIdentifierStart(int)
  1.1838 +     * @see     Character#isLetterOrDigit(int)
  1.1839 +     * @see     Character#isLowerCase(int)
  1.1840 +     * @see     Character#isTitleCase(int)
  1.1841 +     * @see     Character#isUnicodeIdentifierStart(int)
  1.1842 +     * @see     Character#isUpperCase(int)
  1.1843 +     * @since   1.5
  1.1844 +     */
  1.1845 +    public static boolean isLetter(int codePoint) {
  1.1846 +        return fromCodeChars(codePoint).matches("\\w") && !isDigit(codePoint);
  1.1847 +    }
  1.1848 +
  1.1849 +    /**
  1.1850 +     * Determines if the specified character is a letter or digit.
  1.1851 +     * <p>
  1.1852 +     * A character is considered to be a letter or digit if either
  1.1853 +     * {@code Character.isLetter(char ch)} or
  1.1854 +     * {@code Character.isDigit(char ch)} returns
  1.1855 +     * {@code true} for the character.
  1.1856 +     *
  1.1857 +     * <p><b>Note:</b> This method cannot handle <a
  1.1858 +     * href="#supplementary"> supplementary characters</a>. To support
  1.1859 +     * all Unicode characters, including supplementary characters, use
  1.1860 +     * the {@link #isLetterOrDigit(int)} method.
  1.1861 +     *
  1.1862 +     * @param   ch   the character to be tested.
  1.1863 +     * @return  {@code true} if the character is a letter or digit;
  1.1864 +     *          {@code false} otherwise.
  1.1865 +     * @see     Character#isDigit(char)
  1.1866 +     * @see     Character#isJavaIdentifierPart(char)
  1.1867 +     * @see     Character#isJavaLetter(char)
  1.1868 +     * @see     Character#isJavaLetterOrDigit(char)
  1.1869 +     * @see     Character#isLetter(char)
  1.1870 +     * @see     Character#isUnicodeIdentifierPart(char)
  1.1871 +     * @since   1.0.2
  1.1872 +     */
  1.1873 +    public static boolean isLetterOrDigit(char ch) {
  1.1874 +        return String.valueOf(ch).matches("\\w");
  1.1875 +    }
  1.1876 +
  1.1877 +    /**
  1.1878 +     * Determines if the specified character (Unicode code point) is a letter or digit.
  1.1879 +     * <p>
  1.1880 +     * A character is considered to be a letter or digit if either
  1.1881 +     * {@link #isLetter(int) isLetter(codePoint)} or
  1.1882 +     * {@link #isDigit(int) isDigit(codePoint)} returns
  1.1883 +     * {@code true} for the character.
  1.1884 +     *
  1.1885 +     * @param   codePoint the character (Unicode code point) to be tested.
  1.1886 +     * @return  {@code true} if the character is a letter or digit;
  1.1887 +     *          {@code false} otherwise.
  1.1888 +     * @see     Character#isDigit(int)
  1.1889 +     * @see     Character#isJavaIdentifierPart(int)
  1.1890 +     * @see     Character#isLetter(int)
  1.1891 +     * @see     Character#isUnicodeIdentifierPart(int)
  1.1892 +     * @since   1.5
  1.1893 +     */
  1.1894 +    public static boolean isLetterOrDigit(int codePoint) {
  1.1895 +        return fromCodeChars(codePoint).matches("\\w");
  1.1896 +    }
  1.1897 +    
  1.1898 +    static int getType(int x) {
  1.1899 +        throw new UnsupportedOperationException();
  1.1900 +    }
  1.1901 + 
  1.1902 +    /**
  1.1903 +     * Determines if the specified character is
  1.1904 +     * permissible as the first character in a Java identifier.
  1.1905 +     * <p>
  1.1906 +     * A character may start a Java identifier if and only if
  1.1907 +     * one of the following conditions is true:
  1.1908 +     * <ul>
  1.1909 +     * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
  1.1910 +     * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
  1.1911 +     * <li> {@code ch} is a currency symbol (such as {@code '$'})
  1.1912 +     * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
  1.1913 +     * </ul>
  1.1914 +     *
  1.1915 +     * <p><b>Note:</b> This method cannot handle <a
  1.1916 +     * href="#supplementary"> supplementary characters</a>. To support
  1.1917 +     * all Unicode characters, including supplementary characters, use
  1.1918 +     * the {@link #isJavaIdentifierStart(int)} method.
  1.1919 +     *
  1.1920 +     * @param   ch the character to be tested.
  1.1921 +     * @return  {@code true} if the character may start a Java identifier;
  1.1922 +     *          {@code false} otherwise.
  1.1923 +     * @see     Character#isJavaIdentifierPart(char)
  1.1924 +     * @see     Character#isLetter(char)
  1.1925 +     * @see     Character#isUnicodeIdentifierStart(char)
  1.1926 +     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
  1.1927 +     * @since   1.1
  1.1928 +     */
  1.1929 +    public static boolean isJavaIdentifierStart(char ch) {
  1.1930 +        return isJavaIdentifierStart((int)ch);
  1.1931 +    }
  1.1932 +
  1.1933 +    /**
  1.1934 +     * Determines if the character (Unicode code point) is
  1.1935 +     * permissible as the first character in a Java identifier.
  1.1936 +     * <p>
  1.1937 +     * A character may start a Java identifier if and only if
  1.1938 +     * one of the following conditions is true:
  1.1939 +     * <ul>
  1.1940 +     * <li> {@link #isLetter(int) isLetter(codePoint)}
  1.1941 +     *      returns {@code true}
  1.1942 +     * <li> {@link #getType(int) getType(codePoint)}
  1.1943 +     *      returns {@code LETTER_NUMBER}
  1.1944 +     * <li> the referenced character is a currency symbol (such as {@code '$'})
  1.1945 +     * <li> the referenced character is a connecting punctuation character
  1.1946 +     *      (such as {@code '_'}).
  1.1947 +     * </ul>
  1.1948 +     *
  1.1949 +     * @param   codePoint the character (Unicode code point) to be tested.
  1.1950 +     * @return  {@code true} if the character may start a Java identifier;
  1.1951 +     *          {@code false} otherwise.
  1.1952 +     * @see     Character#isJavaIdentifierPart(int)
  1.1953 +     * @see     Character#isLetter(int)
  1.1954 +     * @see     Character#isUnicodeIdentifierStart(int)
  1.1955 +     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
  1.1956 +     * @since   1.5
  1.1957 +     */
  1.1958 +    public static boolean isJavaIdentifierStart(int codePoint) {
  1.1959 +        return 
  1.1960 +            ('A' <= codePoint && codePoint <= 'Z') ||
  1.1961 +            ('a' <= codePoint && codePoint <= 'z');
  1.1962 +    }
  1.1963 +
  1.1964 +    /**
  1.1965 +     * Determines if the specified character may be part of a Java
  1.1966 +     * identifier as other than the first character.
  1.1967 +     * <p>
  1.1968 +     * A character may be part of a Java identifier if any of the following
  1.1969 +     * are true:
  1.1970 +     * <ul>
  1.1971 +     * <li>  it is a letter
  1.1972 +     * <li>  it is a currency symbol (such as {@code '$'})
  1.1973 +     * <li>  it is a connecting punctuation character (such as {@code '_'})
  1.1974 +     * <li>  it is a digit
  1.1975 +     * <li>  it is a numeric letter (such as a Roman numeral character)
  1.1976 +     * <li>  it is a combining mark
  1.1977 +     * <li>  it is a non-spacing mark
  1.1978 +     * <li> {@code isIdentifierIgnorable} returns
  1.1979 +     * {@code true} for the character
  1.1980 +     * </ul>
  1.1981 +     *
  1.1982 +     * <p><b>Note:</b> This method cannot handle <a
  1.1983 +     * href="#supplementary"> supplementary characters</a>. To support
  1.1984 +     * all Unicode characters, including supplementary characters, use
  1.1985 +     * the {@link #isJavaIdentifierPart(int)} method.
  1.1986 +     *
  1.1987 +     * @param   ch      the character to be tested.
  1.1988 +     * @return {@code true} if the character may be part of a
  1.1989 +     *          Java identifier; {@code false} otherwise.
  1.1990 +     * @see     Character#isIdentifierIgnorable(char)
  1.1991 +     * @see     Character#isJavaIdentifierStart(char)
  1.1992 +     * @see     Character#isLetterOrDigit(char)
  1.1993 +     * @see     Character#isUnicodeIdentifierPart(char)
  1.1994 +     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
  1.1995 +     * @since   1.1
  1.1996 +     */
  1.1997 +    public static boolean isJavaIdentifierPart(char ch) {
  1.1998 +        return isJavaIdentifierPart((int)ch);
  1.1999 +    }
  1.2000 +
  1.2001 +    /**
  1.2002 +     * Determines if the character (Unicode code point) may be part of a Java
  1.2003 +     * identifier as other than the first character.
  1.2004 +     * <p>
  1.2005 +     * A character may be part of a Java identifier if any of the following
  1.2006 +     * are true:
  1.2007 +     * <ul>
  1.2008 +     * <li>  it is a letter
  1.2009 +     * <li>  it is a currency symbol (such as {@code '$'})
  1.2010 +     * <li>  it is a connecting punctuation character (such as {@code '_'})
  1.2011 +     * <li>  it is a digit
  1.2012 +     * <li>  it is a numeric letter (such as a Roman numeral character)
  1.2013 +     * <li>  it is a combining mark
  1.2014 +     * <li>  it is a non-spacing mark
  1.2015 +     * <li> {@link #isIdentifierIgnorable(int)
  1.2016 +     * isIdentifierIgnorable(codePoint)} returns {@code true} for
  1.2017 +     * the character
  1.2018 +     * </ul>
  1.2019 +     *
  1.2020 +     * @param   codePoint the character (Unicode code point) to be tested.
  1.2021 +     * @return {@code true} if the character may be part of a
  1.2022 +     *          Java identifier; {@code false} otherwise.
  1.2023 +     * @see     Character#isIdentifierIgnorable(int)
  1.2024 +     * @see     Character#isJavaIdentifierStart(int)
  1.2025 +     * @see     Character#isLetterOrDigit(int)
  1.2026 +     * @see     Character#isUnicodeIdentifierPart(int)
  1.2027 +     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
  1.2028 +     * @since   1.5
  1.2029 +     */
  1.2030 +    public static boolean isJavaIdentifierPart(int codePoint) {
  1.2031 +        return isJavaIdentifierStart(codePoint) ||
  1.2032 +            ('0' <= codePoint && codePoint <= '9') || codePoint == '$';
  1.2033 +    }
  1.2034 +   
  1.2035 +    /**
  1.2036 +     * Converts the character argument to lowercase using case
  1.2037 +     * mapping information from the UnicodeData file.
  1.2038 +     * <p>
  1.2039 +     * Note that
  1.2040 +     * {@code Character.isLowerCase(Character.toLowerCase(ch))}
  1.2041 +     * does not always return {@code true} for some ranges of
  1.2042 +     * characters, particularly those that are symbols or ideographs.
  1.2043 +     *
  1.2044 +     * <p>In general, {@link String#toLowerCase()} should be used to map
  1.2045 +     * characters to lowercase. {@code String} case mapping methods
  1.2046 +     * have several benefits over {@code Character} case mapping methods.
  1.2047 +     * {@code String} case mapping methods can perform locale-sensitive
  1.2048 +     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
  1.2049 +     * the {@code Character} case mapping methods cannot.
  1.2050 +     *
  1.2051 +     * <p><b>Note:</b> This method cannot handle <a
  1.2052 +     * href="#supplementary"> supplementary characters</a>. To support
  1.2053 +     * all Unicode characters, including supplementary characters, use
  1.2054 +     * the {@link #toLowerCase(int)} method.
  1.2055 +     *
  1.2056 +     * @param   ch   the character to be converted.
  1.2057 +     * @return  the lowercase equivalent of the character, if any;
  1.2058 +     *          otherwise, the character itself.
  1.2059 +     * @see     Character#isLowerCase(char)
  1.2060 +     * @see     String#toLowerCase()
  1.2061 +     */
  1.2062 +    public static char toLowerCase(char ch) {
  1.2063 +        return String.valueOf(ch).toLowerCase().charAt(0);
  1.2064 +    }
  1.2065 +
  1.2066 +    /**
  1.2067 +     * Converts the character argument to uppercase using case mapping
  1.2068 +     * information from the UnicodeData file.
  1.2069 +     * <p>
  1.2070 +     * Note that
  1.2071 +     * {@code Character.isUpperCase(Character.toUpperCase(ch))}
  1.2072 +     * does not always return {@code true} for some ranges of
  1.2073 +     * characters, particularly those that are symbols or ideographs.
  1.2074 +     *
  1.2075 +     * <p>In general, {@link String#toUpperCase()} should be used to map
  1.2076 +     * characters to uppercase. {@code String} case mapping methods
  1.2077 +     * have several benefits over {@code Character} case mapping methods.
  1.2078 +     * {@code String} case mapping methods can perform locale-sensitive
  1.2079 +     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
  1.2080 +     * the {@code Character} case mapping methods cannot.
  1.2081 +     *
  1.2082 +     * <p><b>Note:</b> This method cannot handle <a
  1.2083 +     * href="#supplementary"> supplementary characters</a>. To support
  1.2084 +     * all Unicode characters, including supplementary characters, use
  1.2085 +     * the {@link #toUpperCase(int)} method.
  1.2086 +     *
  1.2087 +     * @param   ch   the character to be converted.
  1.2088 +     * @return  the uppercase equivalent of the character, if any;
  1.2089 +     *          otherwise, the character itself.
  1.2090 +     * @see     Character#isUpperCase(char)
  1.2091 +     * @see     String#toUpperCase()
  1.2092 +     */
  1.2093 +    public static char toUpperCase(char ch) {
  1.2094 +        return String.valueOf(ch).toUpperCase().charAt(0);
  1.2095 +    }
  1.2096 +
  1.2097 +    /**
  1.2098 +     * Returns the numeric value of the character {@code ch} in the
  1.2099 +     * specified radix.
  1.2100 +     * <p>
  1.2101 +     * If the radix is not in the range {@code MIN_RADIX} &le;
  1.2102 +     * {@code radix} &le; {@code MAX_RADIX} or if the
  1.2103 +     * value of {@code ch} is not a valid digit in the specified
  1.2104 +     * radix, {@code -1} is returned. A character is a valid digit
  1.2105 +     * if at least one of the following is true:
  1.2106 +     * <ul>
  1.2107 +     * <li>The method {@code isDigit} is {@code true} of the character
  1.2108 +     *     and the Unicode decimal digit value of the character (or its
  1.2109 +     *     single-character decomposition) is less than the specified radix.
  1.2110 +     *     In this case the decimal digit value is returned.
  1.2111 +     * <li>The character is one of the uppercase Latin letters
  1.2112 +     *     {@code 'A'} through {@code 'Z'} and its code is less than
  1.2113 +     *     {@code radix + 'A' - 10}.
  1.2114 +     *     In this case, {@code ch - 'A' + 10}
  1.2115 +     *     is returned.
  1.2116 +     * <li>The character is one of the lowercase Latin letters
  1.2117 +     *     {@code 'a'} through {@code 'z'} and its code is less than
  1.2118 +     *     {@code radix + 'a' - 10}.
  1.2119 +     *     In this case, {@code ch - 'a' + 10}
  1.2120 +     *     is returned.
  1.2121 +     * <li>The character is one of the fullwidth uppercase Latin letters A
  1.2122 +     *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
  1.2123 +     *     and its code is less than
  1.2124 +     *     {@code radix + '\u005CuFF21' - 10}.
  1.2125 +     *     In this case, {@code ch - '\u005CuFF21' + 10}
  1.2126 +     *     is returned.
  1.2127 +     * <li>The character is one of the fullwidth lowercase Latin letters a
  1.2128 +     *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
  1.2129 +     *     and its code is less than
  1.2130 +     *     {@code radix + '\u005CuFF41' - 10}.
  1.2131 +     *     In this case, {@code ch - '\u005CuFF41' + 10}
  1.2132 +     *     is returned.
  1.2133 +     * </ul>
  1.2134 +     *
  1.2135 +     * <p><b>Note:</b> This method cannot handle <a
  1.2136 +     * href="#supplementary"> supplementary characters</a>. To support
  1.2137 +     * all Unicode characters, including supplementary characters, use
  1.2138 +     * the {@link #digit(int, int)} method.
  1.2139 +     *
  1.2140 +     * @param   ch      the character to be converted.
  1.2141 +     * @param   radix   the radix.
  1.2142 +     * @return  the numeric value represented by the character in the
  1.2143 +     *          specified radix.
  1.2144 +     * @see     Character#forDigit(int, int)
  1.2145 +     * @see     Character#isDigit(char)
  1.2146 +     */
  1.2147 +    public static int digit(char ch, int radix) {
  1.2148 +        return digit((int)ch, radix);
  1.2149 +    }
  1.2150 +
  1.2151 +    /**
  1.2152 +     * Returns the numeric value of the specified character (Unicode
  1.2153 +     * code point) in the specified radix.
  1.2154 +     *
  1.2155 +     * <p>If the radix is not in the range {@code MIN_RADIX} &le;
  1.2156 +     * {@code radix} &le; {@code MAX_RADIX} or if the
  1.2157 +     * character is not a valid digit in the specified
  1.2158 +     * radix, {@code -1} is returned. A character is a valid digit
  1.2159 +     * if at least one of the following is true:
  1.2160 +     * <ul>
  1.2161 +     * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
  1.2162 +     *     and the Unicode decimal digit value of the character (or its
  1.2163 +     *     single-character decomposition) is less than the specified radix.
  1.2164 +     *     In this case the decimal digit value is returned.
  1.2165 +     * <li>The character is one of the uppercase Latin letters
  1.2166 +     *     {@code 'A'} through {@code 'Z'} and its code is less than
  1.2167 +     *     {@code radix + 'A' - 10}.
  1.2168 +     *     In this case, {@code codePoint - 'A' + 10}
  1.2169 +     *     is returned.
  1.2170 +     * <li>The character is one of the lowercase Latin letters
  1.2171 +     *     {@code 'a'} through {@code 'z'} and its code is less than
  1.2172 +     *     {@code radix + 'a' - 10}.
  1.2173 +     *     In this case, {@code codePoint - 'a' + 10}
  1.2174 +     *     is returned.
  1.2175 +     * <li>The character is one of the fullwidth uppercase Latin letters A
  1.2176 +     *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
  1.2177 +     *     and its code is less than
  1.2178 +     *     {@code radix + '\u005CuFF21' - 10}.
  1.2179 +     *     In this case,
  1.2180 +     *     {@code codePoint - '\u005CuFF21' + 10}
  1.2181 +     *     is returned.
  1.2182 +     * <li>The character is one of the fullwidth lowercase Latin letters a
  1.2183 +     *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
  1.2184 +     *     and its code is less than
  1.2185 +     *     {@code radix + '\u005CuFF41'- 10}.
  1.2186 +     *     In this case,
  1.2187 +     *     {@code codePoint - '\u005CuFF41' + 10}
  1.2188 +     *     is returned.
  1.2189 +     * </ul>
  1.2190 +     *
  1.2191 +     * @param   codePoint the character (Unicode code point) to be converted.
  1.2192 +     * @param   radix   the radix.
  1.2193 +     * @return  the numeric value represented by the character in the
  1.2194 +     *          specified radix.
  1.2195 +     * @see     Character#forDigit(int, int)
  1.2196 +     * @see     Character#isDigit(int)
  1.2197 +     * @since   1.5
  1.2198 +     */
  1.2199 +    @JavaScriptBody(args = { "codePoint", "radix" }, body=
  1.2200 +        "var x = parseInt(String.fromCharCode(codePoint), radix);\n"
  1.2201 +      + "return isNaN(x) ? -1 : x;"
  1.2202 +    )
  1.2203 +    public static int digit(int codePoint, int radix) {
  1.2204 +        throw new UnsupportedOperationException();
  1.2205 +    }
  1.2206 +
  1.2207 +    /**
  1.2208 +     * Returns the {@code int} value that the specified Unicode
  1.2209 +     * character represents. For example, the character
  1.2210 +     * {@code '\u005Cu216C'} (the roman numeral fifty) will return
  1.2211 +     * an int with a value of 50.
  1.2212 +     * <p>
  1.2213 +     * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
  1.2214 +     * {@code '\u005Cu005A'}), lowercase
  1.2215 +     * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
  1.2216 +     * full width variant ({@code '\u005CuFF21'} through
  1.2217 +     * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
  1.2218 +     * {@code '\u005CuFF5A'}) forms have numeric values from 10
  1.2219 +     * through 35. This is independent of the Unicode specification,
  1.2220 +     * which does not assign numeric values to these {@code char}
  1.2221 +     * values.
  1.2222 +     * <p>
  1.2223 +     * If the character does not have a numeric value, then -1 is returned.
  1.2224 +     * If the character has a numeric value that cannot be represented as a
  1.2225 +     * nonnegative integer (for example, a fractional value), then -2
  1.2226 +     * is returned.
  1.2227 +     *
  1.2228 +     * <p><b>Note:</b> This method cannot handle <a
  1.2229 +     * href="#supplementary"> supplementary characters</a>. To support
  1.2230 +     * all Unicode characters, including supplementary characters, use
  1.2231 +     * the {@link #getNumericValue(int)} method.
  1.2232 +     *
  1.2233 +     * @param   ch      the character to be converted.
  1.2234 +     * @return  the numeric value of the character, as a nonnegative {@code int}
  1.2235 +     *           value; -2 if the character has a numeric value that is not a
  1.2236 +     *          nonnegative integer; -1 if the character has no numeric value.
  1.2237 +     * @see     Character#forDigit(int, int)
  1.2238 +     * @see     Character#isDigit(char)
  1.2239 +     * @since   1.1
  1.2240 +     */
  1.2241 +    public static int getNumericValue(char ch) {
  1.2242 +        return getNumericValue((int)ch);
  1.2243 +    }
  1.2244 +
  1.2245 +    /**
  1.2246 +     * Returns the {@code int} value that the specified
  1.2247 +     * character (Unicode code point) represents. For example, the character
  1.2248 +     * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
  1.2249 +     * an {@code int} with a value of 50.
  1.2250 +     * <p>
  1.2251 +     * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
  1.2252 +     * {@code '\u005Cu005A'}), lowercase
  1.2253 +     * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
  1.2254 +     * full width variant ({@code '\u005CuFF21'} through
  1.2255 +     * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
  1.2256 +     * {@code '\u005CuFF5A'}) forms have numeric values from 10
  1.2257 +     * through 35. This is independent of the Unicode specification,
  1.2258 +     * which does not assign numeric values to these {@code char}
  1.2259 +     * values.
  1.2260 +     * <p>
  1.2261 +     * If the character does not have a numeric value, then -1 is returned.
  1.2262 +     * If the character has a numeric value that cannot be represented as a
  1.2263 +     * nonnegative integer (for example, a fractional value), then -2
  1.2264 +     * is returned.
  1.2265 +     *
  1.2266 +     * @param   codePoint the character (Unicode code point) to be converted.
  1.2267 +     * @return  the numeric value of the character, as a nonnegative {@code int}
  1.2268 +     *          value; -2 if the character has a numeric value that is not a
  1.2269 +     *          nonnegative integer; -1 if the character has no numeric value.
  1.2270 +     * @see     Character#forDigit(int, int)
  1.2271 +     * @see     Character#isDigit(int)
  1.2272 +     * @since   1.5
  1.2273 +     */
  1.2274 +    public static int getNumericValue(int codePoint) {
  1.2275 +        throw new UnsupportedOperationException();
  1.2276 +    }
  1.2277 +
  1.2278 +    /**
  1.2279 +     * Determines if the specified character is ISO-LATIN-1 white space.
  1.2280 +     * This method returns {@code true} for the following five
  1.2281 +     * characters only:
  1.2282 +     * <table>
  1.2283 +     * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>
  1.2284 +     *     <td>{@code HORIZONTAL TABULATION}</td></tr>
  1.2285 +     * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>
  1.2286 +     *     <td>{@code NEW LINE}</td></tr>
  1.2287 +     * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>
  1.2288 +     *     <td>{@code FORM FEED}</td></tr>
  1.2289 +     * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>
  1.2290 +     *     <td>{@code CARRIAGE RETURN}</td></tr>
  1.2291 +     * <tr><td>{@code '&nbsp;'}</td>  <td>{@code U+0020}</td>
  1.2292 +     *     <td>{@code SPACE}</td></tr>
  1.2293 +     * </table>
  1.2294 +     *
  1.2295 +     * @param      ch   the character to be tested.
  1.2296 +     * @return     {@code true} if the character is ISO-LATIN-1 white
  1.2297 +     *             space; {@code false} otherwise.
  1.2298 +     * @see        Character#isSpaceChar(char)
  1.2299 +     * @see        Character#isWhitespace(char)
  1.2300 +     * @deprecated Replaced by isWhitespace(char).
  1.2301 +     */
  1.2302 +    @Deprecated
  1.2303 +    public static boolean isSpace(char ch) {
  1.2304 +        return (ch <= 0x0020) &&
  1.2305 +            (((((1L << 0x0009) |
  1.2306 +            (1L << 0x000A) |
  1.2307 +            (1L << 0x000C) |
  1.2308 +            (1L << 0x000D) |
  1.2309 +            (1L << 0x0020)) >> ch) & 1L) != 0);
  1.2310 +    }
  1.2311 +
  1.2312 +
  1.2313 +
  1.2314 +    /**
  1.2315 +     * Determines if the specified character is white space according to Java.
  1.2316 +     * A character is a Java whitespace character if and only if it satisfies
  1.2317 +     * one of the following criteria:
  1.2318 +     * <ul>
  1.2319 +     * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
  1.2320 +     *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
  1.2321 +     *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
  1.2322 +     *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
  1.2323 +     * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
  1.2324 +     * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
  1.2325 +     * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
  1.2326 +     * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
  1.2327 +     * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
  1.2328 +     * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
  1.2329 +     * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
  1.2330 +     * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
  1.2331 +     * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
  1.2332 +     * </ul>
  1.2333 +     *
  1.2334 +     * <p><b>Note:</b> This method cannot handle <a
  1.2335 +     * href="#supplementary"> supplementary characters</a>. To support
  1.2336 +     * all Unicode characters, including supplementary characters, use
  1.2337 +     * the {@link #isWhitespace(int)} method.
  1.2338 +     *
  1.2339 +     * @param   ch the character to be tested.
  1.2340 +     * @return  {@code true} if the character is a Java whitespace
  1.2341 +     *          character; {@code false} otherwise.
  1.2342 +     * @see     Character#isSpaceChar(char)
  1.2343 +     * @since   1.1
  1.2344 +     */
  1.2345 +    public static boolean isWhitespace(char ch) {
  1.2346 +        return isWhitespace((int)ch);
  1.2347 +    }
  1.2348 +
  1.2349 +    /**
  1.2350 +     * Determines if the specified character (Unicode code point) is
  1.2351 +     * white space according to Java.  A character is a Java
  1.2352 +     * whitespace character if and only if it satisfies one of the
  1.2353 +     * following criteria:
  1.2354 +     * <ul>
  1.2355 +     * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
  1.2356 +     *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
  1.2357 +     *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
  1.2358 +     *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
  1.2359 +     * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
  1.2360 +     * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
  1.2361 +     * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
  1.2362 +     * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
  1.2363 +     * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
  1.2364 +     * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
  1.2365 +     * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
  1.2366 +     * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
  1.2367 +     * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
  1.2368 +     * </ul>
  1.2369 +     * <p>
  1.2370 +     *
  1.2371 +     * @param   codePoint the character (Unicode code point) to be tested.
  1.2372 +     * @return  {@code true} if the character is a Java whitespace
  1.2373 +     *          character; {@code false} otherwise.
  1.2374 +     * @see     Character#isSpaceChar(int)
  1.2375 +     * @since   1.5
  1.2376 +     */
  1.2377 +    public static boolean isWhitespace(int codePoint) {
  1.2378 +        throw new UnsupportedOperationException();
  1.2379 +    }
  1.2380 +
  1.2381 +    /**
  1.2382 +     * Determines if the specified character is an ISO control
  1.2383 +     * character.  A character is considered to be an ISO control
  1.2384 +     * character if its code is in the range {@code '\u005Cu0000'}
  1.2385 +     * through {@code '\u005Cu001F'} or in the range
  1.2386 +     * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
  1.2387 +     *
  1.2388 +     * <p><b>Note:</b> This method cannot handle <a
  1.2389 +     * href="#supplementary"> supplementary characters</a>. To support
  1.2390 +     * all Unicode characters, including supplementary characters, use
  1.2391 +     * the {@link #isISOControl(int)} method.
  1.2392 +     *
  1.2393 +     * @param   ch      the character to be tested.
  1.2394 +     * @return  {@code true} if the character is an ISO control character;
  1.2395 +     *          {@code false} otherwise.
  1.2396 +     *
  1.2397 +     * @see     Character#isSpaceChar(char)
  1.2398 +     * @see     Character#isWhitespace(char)
  1.2399 +     * @since   1.1
  1.2400 +     */
  1.2401 +    public static boolean isISOControl(char ch) {
  1.2402 +        return isISOControl((int)ch);
  1.2403 +    }
  1.2404 +
  1.2405 +    /**
  1.2406 +     * Determines if the referenced character (Unicode code point) is an ISO control
  1.2407 +     * character.  A character is considered to be an ISO control
  1.2408 +     * character if its code is in the range {@code '\u005Cu0000'}
  1.2409 +     * through {@code '\u005Cu001F'} or in the range
  1.2410 +     * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
  1.2411 +     *
  1.2412 +     * @param   codePoint the character (Unicode code point) to be tested.
  1.2413 +     * @return  {@code true} if the character is an ISO control character;
  1.2414 +     *          {@code false} otherwise.
  1.2415 +     * @see     Character#isSpaceChar(int)
  1.2416 +     * @see     Character#isWhitespace(int)
  1.2417 +     * @since   1.5
  1.2418 +     */
  1.2419 +    public static boolean isISOControl(int codePoint) {
  1.2420 +        // Optimized form of:
  1.2421 +        //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
  1.2422 +        //     (codePoint >= 0x7F && codePoint <= 0x9F);
  1.2423 +        return codePoint <= 0x9F &&
  1.2424 +            (codePoint >= 0x7F || (codePoint >>> 5 == 0));
  1.2425 +    }
  1.2426 +
  1.2427 +    /**
  1.2428 +     * Determines the character representation for a specific digit in
  1.2429 +     * the specified radix. If the value of {@code radix} is not a
  1.2430 +     * valid radix, or the value of {@code digit} is not a valid
  1.2431 +     * digit in the specified radix, the null character
  1.2432 +     * ({@code '\u005Cu0000'}) is returned.
  1.2433 +     * <p>
  1.2434 +     * The {@code radix} argument is valid if it is greater than or
  1.2435 +     * equal to {@code MIN_RADIX} and less than or equal to
  1.2436 +     * {@code MAX_RADIX}. The {@code digit} argument is valid if
  1.2437 +     * {@code 0 <= digit < radix}.
  1.2438 +     * <p>
  1.2439 +     * If the digit is less than 10, then
  1.2440 +     * {@code '0' + digit} is returned. Otherwise, the value
  1.2441 +     * {@code 'a' + digit - 10} is returned.
  1.2442 +     *
  1.2443 +     * @param   digit   the number to convert to a character.
  1.2444 +     * @param   radix   the radix.
  1.2445 +     * @return  the {@code char} representation of the specified digit
  1.2446 +     *          in the specified radix.
  1.2447 +     * @see     Character#MIN_RADIX
  1.2448 +     * @see     Character#MAX_RADIX
  1.2449 +     * @see     Character#digit(char, int)
  1.2450 +     */
  1.2451 +    public static char forDigit(int digit, int radix) {
  1.2452 +        if ((digit >= radix) || (digit < 0)) {
  1.2453 +            return '\0';
  1.2454 +        }
  1.2455 +        if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
  1.2456 +            return '\0';
  1.2457 +        }
  1.2458 +        if (digit < 10) {
  1.2459 +            return (char)('0' + digit);
  1.2460 +        }
  1.2461 +        return (char)('a' - 10 + digit);
  1.2462 +    }
  1.2463 +
  1.2464 +    /**
  1.2465 +     * Compares two {@code Character} objects numerically.
  1.2466 +     *
  1.2467 +     * @param   anotherCharacter   the {@code Character} to be compared.
  1.2468 +
  1.2469 +     * @return  the value {@code 0} if the argument {@code Character}
  1.2470 +     *          is equal to this {@code Character}; a value less than
  1.2471 +     *          {@code 0} if this {@code Character} is numerically less
  1.2472 +     *          than the {@code Character} argument; and a value greater than
  1.2473 +     *          {@code 0} if this {@code Character} is numerically greater
  1.2474 +     *          than the {@code Character} argument (unsigned comparison).
  1.2475 +     *          Note that this is strictly a numerical comparison; it is not
  1.2476 +     *          locale-dependent.
  1.2477 +     * @since   1.2
  1.2478 +     */
  1.2479 +    public int compareTo(Character anotherCharacter) {
  1.2480 +        return compare(this.value, anotherCharacter.value);
  1.2481 +    }
  1.2482 +
  1.2483 +    /**
  1.2484 +     * Compares two {@code char} values numerically.
  1.2485 +     * The value returned is identical to what would be returned by:
  1.2486 +     * <pre>
  1.2487 +     *    Character.valueOf(x).compareTo(Character.valueOf(y))
  1.2488 +     * </pre>
  1.2489 +     *
  1.2490 +     * @param  x the first {@code char} to compare
  1.2491 +     * @param  y the second {@code char} to compare
  1.2492 +     * @return the value {@code 0} if {@code x == y};
  1.2493 +     *         a value less than {@code 0} if {@code x < y}; and
  1.2494 +     *         a value greater than {@code 0} if {@code x > y}
  1.2495 +     * @since 1.7
  1.2496 +     */
  1.2497 +    public static int compare(char x, char y) {
  1.2498 +        return x - y;
  1.2499 +    }
  1.2500 +
  1.2501 +
  1.2502 +    /**
  1.2503 +     * The number of bits used to represent a <tt>char</tt> value in unsigned
  1.2504 +     * binary form, constant {@code 16}.
  1.2505 +     *
  1.2506 +     * @since 1.5
  1.2507 +     */
  1.2508 +    public static final int SIZE = 16;
  1.2509 +
  1.2510 +    /**
  1.2511 +     * Returns the value obtained by reversing the order of the bytes in the
  1.2512 +     * specified <tt>char</tt> value.
  1.2513 +     *
  1.2514 +     * @return the value obtained by reversing (or, equivalently, swapping)
  1.2515 +     *     the bytes in the specified <tt>char</tt> value.
  1.2516 +     * @since 1.5
  1.2517 +     */
  1.2518 +    public static char reverseBytes(char ch) {
  1.2519 +        return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
  1.2520 +    }
  1.2521 +
  1.2522 +}
changeset 772	d382dacfd73f
parent 594	035fcbd7a33c
child 791	af4001c85438