1.1 --- a/emul/mini/src/main/java/java/lang/Character.java Tue Feb 26 14:55:55 2013 +0100
1.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
1.3 @@ -1,2519 +0,0 @@
1.4 -/*
1.5 - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
1.6 - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
1.7 - *
1.8 - * This code is free software; you can redistribute it and/or modify it
1.9 - * under the terms of the GNU General Public License version 2 only, as
1.10 - * published by the Free Software Foundation. Oracle designates this
1.11 - * particular file as subject to the "Classpath" exception as provided
1.12 - * by Oracle in the LICENSE file that accompanied this code.
1.13 - *
1.14 - * This code is distributed in the hope that it will be useful, but WITHOUT
1.15 - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1.16 - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
1.17 - * version 2 for more details (a copy is included in the LICENSE file that
1.18 - * accompanied this code).
1.19 - *
1.20 - * You should have received a copy of the GNU General Public License version
1.21 - * 2 along with this work; if not, write to the Free Software Foundation,
1.22 - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
1.23 - *
1.24 - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
1.25 - * or visit www.oracle.com if you need additional information or have any
1.26 - * questions.
1.27 - */
1.28 -
1.29 -package java.lang;
1.30 -
1.31 -import org.apidesign.bck2brwsr.core.JavaScriptBody;
1.32 -
1.33 -/**
1.34 - * The {@code Character} class wraps a value of the primitive
1.35 - * type {@code char} in an object. An object of type
1.36 - * {@code Character} contains a single field whose type is
1.37 - * {@code char}.
1.38 - * <p>
1.39 - * In addition, this class provides several methods for determining
1.40 - * a character's category (lowercase letter, digit, etc.) and for converting
1.41 - * characters from uppercase to lowercase and vice versa.
1.42 - * <p>
1.43 - * Character information is based on the Unicode Standard, version 6.0.0.
1.44 - * <p>
1.45 - * The methods and data of class {@code Character} are defined by
1.46 - * the information in the <i>UnicodeData</i> file that is part of the
1.47 - * Unicode Character Database maintained by the Unicode
1.48 - * Consortium. This file specifies various properties including name
1.49 - * and general category for every defined Unicode code point or
1.50 - * character range.
1.51 - * <p>
1.52 - * The file and its description are available from the Unicode Consortium at:
1.53 - * <ul>
1.54 - * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
1.55 - * </ul>
1.56 - *
1.57 - * <h4><a name="unicode">Unicode Character Representations</a></h4>
1.58 - *
1.59 - * <p>The {@code char} data type (and therefore the value that a
1.60 - * {@code Character} object encapsulates) are based on the
1.61 - * original Unicode specification, which defined characters as
1.62 - * fixed-width 16-bit entities. The Unicode Standard has since been
1.63 - * changed to allow for characters whose representation requires more
1.64 - * than 16 bits. The range of legal <em>code point</em>s is now
1.65 - * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
1.66 - * (Refer to the <a
1.67 - * href="http://www.unicode.org/reports/tr27/#notation"><i>
1.68 - * definition</i></a> of the U+<i>n</i> notation in the Unicode
1.69 - * Standard.)
1.70 - *
1.71 - * <p><a name="BMP">The set of characters from U+0000 to U+FFFF is
1.72 - * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
1.73 - * <a name="supplementary">Characters</a> whose code points are greater
1.74 - * than U+FFFF are called <em>supplementary character</em>s. The Java
1.75 - * platform uses the UTF-16 representation in {@code char} arrays and
1.76 - * in the {@code String} and {@code StringBuffer} classes. In
1.77 - * this representation, supplementary characters are represented as a pair
1.78 - * of {@code char} values, the first from the <em>high-surrogates</em>
1.79 - * range, (\uD800-\uDBFF), the second from the
1.80 - * <em>low-surrogates</em> range (\uDC00-\uDFFF).
1.81 - *
1.82 - * <p>A {@code char} value, therefore, represents Basic
1.83 - * Multilingual Plane (BMP) code points, including the surrogate
1.84 - * code points, or code units of the UTF-16 encoding. An
1.85 - * {@code int} value represents all Unicode code points,
1.86 - * including supplementary code points. The lower (least significant)
1.87 - * 21 bits of {@code int} are used to represent Unicode code
1.88 - * points and the upper (most significant) 11 bits must be zero.
1.89 - * Unless otherwise specified, the behavior with respect to
1.90 - * supplementary characters and surrogate {@code char} values is
1.91 - * as follows:
1.92 - *
1.93 - * <ul>
1.94 - * <li>The methods that only accept a {@code char} value cannot support
1.95 - * supplementary characters. They treat {@code char} values from the
1.96 - * surrogate ranges as undefined characters. For example,
1.97 - * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
1.98 - * this specific value if followed by any low-surrogate value in a string
1.99 - * would represent a letter.
1.100 - *
1.101 - * <li>The methods that accept an {@code int} value support all
1.102 - * Unicode characters, including supplementary characters. For
1.103 - * example, {@code Character.isLetter(0x2F81A)} returns
1.104 - * {@code true} because the code point value represents a letter
1.105 - * (a CJK ideograph).
1.106 - * </ul>
1.107 - *
1.108 - * <p>In the Java SE API documentation, <em>Unicode code point</em> is
1.109 - * used for character values in the range between U+0000 and U+10FFFF,
1.110 - * and <em>Unicode code unit</em> is used for 16-bit
1.111 - * {@code char} values that are code units of the <em>UTF-16</em>
1.112 - * encoding. For more information on Unicode terminology, refer to the
1.113 - * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
1.114 - *
1.115 - * @author Lee Boynton
1.116 - * @author Guy Steele
1.117 - * @author Akira Tanaka
1.118 - * @author Martin Buchholz
1.119 - * @author Ulf Zibis
1.120 - * @since 1.0
1.121 - */
1.122 -public final
1.123 -class Character implements java.io.Serializable, Comparable<Character> {
1.124 - /**
1.125 - * The minimum radix available for conversion to and from strings.
1.126 - * The constant value of this field is the smallest value permitted
1.127 - * for the radix argument in radix-conversion methods such as the
1.128 - * {@code digit} method, the {@code forDigit} method, and the
1.129 - * {@code toString} method of class {@code Integer}.
1.130 - *
1.131 - * @see Character#digit(char, int)
1.132 - * @see Character#forDigit(int, int)
1.133 - * @see Integer#toString(int, int)
1.134 - * @see Integer#valueOf(String)
1.135 - */
1.136 - public static final int MIN_RADIX = 2;
1.137 -
1.138 - /**
1.139 - * The maximum radix available for conversion to and from strings.
1.140 - * The constant value of this field is the largest value permitted
1.141 - * for the radix argument in radix-conversion methods such as the
1.142 - * {@code digit} method, the {@code forDigit} method, and the
1.143 - * {@code toString} method of class {@code Integer}.
1.144 - *
1.145 - * @see Character#digit(char, int)
1.146 - * @see Character#forDigit(int, int)
1.147 - * @see Integer#toString(int, int)
1.148 - * @see Integer#valueOf(String)
1.149 - */
1.150 - public static final int MAX_RADIX = 36;
1.151 -
1.152 - /**
1.153 - * The constant value of this field is the smallest value of type
1.154 - * {@code char}, {@code '\u005Cu0000'}.
1.155 - *
1.156 - * @since 1.0.2
1.157 - */
1.158 - public static final char MIN_VALUE = '\u0000';
1.159 -
1.160 - /**
1.161 - * The constant value of this field is the largest value of type
1.162 - * {@code char}, {@code '\u005CuFFFF'}.
1.163 - *
1.164 - * @since 1.0.2
1.165 - */
1.166 - public static final char MAX_VALUE = '\uFFFF';
1.167 -
1.168 - /**
1.169 - * The {@code Class} instance representing the primitive type
1.170 - * {@code char}.
1.171 - *
1.172 - * @since 1.1
1.173 - */
1.174 - public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
1.175 -
1.176 - /*
1.177 - * Normative general types
1.178 - */
1.179 -
1.180 - /*
1.181 - * General character types
1.182 - */
1.183 -
1.184 - /**
1.185 - * General category "Cn" in the Unicode specification.
1.186 - * @since 1.1
1.187 - */
1.188 - public static final byte UNASSIGNED = 0;
1.189 -
1.190 - /**
1.191 - * General category "Lu" in the Unicode specification.
1.192 - * @since 1.1
1.193 - */
1.194 - public static final byte UPPERCASE_LETTER = 1;
1.195 -
1.196 - /**
1.197 - * General category "Ll" in the Unicode specification.
1.198 - * @since 1.1
1.199 - */
1.200 - public static final byte LOWERCASE_LETTER = 2;
1.201 -
1.202 - /**
1.203 - * General category "Lt" in the Unicode specification.
1.204 - * @since 1.1
1.205 - */
1.206 - public static final byte TITLECASE_LETTER = 3;
1.207 -
1.208 - /**
1.209 - * General category "Lm" in the Unicode specification.
1.210 - * @since 1.1
1.211 - */
1.212 - public static final byte MODIFIER_LETTER = 4;
1.213 -
1.214 - /**
1.215 - * General category "Lo" in the Unicode specification.
1.216 - * @since 1.1
1.217 - */
1.218 - public static final byte OTHER_LETTER = 5;
1.219 -
1.220 - /**
1.221 - * General category "Mn" in the Unicode specification.
1.222 - * @since 1.1
1.223 - */
1.224 - public static final byte NON_SPACING_MARK = 6;
1.225 -
1.226 - /**
1.227 - * General category "Me" in the Unicode specification.
1.228 - * @since 1.1
1.229 - */
1.230 - public static final byte ENCLOSING_MARK = 7;
1.231 -
1.232 - /**
1.233 - * General category "Mc" in the Unicode specification.
1.234 - * @since 1.1
1.235 - */
1.236 - public static final byte COMBINING_SPACING_MARK = 8;
1.237 -
1.238 - /**
1.239 - * General category "Nd" in the Unicode specification.
1.240 - * @since 1.1
1.241 - */
1.242 - public static final byte DECIMAL_DIGIT_NUMBER = 9;
1.243 -
1.244 - /**
1.245 - * General category "Nl" in the Unicode specification.
1.246 - * @since 1.1
1.247 - */
1.248 - public static final byte LETTER_NUMBER = 10;
1.249 -
1.250 - /**
1.251 - * General category "No" in the Unicode specification.
1.252 - * @since 1.1
1.253 - */
1.254 - public static final byte OTHER_NUMBER = 11;
1.255 -
1.256 - /**
1.257 - * General category "Zs" in the Unicode specification.
1.258 - * @since 1.1
1.259 - */
1.260 - public static final byte SPACE_SEPARATOR = 12;
1.261 -
1.262 - /**
1.263 - * General category "Zl" in the Unicode specification.
1.264 - * @since 1.1
1.265 - */
1.266 - public static final byte LINE_SEPARATOR = 13;
1.267 -
1.268 - /**
1.269 - * General category "Zp" in the Unicode specification.
1.270 - * @since 1.1
1.271 - */
1.272 - public static final byte PARAGRAPH_SEPARATOR = 14;
1.273 -
1.274 - /**
1.275 - * General category "Cc" in the Unicode specification.
1.276 - * @since 1.1
1.277 - */
1.278 - public static final byte CONTROL = 15;
1.279 -
1.280 - /**
1.281 - * General category "Cf" in the Unicode specification.
1.282 - * @since 1.1
1.283 - */
1.284 - public static final byte FORMAT = 16;
1.285 -
1.286 - /**
1.287 - * General category "Co" in the Unicode specification.
1.288 - * @since 1.1
1.289 - */
1.290 - public static final byte PRIVATE_USE = 18;
1.291 -
1.292 - /**
1.293 - * General category "Cs" in the Unicode specification.
1.294 - * @since 1.1
1.295 - */
1.296 - public static final byte SURROGATE = 19;
1.297 -
1.298 - /**
1.299 - * General category "Pd" in the Unicode specification.
1.300 - * @since 1.1
1.301 - */
1.302 - public static final byte DASH_PUNCTUATION = 20;
1.303 -
1.304 - /**
1.305 - * General category "Ps" in the Unicode specification.
1.306 - * @since 1.1
1.307 - */
1.308 - public static final byte START_PUNCTUATION = 21;
1.309 -
1.310 - /**
1.311 - * General category "Pe" in the Unicode specification.
1.312 - * @since 1.1
1.313 - */
1.314 - public static final byte END_PUNCTUATION = 22;
1.315 -
1.316 - /**
1.317 - * General category "Pc" in the Unicode specification.
1.318 - * @since 1.1
1.319 - */
1.320 - public static final byte CONNECTOR_PUNCTUATION = 23;
1.321 -
1.322 - /**
1.323 - * General category "Po" in the Unicode specification.
1.324 - * @since 1.1
1.325 - */
1.326 - public static final byte OTHER_PUNCTUATION = 24;
1.327 -
1.328 - /**
1.329 - * General category "Sm" in the Unicode specification.
1.330 - * @since 1.1
1.331 - */
1.332 - public static final byte MATH_SYMBOL = 25;
1.333 -
1.334 - /**
1.335 - * General category "Sc" in the Unicode specification.
1.336 - * @since 1.1
1.337 - */
1.338 - public static final byte CURRENCY_SYMBOL = 26;
1.339 -
1.340 - /**
1.341 - * General category "Sk" in the Unicode specification.
1.342 - * @since 1.1
1.343 - */
1.344 - public static final byte MODIFIER_SYMBOL = 27;
1.345 -
1.346 - /**
1.347 - * General category "So" in the Unicode specification.
1.348 - * @since 1.1
1.349 - */
1.350 - public static final byte OTHER_SYMBOL = 28;
1.351 -
1.352 - /**
1.353 - * General category "Pi" in the Unicode specification.
1.354 - * @since 1.4
1.355 - */
1.356 - public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
1.357 -
1.358 - /**
1.359 - * General category "Pf" in the Unicode specification.
1.360 - * @since 1.4
1.361 - */
1.362 - public static final byte FINAL_QUOTE_PUNCTUATION = 30;
1.363 -
1.364 - /**
1.365 - * Error flag. Use int (code point) to avoid confusion with U+FFFF.
1.366 - */
1.367 - static final int ERROR = 0xFFFFFFFF;
1.368 -
1.369 -
1.370 - /**
1.371 - * Undefined bidirectional character type. Undefined {@code char}
1.372 - * values have undefined directionality in the Unicode specification.
1.373 - * @since 1.4
1.374 - */
1.375 - public static final byte DIRECTIONALITY_UNDEFINED = -1;
1.376 -
1.377 - /**
1.378 - * Strong bidirectional character type "L" in the Unicode specification.
1.379 - * @since 1.4
1.380 - */
1.381 - public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
1.382 -
1.383 - /**
1.384 - * Strong bidirectional character type "R" in the Unicode specification.
1.385 - * @since 1.4
1.386 - */
1.387 - public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
1.388 -
1.389 - /**
1.390 - * Strong bidirectional character type "AL" in the Unicode specification.
1.391 - * @since 1.4
1.392 - */
1.393 - public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
1.394 -
1.395 - /**
1.396 - * Weak bidirectional character type "EN" in the Unicode specification.
1.397 - * @since 1.4
1.398 - */
1.399 - public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
1.400 -
1.401 - /**
1.402 - * Weak bidirectional character type "ES" in the Unicode specification.
1.403 - * @since 1.4
1.404 - */
1.405 - public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
1.406 -
1.407 - /**
1.408 - * Weak bidirectional character type "ET" in the Unicode specification.
1.409 - * @since 1.4
1.410 - */
1.411 - public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
1.412 -
1.413 - /**
1.414 - * Weak bidirectional character type "AN" in the Unicode specification.
1.415 - * @since 1.4
1.416 - */
1.417 - public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
1.418 -
1.419 - /**
1.420 - * Weak bidirectional character type "CS" in the Unicode specification.
1.421 - * @since 1.4
1.422 - */
1.423 - public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
1.424 -
1.425 - /**
1.426 - * Weak bidirectional character type "NSM" in the Unicode specification.
1.427 - * @since 1.4
1.428 - */
1.429 - public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
1.430 -
1.431 - /**
1.432 - * Weak bidirectional character type "BN" in the Unicode specification.
1.433 - * @since 1.4
1.434 - */
1.435 - public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
1.436 -
1.437 - /**
1.438 - * Neutral bidirectional character type "B" in the Unicode specification.
1.439 - * @since 1.4
1.440 - */
1.441 - public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
1.442 -
1.443 - /**
1.444 - * Neutral bidirectional character type "S" in the Unicode specification.
1.445 - * @since 1.4
1.446 - */
1.447 - public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
1.448 -
1.449 - /**
1.450 - * Neutral bidirectional character type "WS" in the Unicode specification.
1.451 - * @since 1.4
1.452 - */
1.453 - public static final byte DIRECTIONALITY_WHITESPACE = 12;
1.454 -
1.455 - /**
1.456 - * Neutral bidirectional character type "ON" in the Unicode specification.
1.457 - * @since 1.4
1.458 - */
1.459 - public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
1.460 -
1.461 - /**
1.462 - * Strong bidirectional character type "LRE" in the Unicode specification.
1.463 - * @since 1.4
1.464 - */
1.465 - public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
1.466 -
1.467 - /**
1.468 - * Strong bidirectional character type "LRO" in the Unicode specification.
1.469 - * @since 1.4
1.470 - */
1.471 - public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
1.472 -
1.473 - /**
1.474 - * Strong bidirectional character type "RLE" in the Unicode specification.
1.475 - * @since 1.4
1.476 - */
1.477 - public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
1.478 -
1.479 - /**
1.480 - * Strong bidirectional character type "RLO" in the Unicode specification.
1.481 - * @since 1.4
1.482 - */
1.483 - public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
1.484 -
1.485 - /**
1.486 - * Weak bidirectional character type "PDF" in the Unicode specification.
1.487 - * @since 1.4
1.488 - */
1.489 - public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
1.490 -
1.491 - /**
1.492 - * The minimum value of a
1.493 - * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
1.494 - * Unicode high-surrogate code unit</a>
1.495 - * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
1.496 - * A high-surrogate is also known as a <i>leading-surrogate</i>.
1.497 - *
1.498 - * @since 1.5
1.499 - */
1.500 - public static final char MIN_HIGH_SURROGATE = '\uD800';
1.501 -
1.502 - /**
1.503 - * The maximum value of a
1.504 - * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
1.505 - * Unicode high-surrogate code unit</a>
1.506 - * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
1.507 - * A high-surrogate is also known as a <i>leading-surrogate</i>.
1.508 - *
1.509 - * @since 1.5
1.510 - */
1.511 - public static final char MAX_HIGH_SURROGATE = '\uDBFF';
1.512 -
1.513 - /**
1.514 - * The minimum value of a
1.515 - * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
1.516 - * Unicode low-surrogate code unit</a>
1.517 - * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
1.518 - * A low-surrogate is also known as a <i>trailing-surrogate</i>.
1.519 - *
1.520 - * @since 1.5
1.521 - */
1.522 - public static final char MIN_LOW_SURROGATE = '\uDC00';
1.523 -
1.524 - /**
1.525 - * The maximum value of a
1.526 - * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
1.527 - * Unicode low-surrogate code unit</a>
1.528 - * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
1.529 - * A low-surrogate is also known as a <i>trailing-surrogate</i>.
1.530 - *
1.531 - * @since 1.5
1.532 - */
1.533 - public static final char MAX_LOW_SURROGATE = '\uDFFF';
1.534 -
1.535 - /**
1.536 - * The minimum value of a Unicode surrogate code unit in the
1.537 - * UTF-16 encoding, constant {@code '\u005CuD800'}.
1.538 - *
1.539 - * @since 1.5
1.540 - */
1.541 - public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
1.542 -
1.543 - /**
1.544 - * The maximum value of a Unicode surrogate code unit in the
1.545 - * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
1.546 - *
1.547 - * @since 1.5
1.548 - */
1.549 - public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
1.550 -
1.551 - /**
1.552 - * The minimum value of a
1.553 - * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
1.554 - * Unicode supplementary code point</a>, constant {@code U+10000}.
1.555 - *
1.556 - * @since 1.5
1.557 - */
1.558 - public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
1.559 -
1.560 - /**
1.561 - * The minimum value of a
1.562 - * <a href="http://www.unicode.org/glossary/#code_point">
1.563 - * Unicode code point</a>, constant {@code U+0000}.
1.564 - *
1.565 - * @since 1.5
1.566 - */
1.567 - public static final int MIN_CODE_POINT = 0x000000;
1.568 -
1.569 - /**
1.570 - * The maximum value of a
1.571 - * <a href="http://www.unicode.org/glossary/#code_point">
1.572 - * Unicode code point</a>, constant {@code U+10FFFF}.
1.573 - *
1.574 - * @since 1.5
1.575 - */
1.576 - public static final int MAX_CODE_POINT = 0X10FFFF;
1.577 -
1.578 -
1.579 - /**
1.580 - * Instances of this class represent particular subsets of the Unicode
1.581 - * character set. The only family of subsets defined in the
1.582 - * {@code Character} class is {@link Character.UnicodeBlock}.
1.583 - * Other portions of the Java API may define other subsets for their
1.584 - * own purposes.
1.585 - *
1.586 - * @since 1.2
1.587 - */
1.588 - public static class Subset {
1.589 -
1.590 - private String name;
1.591 -
1.592 - /**
1.593 - * Constructs a new {@code Subset} instance.
1.594 - *
1.595 - * @param name The name of this subset
1.596 - * @exception NullPointerException if name is {@code null}
1.597 - */
1.598 - protected Subset(String name) {
1.599 - if (name == null) {
1.600 - throw new NullPointerException("name");
1.601 - }
1.602 - this.name = name;
1.603 - }
1.604 -
1.605 - /**
1.606 - * Compares two {@code Subset} objects for equality.
1.607 - * This method returns {@code true} if and only if
1.608 - * {@code this} and the argument refer to the same
1.609 - * object; since this method is {@code final}, this
1.610 - * guarantee holds for all subclasses.
1.611 - */
1.612 - public final boolean equals(Object obj) {
1.613 - return (this == obj);
1.614 - }
1.615 -
1.616 - /**
1.617 - * Returns the standard hash code as defined by the
1.618 - * {@link Object#hashCode} method. This method
1.619 - * is {@code final} in order to ensure that the
1.620 - * {@code equals} and {@code hashCode} methods will
1.621 - * be consistent in all subclasses.
1.622 - */
1.623 - public final int hashCode() {
1.624 - return super.hashCode();
1.625 - }
1.626 -
1.627 - /**
1.628 - * Returns the name of this subset.
1.629 - */
1.630 - public final String toString() {
1.631 - return name;
1.632 - }
1.633 - }
1.634 -
1.635 - // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
1.636 - // for the latest specification of Unicode Blocks.
1.637 -
1.638 -
1.639 - /**
1.640 - * The value of the {@code Character}.
1.641 - *
1.642 - * @serial
1.643 - */
1.644 - private final char value;
1.645 -
1.646 - /** use serialVersionUID from JDK 1.0.2 for interoperability */
1.647 - private static final long serialVersionUID = 3786198910865385080L;
1.648 -
1.649 - /**
1.650 - * Constructs a newly allocated {@code Character} object that
1.651 - * represents the specified {@code char} value.
1.652 - *
1.653 - * @param value the value to be represented by the
1.654 - * {@code Character} object.
1.655 - */
1.656 - public Character(char value) {
1.657 - this.value = value;
1.658 - }
1.659 -
1.660 - private static class CharacterCache {
1.661 - private CharacterCache(){}
1.662 -
1.663 - static final Character cache[] = new Character[127 + 1];
1.664 -
1.665 - static {
1.666 - for (int i = 0; i < cache.length; i++)
1.667 - cache[i] = new Character((char)i);
1.668 - }
1.669 - }
1.670 -
1.671 - /**
1.672 - * Returns a <tt>Character</tt> instance representing the specified
1.673 - * <tt>char</tt> value.
1.674 - * If a new <tt>Character</tt> instance is not required, this method
1.675 - * should generally be used in preference to the constructor
1.676 - * {@link #Character(char)}, as this method is likely to yield
1.677 - * significantly better space and time performance by caching
1.678 - * frequently requested values.
1.679 - *
1.680 - * This method will always cache values in the range {@code
1.681 - * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
1.682 - * cache other values outside of this range.
1.683 - *
1.684 - * @param c a char value.
1.685 - * @return a <tt>Character</tt> instance representing <tt>c</tt>.
1.686 - * @since 1.5
1.687 - */
1.688 - public static Character valueOf(char c) {
1.689 - if (c <= 127) { // must cache
1.690 - return CharacterCache.cache[(int)c];
1.691 - }
1.692 - return new Character(c);
1.693 - }
1.694 -
1.695 - /**
1.696 - * Returns the value of this {@code Character} object.
1.697 - * @return the primitive {@code char} value represented by
1.698 - * this object.
1.699 - */
1.700 - public char charValue() {
1.701 - return value;
1.702 - }
1.703 -
1.704 - /**
1.705 - * Returns a hash code for this {@code Character}; equal to the result
1.706 - * of invoking {@code charValue()}.
1.707 - *
1.708 - * @return a hash code value for this {@code Character}
1.709 - */
1.710 - public int hashCode() {
1.711 - return (int)value;
1.712 - }
1.713 -
1.714 - /**
1.715 - * Compares this object against the specified object.
1.716 - * The result is {@code true} if and only if the argument is not
1.717 - * {@code null} and is a {@code Character} object that
1.718 - * represents the same {@code char} value as this object.
1.719 - *
1.720 - * @param obj the object to compare with.
1.721 - * @return {@code true} if the objects are the same;
1.722 - * {@code false} otherwise.
1.723 - */
1.724 - public boolean equals(Object obj) {
1.725 - if (obj instanceof Character) {
1.726 - return value == ((Character)obj).charValue();
1.727 - }
1.728 - return false;
1.729 - }
1.730 -
1.731 - /**
1.732 - * Returns a {@code String} object representing this
1.733 - * {@code Character}'s value. The result is a string of
1.734 - * length 1 whose sole component is the primitive
1.735 - * {@code char} value represented by this
1.736 - * {@code Character} object.
1.737 - *
1.738 - * @return a string representation of this object.
1.739 - */
1.740 - public String toString() {
1.741 - char buf[] = {value};
1.742 - return String.valueOf(buf);
1.743 - }
1.744 -
1.745 - /**
1.746 - * Returns a {@code String} object representing the
1.747 - * specified {@code char}. The result is a string of length
1.748 - * 1 consisting solely of the specified {@code char}.
1.749 - *
1.750 - * @param c the {@code char} to be converted
1.751 - * @return the string representation of the specified {@code char}
1.752 - * @since 1.4
1.753 - */
1.754 - public static String toString(char c) {
1.755 - return String.valueOf(c);
1.756 - }
1.757 -
1.758 - /**
1.759 - * Determines whether the specified code point is a valid
1.760 - * <a href="http://www.unicode.org/glossary/#code_point">
1.761 - * Unicode code point value</a>.
1.762 - *
1.763 - * @param codePoint the Unicode code point to be tested
1.764 - * @return {@code true} if the specified code point value is between
1.765 - * {@link #MIN_CODE_POINT} and
1.766 - * {@link #MAX_CODE_POINT} inclusive;
1.767 - * {@code false} otherwise.
1.768 - * @since 1.5
1.769 - */
1.770 - public static boolean isValidCodePoint(int codePoint) {
1.771 - // Optimized form of:
1.772 - // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
1.773 - int plane = codePoint >>> 16;
1.774 - return plane < ((MAX_CODE_POINT + 1) >>> 16);
1.775 - }
1.776 -
1.777 - /**
1.778 - * Determines whether the specified character (Unicode code point)
1.779 - * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
1.780 - * Such code points can be represented using a single {@code char}.
1.781 - *
1.782 - * @param codePoint the character (Unicode code point) to be tested
1.783 - * @return {@code true} if the specified code point is between
1.784 - * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
1.785 - * {@code false} otherwise.
1.786 - * @since 1.7
1.787 - */
1.788 - public static boolean isBmpCodePoint(int codePoint) {
1.789 - return codePoint >>> 16 == 0;
1.790 - // Optimized form of:
1.791 - // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
1.792 - // We consistently use logical shift (>>>) to facilitate
1.793 - // additional runtime optimizations.
1.794 - }
1.795 -
1.796 - /**
1.797 - * Determines whether the specified character (Unicode code point)
1.798 - * is in the <a href="#supplementary">supplementary character</a> range.
1.799 - *
1.800 - * @param codePoint the character (Unicode code point) to be tested
1.801 - * @return {@code true} if the specified code point is between
1.802 - * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
1.803 - * {@link #MAX_CODE_POINT} inclusive;
1.804 - * {@code false} otherwise.
1.805 - * @since 1.5
1.806 - */
1.807 - public static boolean isSupplementaryCodePoint(int codePoint) {
1.808 - return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
1.809 - && codePoint < MAX_CODE_POINT + 1;
1.810 - }
1.811 -
1.812 - /**
1.813 - * Determines if the given {@code char} value is a
1.814 - * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
1.815 - * Unicode high-surrogate code unit</a>
1.816 - * (also known as <i>leading-surrogate code unit</i>).
1.817 - *
1.818 - * <p>Such values do not represent characters by themselves,
1.819 - * but are used in the representation of
1.820 - * <a href="#supplementary">supplementary characters</a>
1.821 - * in the UTF-16 encoding.
1.822 - *
1.823 - * @param ch the {@code char} value to be tested.
1.824 - * @return {@code true} if the {@code char} value is between
1.825 - * {@link #MIN_HIGH_SURROGATE} and
1.826 - * {@link #MAX_HIGH_SURROGATE} inclusive;
1.827 - * {@code false} otherwise.
1.828 - * @see Character#isLowSurrogate(char)
1.829 - * @see Character.UnicodeBlock#of(int)
1.830 - * @since 1.5
1.831 - */
1.832 - public static boolean isHighSurrogate(char ch) {
1.833 - // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
1.834 - return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
1.835 - }
1.836 -
1.837 - /**
1.838 - * Determines if the given {@code char} value is a
1.839 - * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
1.840 - * Unicode low-surrogate code unit</a>
1.841 - * (also known as <i>trailing-surrogate code unit</i>).
1.842 - *
1.843 - * <p>Such values do not represent characters by themselves,
1.844 - * but are used in the representation of
1.845 - * <a href="#supplementary">supplementary characters</a>
1.846 - * in the UTF-16 encoding.
1.847 - *
1.848 - * @param ch the {@code char} value to be tested.
1.849 - * @return {@code true} if the {@code char} value is between
1.850 - * {@link #MIN_LOW_SURROGATE} and
1.851 - * {@link #MAX_LOW_SURROGATE} inclusive;
1.852 - * {@code false} otherwise.
1.853 - * @see Character#isHighSurrogate(char)
1.854 - * @since 1.5
1.855 - */
1.856 - public static boolean isLowSurrogate(char ch) {
1.857 - return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
1.858 - }
1.859 -
1.860 - /**
1.861 - * Determines if the given {@code char} value is a Unicode
1.862 - * <i>surrogate code unit</i>.
1.863 - *
1.864 - * <p>Such values do not represent characters by themselves,
1.865 - * but are used in the representation of
1.866 - * <a href="#supplementary">supplementary characters</a>
1.867 - * in the UTF-16 encoding.
1.868 - *
1.869 - * <p>A char value is a surrogate code unit if and only if it is either
1.870 - * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
1.871 - * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
1.872 - *
1.873 - * @param ch the {@code char} value to be tested.
1.874 - * @return {@code true} if the {@code char} value is between
1.875 - * {@link #MIN_SURROGATE} and
1.876 - * {@link #MAX_SURROGATE} inclusive;
1.877 - * {@code false} otherwise.
1.878 - * @since 1.7
1.879 - */
1.880 - public static boolean isSurrogate(char ch) {
1.881 - return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
1.882 - }
1.883 -
1.884 - /**
1.885 - * Determines whether the specified pair of {@code char}
1.886 - * values is a valid
1.887 - * <a href="http://www.unicode.org/glossary/#surrogate_pair">
1.888 - * Unicode surrogate pair</a>.
1.889 -
1.890 - * <p>This method is equivalent to the expression:
1.891 - * <blockquote><pre>
1.892 - * isHighSurrogate(high) && isLowSurrogate(low)
1.893 - * </pre></blockquote>
1.894 - *
1.895 - * @param high the high-surrogate code value to be tested
1.896 - * @param low the low-surrogate code value to be tested
1.897 - * @return {@code true} if the specified high and
1.898 - * low-surrogate code values represent a valid surrogate pair;
1.899 - * {@code false} otherwise.
1.900 - * @since 1.5
1.901 - */
1.902 - public static boolean isSurrogatePair(char high, char low) {
1.903 - return isHighSurrogate(high) && isLowSurrogate(low);
1.904 - }
1.905 -
1.906 - /**
1.907 - * Determines the number of {@code char} values needed to
1.908 - * represent the specified character (Unicode code point). If the
1.909 - * specified character is equal to or greater than 0x10000, then
1.910 - * the method returns 2. Otherwise, the method returns 1.
1.911 - *
1.912 - * <p>This method doesn't validate the specified character to be a
1.913 - * valid Unicode code point. The caller must validate the
1.914 - * character value using {@link #isValidCodePoint(int) isValidCodePoint}
1.915 - * if necessary.
1.916 - *
1.917 - * @param codePoint the character (Unicode code point) to be tested.
1.918 - * @return 2 if the character is a valid supplementary character; 1 otherwise.
1.919 - * @see Character#isSupplementaryCodePoint(int)
1.920 - * @since 1.5
1.921 - */
1.922 - public static int charCount(int codePoint) {
1.923 - return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
1.924 - }
1.925 -
1.926 - /**
1.927 - * Converts the specified surrogate pair to its supplementary code
1.928 - * point value. This method does not validate the specified
1.929 - * surrogate pair. The caller must validate it using {@link
1.930 - * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
1.931 - *
1.932 - * @param high the high-surrogate code unit
1.933 - * @param low the low-surrogate code unit
1.934 - * @return the supplementary code point composed from the
1.935 - * specified surrogate pair.
1.936 - * @since 1.5
1.937 - */
1.938 - public static int toCodePoint(char high, char low) {
1.939 - // Optimized form of:
1.940 - // return ((high - MIN_HIGH_SURROGATE) << 10)
1.941 - // + (low - MIN_LOW_SURROGATE)
1.942 - // + MIN_SUPPLEMENTARY_CODE_POINT;
1.943 - return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
1.944 - - (MIN_HIGH_SURROGATE << 10)
1.945 - - MIN_LOW_SURROGATE);
1.946 - }
1.947 -
1.948 - /**
1.949 - * Returns the code point at the given index of the
1.950 - * {@code CharSequence}. If the {@code char} value at
1.951 - * the given index in the {@code CharSequence} is in the
1.952 - * high-surrogate range, the following index is less than the
1.953 - * length of the {@code CharSequence}, and the
1.954 - * {@code char} value at the following index is in the
1.955 - * low-surrogate range, then the supplementary code point
1.956 - * corresponding to this surrogate pair is returned. Otherwise,
1.957 - * the {@code char} value at the given index is returned.
1.958 - *
1.959 - * @param seq a sequence of {@code char} values (Unicode code
1.960 - * units)
1.961 - * @param index the index to the {@code char} values (Unicode
1.962 - * code units) in {@code seq} to be converted
1.963 - * @return the Unicode code point at the given index
1.964 - * @exception NullPointerException if {@code seq} is null.
1.965 - * @exception IndexOutOfBoundsException if the value
1.966 - * {@code index} is negative or not less than
1.967 - * {@link CharSequence#length() seq.length()}.
1.968 - * @since 1.5
1.969 - */
1.970 - public static int codePointAt(CharSequence seq, int index) {
1.971 - char c1 = seq.charAt(index++);
1.972 - if (isHighSurrogate(c1)) {
1.973 - if (index < seq.length()) {
1.974 - char c2 = seq.charAt(index);
1.975 - if (isLowSurrogate(c2)) {
1.976 - return toCodePoint(c1, c2);
1.977 - }
1.978 - }
1.979 - }
1.980 - return c1;
1.981 - }
1.982 -
1.983 - /**
1.984 - * Returns the code point at the given index of the
1.985 - * {@code char} array. If the {@code char} value at
1.986 - * the given index in the {@code char} array is in the
1.987 - * high-surrogate range, the following index is less than the
1.988 - * length of the {@code char} array, and the
1.989 - * {@code char} value at the following index is in the
1.990 - * low-surrogate range, then the supplementary code point
1.991 - * corresponding to this surrogate pair is returned. Otherwise,
1.992 - * the {@code char} value at the given index is returned.
1.993 - *
1.994 - * @param a the {@code char} array
1.995 - * @param index the index to the {@code char} values (Unicode
1.996 - * code units) in the {@code char} array to be converted
1.997 - * @return the Unicode code point at the given index
1.998 - * @exception NullPointerException if {@code a} is null.
1.999 - * @exception IndexOutOfBoundsException if the value
1.1000 - * {@code index} is negative or not less than
1.1001 - * the length of the {@code char} array.
1.1002 - * @since 1.5
1.1003 - */
1.1004 - public static int codePointAt(char[] a, int index) {
1.1005 - return codePointAtImpl(a, index, a.length);
1.1006 - }
1.1007 -
1.1008 - /**
1.1009 - * Returns the code point at the given index of the
1.1010 - * {@code char} array, where only array elements with
1.1011 - * {@code index} less than {@code limit} can be used. If
1.1012 - * the {@code char} value at the given index in the
1.1013 - * {@code char} array is in the high-surrogate range, the
1.1014 - * following index is less than the {@code limit}, and the
1.1015 - * {@code char} value at the following index is in the
1.1016 - * low-surrogate range, then the supplementary code point
1.1017 - * corresponding to this surrogate pair is returned. Otherwise,
1.1018 - * the {@code char} value at the given index is returned.
1.1019 - *
1.1020 - * @param a the {@code char} array
1.1021 - * @param index the index to the {@code char} values (Unicode
1.1022 - * code units) in the {@code char} array to be converted
1.1023 - * @param limit the index after the last array element that
1.1024 - * can be used in the {@code char} array
1.1025 - * @return the Unicode code point at the given index
1.1026 - * @exception NullPointerException if {@code a} is null.
1.1027 - * @exception IndexOutOfBoundsException if the {@code index}
1.1028 - * argument is negative or not less than the {@code limit}
1.1029 - * argument, or if the {@code limit} argument is negative or
1.1030 - * greater than the length of the {@code char} array.
1.1031 - * @since 1.5
1.1032 - */
1.1033 - public static int codePointAt(char[] a, int index, int limit) {
1.1034 - if (index >= limit || limit < 0 || limit > a.length) {
1.1035 - throw new IndexOutOfBoundsException();
1.1036 - }
1.1037 - return codePointAtImpl(a, index, limit);
1.1038 - }
1.1039 -
1.1040 - // throws ArrayIndexOutofBoundsException if index out of bounds
1.1041 - static int codePointAtImpl(char[] a, int index, int limit) {
1.1042 - char c1 = a[index++];
1.1043 - if (isHighSurrogate(c1)) {
1.1044 - if (index < limit) {
1.1045 - char c2 = a[index];
1.1046 - if (isLowSurrogate(c2)) {
1.1047 - return toCodePoint(c1, c2);
1.1048 - }
1.1049 - }
1.1050 - }
1.1051 - return c1;
1.1052 - }
1.1053 -
1.1054 - /**
1.1055 - * Returns the code point preceding the given index of the
1.1056 - * {@code CharSequence}. If the {@code char} value at
1.1057 - * {@code (index - 1)} in the {@code CharSequence} is in
1.1058 - * the low-surrogate range, {@code (index - 2)} is not
1.1059 - * negative, and the {@code char} value at {@code (index - 2)}
1.1060 - * in the {@code CharSequence} is in the
1.1061 - * high-surrogate range, then the supplementary code point
1.1062 - * corresponding to this surrogate pair is returned. Otherwise,
1.1063 - * the {@code char} value at {@code (index - 1)} is
1.1064 - * returned.
1.1065 - *
1.1066 - * @param seq the {@code CharSequence} instance
1.1067 - * @param index the index following the code point that should be returned
1.1068 - * @return the Unicode code point value before the given index.
1.1069 - * @exception NullPointerException if {@code seq} is null.
1.1070 - * @exception IndexOutOfBoundsException if the {@code index}
1.1071 - * argument is less than 1 or greater than {@link
1.1072 - * CharSequence#length() seq.length()}.
1.1073 - * @since 1.5
1.1074 - */
1.1075 - public static int codePointBefore(CharSequence seq, int index) {
1.1076 - char c2 = seq.charAt(--index);
1.1077 - if (isLowSurrogate(c2)) {
1.1078 - if (index > 0) {
1.1079 - char c1 = seq.charAt(--index);
1.1080 - if (isHighSurrogate(c1)) {
1.1081 - return toCodePoint(c1, c2);
1.1082 - }
1.1083 - }
1.1084 - }
1.1085 - return c2;
1.1086 - }
1.1087 -
1.1088 - /**
1.1089 - * Returns the code point preceding the given index of the
1.1090 - * {@code char} array. If the {@code char} value at
1.1091 - * {@code (index - 1)} in the {@code char} array is in
1.1092 - * the low-surrogate range, {@code (index - 2)} is not
1.1093 - * negative, and the {@code char} value at {@code (index - 2)}
1.1094 - * in the {@code char} array is in the
1.1095 - * high-surrogate range, then the supplementary code point
1.1096 - * corresponding to this surrogate pair is returned. Otherwise,
1.1097 - * the {@code char} value at {@code (index - 1)} is
1.1098 - * returned.
1.1099 - *
1.1100 - * @param a the {@code char} array
1.1101 - * @param index the index following the code point that should be returned
1.1102 - * @return the Unicode code point value before the given index.
1.1103 - * @exception NullPointerException if {@code a} is null.
1.1104 - * @exception IndexOutOfBoundsException if the {@code index}
1.1105 - * argument is less than 1 or greater than the length of the
1.1106 - * {@code char} array
1.1107 - * @since 1.5
1.1108 - */
1.1109 - public static int codePointBefore(char[] a, int index) {
1.1110 - return codePointBeforeImpl(a, index, 0);
1.1111 - }
1.1112 -
1.1113 - /**
1.1114 - * Returns the code point preceding the given index of the
1.1115 - * {@code char} array, where only array elements with
1.1116 - * {@code index} greater than or equal to {@code start}
1.1117 - * can be used. If the {@code char} value at {@code (index - 1)}
1.1118 - * in the {@code char} array is in the
1.1119 - * low-surrogate range, {@code (index - 2)} is not less than
1.1120 - * {@code start}, and the {@code char} value at
1.1121 - * {@code (index - 2)} in the {@code char} array is in
1.1122 - * the high-surrogate range, then the supplementary code point
1.1123 - * corresponding to this surrogate pair is returned. Otherwise,
1.1124 - * the {@code char} value at {@code (index - 1)} is
1.1125 - * returned.
1.1126 - *
1.1127 - * @param a the {@code char} array
1.1128 - * @param index the index following the code point that should be returned
1.1129 - * @param start the index of the first array element in the
1.1130 - * {@code char} array
1.1131 - * @return the Unicode code point value before the given index.
1.1132 - * @exception NullPointerException if {@code a} is null.
1.1133 - * @exception IndexOutOfBoundsException if the {@code index}
1.1134 - * argument is not greater than the {@code start} argument or
1.1135 - * is greater than the length of the {@code char} array, or
1.1136 - * if the {@code start} argument is negative or not less than
1.1137 - * the length of the {@code char} array.
1.1138 - * @since 1.5
1.1139 - */
1.1140 - public static int codePointBefore(char[] a, int index, int start) {
1.1141 - if (index <= start || start < 0 || start >= a.length) {
1.1142 - throw new IndexOutOfBoundsException();
1.1143 - }
1.1144 - return codePointBeforeImpl(a, index, start);
1.1145 - }
1.1146 -
1.1147 - // throws ArrayIndexOutofBoundsException if index-1 out of bounds
1.1148 - static int codePointBeforeImpl(char[] a, int index, int start) {
1.1149 - char c2 = a[--index];
1.1150 - if (isLowSurrogate(c2)) {
1.1151 - if (index > start) {
1.1152 - char c1 = a[--index];
1.1153 - if (isHighSurrogate(c1)) {
1.1154 - return toCodePoint(c1, c2);
1.1155 - }
1.1156 - }
1.1157 - }
1.1158 - return c2;
1.1159 - }
1.1160 -
1.1161 - /**
1.1162 - * Returns the leading surrogate (a
1.1163 - * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
1.1164 - * high surrogate code unit</a>) of the
1.1165 - * <a href="http://www.unicode.org/glossary/#surrogate_pair">
1.1166 - * surrogate pair</a>
1.1167 - * representing the specified supplementary character (Unicode
1.1168 - * code point) in the UTF-16 encoding. If the specified character
1.1169 - * is not a
1.1170 - * <a href="Character.html#supplementary">supplementary character</a>,
1.1171 - * an unspecified {@code char} is returned.
1.1172 - *
1.1173 - * <p>If
1.1174 - * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
1.1175 - * is {@code true}, then
1.1176 - * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
1.1177 - * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
1.1178 - * are also always {@code true}.
1.1179 - *
1.1180 - * @param codePoint a supplementary character (Unicode code point)
1.1181 - * @return the leading surrogate code unit used to represent the
1.1182 - * character in the UTF-16 encoding
1.1183 - * @since 1.7
1.1184 - */
1.1185 - public static char highSurrogate(int codePoint) {
1.1186 - return (char) ((codePoint >>> 10)
1.1187 - + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
1.1188 - }
1.1189 -
1.1190 - /**
1.1191 - * Returns the trailing surrogate (a
1.1192 - * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
1.1193 - * low surrogate code unit</a>) of the
1.1194 - * <a href="http://www.unicode.org/glossary/#surrogate_pair">
1.1195 - * surrogate pair</a>
1.1196 - * representing the specified supplementary character (Unicode
1.1197 - * code point) in the UTF-16 encoding. If the specified character
1.1198 - * is not a
1.1199 - * <a href="Character.html#supplementary">supplementary character</a>,
1.1200 - * an unspecified {@code char} is returned.
1.1201 - *
1.1202 - * <p>If
1.1203 - * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
1.1204 - * is {@code true}, then
1.1205 - * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
1.1206 - * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
1.1207 - * are also always {@code true}.
1.1208 - *
1.1209 - * @param codePoint a supplementary character (Unicode code point)
1.1210 - * @return the trailing surrogate code unit used to represent the
1.1211 - * character in the UTF-16 encoding
1.1212 - * @since 1.7
1.1213 - */
1.1214 - public static char lowSurrogate(int codePoint) {
1.1215 - return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
1.1216 - }
1.1217 -
1.1218 - /**
1.1219 - * Converts the specified character (Unicode code point) to its
1.1220 - * UTF-16 representation. If the specified code point is a BMP
1.1221 - * (Basic Multilingual Plane or Plane 0) value, the same value is
1.1222 - * stored in {@code dst[dstIndex]}, and 1 is returned. If the
1.1223 - * specified code point is a supplementary character, its
1.1224 - * surrogate values are stored in {@code dst[dstIndex]}
1.1225 - * (high-surrogate) and {@code dst[dstIndex+1]}
1.1226 - * (low-surrogate), and 2 is returned.
1.1227 - *
1.1228 - * @param codePoint the character (Unicode code point) to be converted.
1.1229 - * @param dst an array of {@code char} in which the
1.1230 - * {@code codePoint}'s UTF-16 value is stored.
1.1231 - * @param dstIndex the start index into the {@code dst}
1.1232 - * array where the converted value is stored.
1.1233 - * @return 1 if the code point is a BMP code point, 2 if the
1.1234 - * code point is a supplementary code point.
1.1235 - * @exception IllegalArgumentException if the specified
1.1236 - * {@code codePoint} is not a valid Unicode code point.
1.1237 - * @exception NullPointerException if the specified {@code dst} is null.
1.1238 - * @exception IndexOutOfBoundsException if {@code dstIndex}
1.1239 - * is negative or not less than {@code dst.length}, or if
1.1240 - * {@code dst} at {@code dstIndex} doesn't have enough
1.1241 - * array element(s) to store the resulting {@code char}
1.1242 - * value(s). (If {@code dstIndex} is equal to
1.1243 - * {@code dst.length-1} and the specified
1.1244 - * {@code codePoint} is a supplementary character, the
1.1245 - * high-surrogate value is not stored in
1.1246 - * {@code dst[dstIndex]}.)
1.1247 - * @since 1.5
1.1248 - */
1.1249 - public static int toChars(int codePoint, char[] dst, int dstIndex) {
1.1250 - if (isBmpCodePoint(codePoint)) {
1.1251 - dst[dstIndex] = (char) codePoint;
1.1252 - return 1;
1.1253 - } else if (isValidCodePoint(codePoint)) {
1.1254 - toSurrogates(codePoint, dst, dstIndex);
1.1255 - return 2;
1.1256 - } else {
1.1257 - throw new IllegalArgumentException();
1.1258 - }
1.1259 - }
1.1260 -
1.1261 - /**
1.1262 - * Converts the specified character (Unicode code point) to its
1.1263 - * UTF-16 representation stored in a {@code char} array. If
1.1264 - * the specified code point is a BMP (Basic Multilingual Plane or
1.1265 - * Plane 0) value, the resulting {@code char} array has
1.1266 - * the same value as {@code codePoint}. If the specified code
1.1267 - * point is a supplementary code point, the resulting
1.1268 - * {@code char} array has the corresponding surrogate pair.
1.1269 - *
1.1270 - * @param codePoint a Unicode code point
1.1271 - * @return a {@code char} array having
1.1272 - * {@code codePoint}'s UTF-16 representation.
1.1273 - * @exception IllegalArgumentException if the specified
1.1274 - * {@code codePoint} is not a valid Unicode code point.
1.1275 - * @since 1.5
1.1276 - */
1.1277 - public static char[] toChars(int codePoint) {
1.1278 - if (isBmpCodePoint(codePoint)) {
1.1279 - return new char[] { (char) codePoint };
1.1280 - } else if (isValidCodePoint(codePoint)) {
1.1281 - char[] result = new char[2];
1.1282 - toSurrogates(codePoint, result, 0);
1.1283 - return result;
1.1284 - } else {
1.1285 - throw new IllegalArgumentException();
1.1286 - }
1.1287 - }
1.1288 -
1.1289 - static void toSurrogates(int codePoint, char[] dst, int index) {
1.1290 - // We write elements "backwards" to guarantee all-or-nothing
1.1291 - dst[index+1] = lowSurrogate(codePoint);
1.1292 - dst[index] = highSurrogate(codePoint);
1.1293 - }
1.1294 -
1.1295 - /**
1.1296 - * Returns the number of Unicode code points in the text range of
1.1297 - * the specified char sequence. The text range begins at the
1.1298 - * specified {@code beginIndex} and extends to the
1.1299 - * {@code char} at index {@code endIndex - 1}. Thus the
1.1300 - * length (in {@code char}s) of the text range is
1.1301 - * {@code endIndex-beginIndex}. Unpaired surrogates within
1.1302 - * the text range count as one code point each.
1.1303 - *
1.1304 - * @param seq the char sequence
1.1305 - * @param beginIndex the index to the first {@code char} of
1.1306 - * the text range.
1.1307 - * @param endIndex the index after the last {@code char} of
1.1308 - * the text range.
1.1309 - * @return the number of Unicode code points in the specified text
1.1310 - * range
1.1311 - * @exception NullPointerException if {@code seq} is null.
1.1312 - * @exception IndexOutOfBoundsException if the
1.1313 - * {@code beginIndex} is negative, or {@code endIndex}
1.1314 - * is larger than the length of the given sequence, or
1.1315 - * {@code beginIndex} is larger than {@code endIndex}.
1.1316 - * @since 1.5
1.1317 - */
1.1318 - public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
1.1319 - int length = seq.length();
1.1320 - if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
1.1321 - throw new IndexOutOfBoundsException();
1.1322 - }
1.1323 - int n = endIndex - beginIndex;
1.1324 - for (int i = beginIndex; i < endIndex; ) {
1.1325 - if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
1.1326 - isLowSurrogate(seq.charAt(i))) {
1.1327 - n--;
1.1328 - i++;
1.1329 - }
1.1330 - }
1.1331 - return n;
1.1332 - }
1.1333 -
1.1334 - /**
1.1335 - * Returns the number of Unicode code points in a subarray of the
1.1336 - * {@code char} array argument. The {@code offset}
1.1337 - * argument is the index of the first {@code char} of the
1.1338 - * subarray and the {@code count} argument specifies the
1.1339 - * length of the subarray in {@code char}s. Unpaired
1.1340 - * surrogates within the subarray count as one code point each.
1.1341 - *
1.1342 - * @param a the {@code char} array
1.1343 - * @param offset the index of the first {@code char} in the
1.1344 - * given {@code char} array
1.1345 - * @param count the length of the subarray in {@code char}s
1.1346 - * @return the number of Unicode code points in the specified subarray
1.1347 - * @exception NullPointerException if {@code a} is null.
1.1348 - * @exception IndexOutOfBoundsException if {@code offset} or
1.1349 - * {@code count} is negative, or if {@code offset +
1.1350 - * count} is larger than the length of the given array.
1.1351 - * @since 1.5
1.1352 - */
1.1353 - public static int codePointCount(char[] a, int offset, int count) {
1.1354 - if (count > a.length - offset || offset < 0 || count < 0) {
1.1355 - throw new IndexOutOfBoundsException();
1.1356 - }
1.1357 - return codePointCountImpl(a, offset, count);
1.1358 - }
1.1359 -
1.1360 - static int codePointCountImpl(char[] a, int offset, int count) {
1.1361 - int endIndex = offset + count;
1.1362 - int n = count;
1.1363 - for (int i = offset; i < endIndex; ) {
1.1364 - if (isHighSurrogate(a[i++]) && i < endIndex &&
1.1365 - isLowSurrogate(a[i])) {
1.1366 - n--;
1.1367 - i++;
1.1368 - }
1.1369 - }
1.1370 - return n;
1.1371 - }
1.1372 -
1.1373 - /**
1.1374 - * Returns the index within the given char sequence that is offset
1.1375 - * from the given {@code index} by {@code codePointOffset}
1.1376 - * code points. Unpaired surrogates within the text range given by
1.1377 - * {@code index} and {@code codePointOffset} count as
1.1378 - * one code point each.
1.1379 - *
1.1380 - * @param seq the char sequence
1.1381 - * @param index the index to be offset
1.1382 - * @param codePointOffset the offset in code points
1.1383 - * @return the index within the char sequence
1.1384 - * @exception NullPointerException if {@code seq} is null.
1.1385 - * @exception IndexOutOfBoundsException if {@code index}
1.1386 - * is negative or larger then the length of the char sequence,
1.1387 - * or if {@code codePointOffset} is positive and the
1.1388 - * subsequence starting with {@code index} has fewer than
1.1389 - * {@code codePointOffset} code points, or if
1.1390 - * {@code codePointOffset} is negative and the subsequence
1.1391 - * before {@code index} has fewer than the absolute value
1.1392 - * of {@code codePointOffset} code points.
1.1393 - * @since 1.5
1.1394 - */
1.1395 - public static int offsetByCodePoints(CharSequence seq, int index,
1.1396 - int codePointOffset) {
1.1397 - int length = seq.length();
1.1398 - if (index < 0 || index > length) {
1.1399 - throw new IndexOutOfBoundsException();
1.1400 - }
1.1401 -
1.1402 - int x = index;
1.1403 - if (codePointOffset >= 0) {
1.1404 - int i;
1.1405 - for (i = 0; x < length && i < codePointOffset; i++) {
1.1406 - if (isHighSurrogate(seq.charAt(x++)) && x < length &&
1.1407 - isLowSurrogate(seq.charAt(x))) {
1.1408 - x++;
1.1409 - }
1.1410 - }
1.1411 - if (i < codePointOffset) {
1.1412 - throw new IndexOutOfBoundsException();
1.1413 - }
1.1414 - } else {
1.1415 - int i;
1.1416 - for (i = codePointOffset; x > 0 && i < 0; i++) {
1.1417 - if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
1.1418 - isHighSurrogate(seq.charAt(x-1))) {
1.1419 - x--;
1.1420 - }
1.1421 - }
1.1422 - if (i < 0) {
1.1423 - throw new IndexOutOfBoundsException();
1.1424 - }
1.1425 - }
1.1426 - return x;
1.1427 - }
1.1428 -
1.1429 - /**
1.1430 - * Returns the index within the given {@code char} subarray
1.1431 - * that is offset from the given {@code index} by
1.1432 - * {@code codePointOffset} code points. The
1.1433 - * {@code start} and {@code count} arguments specify a
1.1434 - * subarray of the {@code char} array. Unpaired surrogates
1.1435 - * within the text range given by {@code index} and
1.1436 - * {@code codePointOffset} count as one code point each.
1.1437 - *
1.1438 - * @param a the {@code char} array
1.1439 - * @param start the index of the first {@code char} of the
1.1440 - * subarray
1.1441 - * @param count the length of the subarray in {@code char}s
1.1442 - * @param index the index to be offset
1.1443 - * @param codePointOffset the offset in code points
1.1444 - * @return the index within the subarray
1.1445 - * @exception NullPointerException if {@code a} is null.
1.1446 - * @exception IndexOutOfBoundsException
1.1447 - * if {@code start} or {@code count} is negative,
1.1448 - * or if {@code start + count} is larger than the length of
1.1449 - * the given array,
1.1450 - * or if {@code index} is less than {@code start} or
1.1451 - * larger then {@code start + count},
1.1452 - * or if {@code codePointOffset} is positive and the text range
1.1453 - * starting with {@code index} and ending with {@code start + count - 1}
1.1454 - * has fewer than {@code codePointOffset} code
1.1455 - * points,
1.1456 - * or if {@code codePointOffset} is negative and the text range
1.1457 - * starting with {@code start} and ending with {@code index - 1}
1.1458 - * has fewer than the absolute value of
1.1459 - * {@code codePointOffset} code points.
1.1460 - * @since 1.5
1.1461 - */
1.1462 - public static int offsetByCodePoints(char[] a, int start, int count,
1.1463 - int index, int codePointOffset) {
1.1464 - if (count > a.length-start || start < 0 || count < 0
1.1465 - || index < start || index > start+count) {
1.1466 - throw new IndexOutOfBoundsException();
1.1467 - }
1.1468 - return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
1.1469 - }
1.1470 -
1.1471 - static int offsetByCodePointsImpl(char[]a, int start, int count,
1.1472 - int index, int codePointOffset) {
1.1473 - int x = index;
1.1474 - if (codePointOffset >= 0) {
1.1475 - int limit = start + count;
1.1476 - int i;
1.1477 - for (i = 0; x < limit && i < codePointOffset; i++) {
1.1478 - if (isHighSurrogate(a[x++]) && x < limit &&
1.1479 - isLowSurrogate(a[x])) {
1.1480 - x++;
1.1481 - }
1.1482 - }
1.1483 - if (i < codePointOffset) {
1.1484 - throw new IndexOutOfBoundsException();
1.1485 - }
1.1486 - } else {
1.1487 - int i;
1.1488 - for (i = codePointOffset; x > start && i < 0; i++) {
1.1489 - if (isLowSurrogate(a[--x]) && x > start &&
1.1490 - isHighSurrogate(a[x-1])) {
1.1491 - x--;
1.1492 - }
1.1493 - }
1.1494 - if (i < 0) {
1.1495 - throw new IndexOutOfBoundsException();
1.1496 - }
1.1497 - }
1.1498 - return x;
1.1499 - }
1.1500 -
1.1501 - /**
1.1502 - * Determines if the specified character is a lowercase character.
1.1503 - * <p>
1.1504 - * A character is lowercase if its general category type, provided
1.1505 - * by {@code Character.getType(ch)}, is
1.1506 - * {@code LOWERCASE_LETTER}, or it has contributory property
1.1507 - * Other_Lowercase as defined by the Unicode Standard.
1.1508 - * <p>
1.1509 - * The following are examples of lowercase characters:
1.1510 - * <p><blockquote><pre>
1.1511 - * a b c d e f g h i j k l m n o p q r s t u v w x y z
1.1512 - * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
1.1513 - * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
1.1514 - * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
1.1515 - * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
1.1516 - * </pre></blockquote>
1.1517 - * <p> Many other Unicode characters are lowercase too.
1.1518 - *
1.1519 - * <p><b>Note:</b> This method cannot handle <a
1.1520 - * href="#supplementary"> supplementary characters</a>. To support
1.1521 - * all Unicode characters, including supplementary characters, use
1.1522 - * the {@link #isLowerCase(int)} method.
1.1523 - *
1.1524 - * @param ch the character to be tested.
1.1525 - * @return {@code true} if the character is lowercase;
1.1526 - * {@code false} otherwise.
1.1527 - * @see Character#isLowerCase(char)
1.1528 - * @see Character#isTitleCase(char)
1.1529 - * @see Character#toLowerCase(char)
1.1530 - * @see Character#getType(char)
1.1531 - */
1.1532 - public static boolean isLowerCase(char ch) {
1.1533 - return ch == toLowerCase(ch);
1.1534 - }
1.1535 -
1.1536 - /**
1.1537 - * Determines if the specified character is an uppercase character.
1.1538 - * <p>
1.1539 - * A character is uppercase if its general category type, provided by
1.1540 - * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}.
1.1541 - * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
1.1542 - * <p>
1.1543 - * The following are examples of uppercase characters:
1.1544 - * <p><blockquote><pre>
1.1545 - * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
1.1546 - * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
1.1547 - * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
1.1548 - * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
1.1549 - * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
1.1550 - * </pre></blockquote>
1.1551 - * <p> Many other Unicode characters are uppercase too.<p>
1.1552 - *
1.1553 - * <p><b>Note:</b> This method cannot handle <a
1.1554 - * href="#supplementary"> supplementary characters</a>. To support
1.1555 - * all Unicode characters, including supplementary characters, use
1.1556 - * the {@link #isUpperCase(int)} method.
1.1557 - *
1.1558 - * @param ch the character to be tested.
1.1559 - * @return {@code true} if the character is uppercase;
1.1560 - * {@code false} otherwise.
1.1561 - * @see Character#isLowerCase(char)
1.1562 - * @see Character#isTitleCase(char)
1.1563 - * @see Character#toUpperCase(char)
1.1564 - * @see Character#getType(char)
1.1565 - * @since 1.0
1.1566 - */
1.1567 - public static boolean isUpperCase(char ch) {
1.1568 - return ch == toUpperCase(ch);
1.1569 - }
1.1570 -
1.1571 - /**
1.1572 - * Determines if the specified character is a titlecase character.
1.1573 - * <p>
1.1574 - * A character is a titlecase character if its general
1.1575 - * category type, provided by {@code Character.getType(ch)},
1.1576 - * is {@code TITLECASE_LETTER}.
1.1577 - * <p>
1.1578 - * Some characters look like pairs of Latin letters. For example, there
1.1579 - * is an uppercase letter that looks like "LJ" and has a corresponding
1.1580 - * lowercase letter that looks like "lj". A third form, which looks like "Lj",
1.1581 - * is the appropriate form to use when rendering a word in lowercase
1.1582 - * with initial capitals, as for a book title.
1.1583 - * <p>
1.1584 - * These are some of the Unicode characters for which this method returns
1.1585 - * {@code true}:
1.1586 - * <ul>
1.1587 - * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
1.1588 - * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
1.1589 - * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
1.1590 - * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
1.1591 - * </ul>
1.1592 - * <p> Many other Unicode characters are titlecase too.<p>
1.1593 - *
1.1594 - * <p><b>Note:</b> This method cannot handle <a
1.1595 - * href="#supplementary"> supplementary characters</a>. To support
1.1596 - * all Unicode characters, including supplementary characters, use
1.1597 - * the {@link #isTitleCase(int)} method.
1.1598 - *
1.1599 - * @param ch the character to be tested.
1.1600 - * @return {@code true} if the character is titlecase;
1.1601 - * {@code false} otherwise.
1.1602 - * @see Character#isLowerCase(char)
1.1603 - * @see Character#isUpperCase(char)
1.1604 - * @see Character#toTitleCase(char)
1.1605 - * @see Character#getType(char)
1.1606 - * @since 1.0.2
1.1607 - */
1.1608 - public static boolean isTitleCase(char ch) {
1.1609 - return isTitleCase((int)ch);
1.1610 - }
1.1611 -
1.1612 - /**
1.1613 - * Determines if the specified character (Unicode code point) is a titlecase character.
1.1614 - * <p>
1.1615 - * A character is a titlecase character if its general
1.1616 - * category type, provided by {@link Character#getType(int) getType(codePoint)},
1.1617 - * is {@code TITLECASE_LETTER}.
1.1618 - * <p>
1.1619 - * Some characters look like pairs of Latin letters. For example, there
1.1620 - * is an uppercase letter that looks like "LJ" and has a corresponding
1.1621 - * lowercase letter that looks like "lj". A third form, which looks like "Lj",
1.1622 - * is the appropriate form to use when rendering a word in lowercase
1.1623 - * with initial capitals, as for a book title.
1.1624 - * <p>
1.1625 - * These are some of the Unicode characters for which this method returns
1.1626 - * {@code true}:
1.1627 - * <ul>
1.1628 - * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
1.1629 - * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
1.1630 - * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
1.1631 - * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
1.1632 - * </ul>
1.1633 - * <p> Many other Unicode characters are titlecase too.<p>
1.1634 - *
1.1635 - * @param codePoint the character (Unicode code point) to be tested.
1.1636 - * @return {@code true} if the character is titlecase;
1.1637 - * {@code false} otherwise.
1.1638 - * @see Character#isLowerCase(int)
1.1639 - * @see Character#isUpperCase(int)
1.1640 - * @see Character#toTitleCase(int)
1.1641 - * @see Character#getType(int)
1.1642 - * @since 1.5
1.1643 - */
1.1644 - public static boolean isTitleCase(int codePoint) {
1.1645 - return getType(codePoint) == Character.TITLECASE_LETTER;
1.1646 - }
1.1647 -
1.1648 - /**
1.1649 - * Determines if the specified character is a digit.
1.1650 - * <p>
1.1651 - * A character is a digit if its general category type, provided
1.1652 - * by {@code Character.getType(ch)}, is
1.1653 - * {@code DECIMAL_DIGIT_NUMBER}.
1.1654 - * <p>
1.1655 - * Some Unicode character ranges that contain digits:
1.1656 - * <ul>
1.1657 - * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
1.1658 - * ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
1.1659 - * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
1.1660 - * Arabic-Indic digits
1.1661 - * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
1.1662 - * Extended Arabic-Indic digits
1.1663 - * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
1.1664 - * Devanagari digits
1.1665 - * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
1.1666 - * Fullwidth digits
1.1667 - * </ul>
1.1668 - *
1.1669 - * Many other character ranges contain digits as well.
1.1670 - *
1.1671 - * <p><b>Note:</b> This method cannot handle <a
1.1672 - * href="#supplementary"> supplementary characters</a>. To support
1.1673 - * all Unicode characters, including supplementary characters, use
1.1674 - * the {@link #isDigit(int)} method.
1.1675 - *
1.1676 - * @param ch the character to be tested.
1.1677 - * @return {@code true} if the character is a digit;
1.1678 - * {@code false} otherwise.
1.1679 - * @see Character#digit(char, int)
1.1680 - * @see Character#forDigit(int, int)
1.1681 - * @see Character#getType(char)
1.1682 - */
1.1683 - public static boolean isDigit(char ch) {
1.1684 - return String.valueOf(ch).matches("\\d");
1.1685 - }
1.1686 -
1.1687 - /**
1.1688 - * Determines if the specified character (Unicode code point) is a digit.
1.1689 - * <p>
1.1690 - * A character is a digit if its general category type, provided
1.1691 - * by {@link Character#getType(int) getType(codePoint)}, is
1.1692 - * {@code DECIMAL_DIGIT_NUMBER}.
1.1693 - * <p>
1.1694 - * Some Unicode character ranges that contain digits:
1.1695 - * <ul>
1.1696 - * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
1.1697 - * ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
1.1698 - * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
1.1699 - * Arabic-Indic digits
1.1700 - * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
1.1701 - * Extended Arabic-Indic digits
1.1702 - * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
1.1703 - * Devanagari digits
1.1704 - * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
1.1705 - * Fullwidth digits
1.1706 - * </ul>
1.1707 - *
1.1708 - * Many other character ranges contain digits as well.
1.1709 - *
1.1710 - * @param codePoint the character (Unicode code point) to be tested.
1.1711 - * @return {@code true} if the character is a digit;
1.1712 - * {@code false} otherwise.
1.1713 - * @see Character#forDigit(int, int)
1.1714 - * @see Character#getType(int)
1.1715 - * @since 1.5
1.1716 - */
1.1717 - public static boolean isDigit(int codePoint) {
1.1718 - return fromCodeChars(codePoint).matches("\\d");
1.1719 - }
1.1720 -
1.1721 - @JavaScriptBody(args = "c", body = "return String.fromCharCode(c);")
1.1722 - private native static String fromCodeChars(int codePoint);
1.1723 -
1.1724 - /**
1.1725 - * Determines if a character is defined in Unicode.
1.1726 - * <p>
1.1727 - * A character is defined if at least one of the following is true:
1.1728 - * <ul>
1.1729 - * <li>It has an entry in the UnicodeData file.
1.1730 - * <li>It has a value in a range defined by the UnicodeData file.
1.1731 - * </ul>
1.1732 - *
1.1733 - * <p><b>Note:</b> This method cannot handle <a
1.1734 - * href="#supplementary"> supplementary characters</a>. To support
1.1735 - * all Unicode characters, including supplementary characters, use
1.1736 - * the {@link #isDefined(int)} method.
1.1737 - *
1.1738 - * @param ch the character to be tested
1.1739 - * @return {@code true} if the character has a defined meaning
1.1740 - * in Unicode; {@code false} otherwise.
1.1741 - * @see Character#isDigit(char)
1.1742 - * @see Character#isLetter(char)
1.1743 - * @see Character#isLetterOrDigit(char)
1.1744 - * @see Character#isLowerCase(char)
1.1745 - * @see Character#isTitleCase(char)
1.1746 - * @see Character#isUpperCase(char)
1.1747 - * @since 1.0.2
1.1748 - */
1.1749 - public static boolean isDefined(char ch) {
1.1750 - return isDefined((int)ch);
1.1751 - }
1.1752 -
1.1753 - /**
1.1754 - * Determines if a character (Unicode code point) is defined in Unicode.
1.1755 - * <p>
1.1756 - * A character is defined if at least one of the following is true:
1.1757 - * <ul>
1.1758 - * <li>It has an entry in the UnicodeData file.
1.1759 - * <li>It has a value in a range defined by the UnicodeData file.
1.1760 - * </ul>
1.1761 - *
1.1762 - * @param codePoint the character (Unicode code point) to be tested.
1.1763 - * @return {@code true} if the character has a defined meaning
1.1764 - * in Unicode; {@code false} otherwise.
1.1765 - * @see Character#isDigit(int)
1.1766 - * @see Character#isLetter(int)
1.1767 - * @see Character#isLetterOrDigit(int)
1.1768 - * @see Character#isLowerCase(int)
1.1769 - * @see Character#isTitleCase(int)
1.1770 - * @see Character#isUpperCase(int)
1.1771 - * @since 1.5
1.1772 - */
1.1773 - public static boolean isDefined(int codePoint) {
1.1774 - return getType(codePoint) != Character.UNASSIGNED;
1.1775 - }
1.1776 -
1.1777 - /**
1.1778 - * Determines if the specified character is a letter.
1.1779 - * <p>
1.1780 - * A character is considered to be a letter if its general
1.1781 - * category type, provided by {@code Character.getType(ch)},
1.1782 - * is any of the following:
1.1783 - * <ul>
1.1784 - * <li> {@code UPPERCASE_LETTER}
1.1785 - * <li> {@code LOWERCASE_LETTER}
1.1786 - * <li> {@code TITLECASE_LETTER}
1.1787 - * <li> {@code MODIFIER_LETTER}
1.1788 - * <li> {@code OTHER_LETTER}
1.1789 - * </ul>
1.1790 - *
1.1791 - * Not all letters have case. Many characters are
1.1792 - * letters but are neither uppercase nor lowercase nor titlecase.
1.1793 - *
1.1794 - * <p><b>Note:</b> This method cannot handle <a
1.1795 - * href="#supplementary"> supplementary characters</a>. To support
1.1796 - * all Unicode characters, including supplementary characters, use
1.1797 - * the {@link #isLetter(int)} method.
1.1798 - *
1.1799 - * @param ch the character to be tested.
1.1800 - * @return {@code true} if the character is a letter;
1.1801 - * {@code false} otherwise.
1.1802 - * @see Character#isDigit(char)
1.1803 - * @see Character#isJavaIdentifierStart(char)
1.1804 - * @see Character#isJavaLetter(char)
1.1805 - * @see Character#isJavaLetterOrDigit(char)
1.1806 - * @see Character#isLetterOrDigit(char)
1.1807 - * @see Character#isLowerCase(char)
1.1808 - * @see Character#isTitleCase(char)
1.1809 - * @see Character#isUnicodeIdentifierStart(char)
1.1810 - * @see Character#isUpperCase(char)
1.1811 - */
1.1812 - public static boolean isLetter(char ch) {
1.1813 - return String.valueOf(ch).matches("\\w") && !isDigit(ch);
1.1814 - }
1.1815 -
1.1816 - /**
1.1817 - * Determines if the specified character (Unicode code point) is a letter.
1.1818 - * <p>
1.1819 - * A character is considered to be a letter if its general
1.1820 - * category type, provided by {@link Character#getType(int) getType(codePoint)},
1.1821 - * is any of the following:
1.1822 - * <ul>
1.1823 - * <li> {@code UPPERCASE_LETTER}
1.1824 - * <li> {@code LOWERCASE_LETTER}
1.1825 - * <li> {@code TITLECASE_LETTER}
1.1826 - * <li> {@code MODIFIER_LETTER}
1.1827 - * <li> {@code OTHER_LETTER}
1.1828 - * </ul>
1.1829 - *
1.1830 - * Not all letters have case. Many characters are
1.1831 - * letters but are neither uppercase nor lowercase nor titlecase.
1.1832 - *
1.1833 - * @param codePoint the character (Unicode code point) to be tested.
1.1834 - * @return {@code true} if the character is a letter;
1.1835 - * {@code false} otherwise.
1.1836 - * @see Character#isDigit(int)
1.1837 - * @see Character#isJavaIdentifierStart(int)
1.1838 - * @see Character#isLetterOrDigit(int)
1.1839 - * @see Character#isLowerCase(int)
1.1840 - * @see Character#isTitleCase(int)
1.1841 - * @see Character#isUnicodeIdentifierStart(int)
1.1842 - * @see Character#isUpperCase(int)
1.1843 - * @since 1.5
1.1844 - */
1.1845 - public static boolean isLetter(int codePoint) {
1.1846 - return fromCodeChars(codePoint).matches("\\w") && !isDigit(codePoint);
1.1847 - }
1.1848 -
1.1849 - /**
1.1850 - * Determines if the specified character is a letter or digit.
1.1851 - * <p>
1.1852 - * A character is considered to be a letter or digit if either
1.1853 - * {@code Character.isLetter(char ch)} or
1.1854 - * {@code Character.isDigit(char ch)} returns
1.1855 - * {@code true} for the character.
1.1856 - *
1.1857 - * <p><b>Note:</b> This method cannot handle <a
1.1858 - * href="#supplementary"> supplementary characters</a>. To support
1.1859 - * all Unicode characters, including supplementary characters, use
1.1860 - * the {@link #isLetterOrDigit(int)} method.
1.1861 - *
1.1862 - * @param ch the character to be tested.
1.1863 - * @return {@code true} if the character is a letter or digit;
1.1864 - * {@code false} otherwise.
1.1865 - * @see Character#isDigit(char)
1.1866 - * @see Character#isJavaIdentifierPart(char)
1.1867 - * @see Character#isJavaLetter(char)
1.1868 - * @see Character#isJavaLetterOrDigit(char)
1.1869 - * @see Character#isLetter(char)
1.1870 - * @see Character#isUnicodeIdentifierPart(char)
1.1871 - * @since 1.0.2
1.1872 - */
1.1873 - public static boolean isLetterOrDigit(char ch) {
1.1874 - return String.valueOf(ch).matches("\\w");
1.1875 - }
1.1876 -
1.1877 - /**
1.1878 - * Determines if the specified character (Unicode code point) is a letter or digit.
1.1879 - * <p>
1.1880 - * A character is considered to be a letter or digit if either
1.1881 - * {@link #isLetter(int) isLetter(codePoint)} or
1.1882 - * {@link #isDigit(int) isDigit(codePoint)} returns
1.1883 - * {@code true} for the character.
1.1884 - *
1.1885 - * @param codePoint the character (Unicode code point) to be tested.
1.1886 - * @return {@code true} if the character is a letter or digit;
1.1887 - * {@code false} otherwise.
1.1888 - * @see Character#isDigit(int)
1.1889 - * @see Character#isJavaIdentifierPart(int)
1.1890 - * @see Character#isLetter(int)
1.1891 - * @see Character#isUnicodeIdentifierPart(int)
1.1892 - * @since 1.5
1.1893 - */
1.1894 - public static boolean isLetterOrDigit(int codePoint) {
1.1895 - return fromCodeChars(codePoint).matches("\\w");
1.1896 - }
1.1897 -
1.1898 - static int getType(int x) {
1.1899 - throw new UnsupportedOperationException();
1.1900 - }
1.1901 -
1.1902 - /**
1.1903 - * Determines if the specified character is
1.1904 - * permissible as the first character in a Java identifier.
1.1905 - * <p>
1.1906 - * A character may start a Java identifier if and only if
1.1907 - * one of the following conditions is true:
1.1908 - * <ul>
1.1909 - * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
1.1910 - * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
1.1911 - * <li> {@code ch} is a currency symbol (such as {@code '$'})
1.1912 - * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
1.1913 - * </ul>
1.1914 - *
1.1915 - * <p><b>Note:</b> This method cannot handle <a
1.1916 - * href="#supplementary"> supplementary characters</a>. To support
1.1917 - * all Unicode characters, including supplementary characters, use
1.1918 - * the {@link #isJavaIdentifierStart(int)} method.
1.1919 - *
1.1920 - * @param ch the character to be tested.
1.1921 - * @return {@code true} if the character may start a Java identifier;
1.1922 - * {@code false} otherwise.
1.1923 - * @see Character#isJavaIdentifierPart(char)
1.1924 - * @see Character#isLetter(char)
1.1925 - * @see Character#isUnicodeIdentifierStart(char)
1.1926 - * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
1.1927 - * @since 1.1
1.1928 - */
1.1929 - public static boolean isJavaIdentifierStart(char ch) {
1.1930 - return isJavaIdentifierStart((int)ch);
1.1931 - }
1.1932 -
1.1933 - /**
1.1934 - * Determines if the character (Unicode code point) is
1.1935 - * permissible as the first character in a Java identifier.
1.1936 - * <p>
1.1937 - * A character may start a Java identifier if and only if
1.1938 - * one of the following conditions is true:
1.1939 - * <ul>
1.1940 - * <li> {@link #isLetter(int) isLetter(codePoint)}
1.1941 - * returns {@code true}
1.1942 - * <li> {@link #getType(int) getType(codePoint)}
1.1943 - * returns {@code LETTER_NUMBER}
1.1944 - * <li> the referenced character is a currency symbol (such as {@code '$'})
1.1945 - * <li> the referenced character is a connecting punctuation character
1.1946 - * (such as {@code '_'}).
1.1947 - * </ul>
1.1948 - *
1.1949 - * @param codePoint the character (Unicode code point) to be tested.
1.1950 - * @return {@code true} if the character may start a Java identifier;
1.1951 - * {@code false} otherwise.
1.1952 - * @see Character#isJavaIdentifierPart(int)
1.1953 - * @see Character#isLetter(int)
1.1954 - * @see Character#isUnicodeIdentifierStart(int)
1.1955 - * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
1.1956 - * @since 1.5
1.1957 - */
1.1958 - public static boolean isJavaIdentifierStart(int codePoint) {
1.1959 - return
1.1960 - ('A' <= codePoint && codePoint <= 'Z') ||
1.1961 - ('a' <= codePoint && codePoint <= 'z');
1.1962 - }
1.1963 -
1.1964 - /**
1.1965 - * Determines if the specified character may be part of a Java
1.1966 - * identifier as other than the first character.
1.1967 - * <p>
1.1968 - * A character may be part of a Java identifier if any of the following
1.1969 - * are true:
1.1970 - * <ul>
1.1971 - * <li> it is a letter
1.1972 - * <li> it is a currency symbol (such as {@code '$'})
1.1973 - * <li> it is a connecting punctuation character (such as {@code '_'})
1.1974 - * <li> it is a digit
1.1975 - * <li> it is a numeric letter (such as a Roman numeral character)
1.1976 - * <li> it is a combining mark
1.1977 - * <li> it is a non-spacing mark
1.1978 - * <li> {@code isIdentifierIgnorable} returns
1.1979 - * {@code true} for the character
1.1980 - * </ul>
1.1981 - *
1.1982 - * <p><b>Note:</b> This method cannot handle <a
1.1983 - * href="#supplementary"> supplementary characters</a>. To support
1.1984 - * all Unicode characters, including supplementary characters, use
1.1985 - * the {@link #isJavaIdentifierPart(int)} method.
1.1986 - *
1.1987 - * @param ch the character to be tested.
1.1988 - * @return {@code true} if the character may be part of a
1.1989 - * Java identifier; {@code false} otherwise.
1.1990 - * @see Character#isIdentifierIgnorable(char)
1.1991 - * @see Character#isJavaIdentifierStart(char)
1.1992 - * @see Character#isLetterOrDigit(char)
1.1993 - * @see Character#isUnicodeIdentifierPart(char)
1.1994 - * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
1.1995 - * @since 1.1
1.1996 - */
1.1997 - public static boolean isJavaIdentifierPart(char ch) {
1.1998 - return isJavaIdentifierPart((int)ch);
1.1999 - }
1.2000 -
1.2001 - /**
1.2002 - * Determines if the character (Unicode code point) may be part of a Java
1.2003 - * identifier as other than the first character.
1.2004 - * <p>
1.2005 - * A character may be part of a Java identifier if any of the following
1.2006 - * are true:
1.2007 - * <ul>
1.2008 - * <li> it is a letter
1.2009 - * <li> it is a currency symbol (such as {@code '$'})
1.2010 - * <li> it is a connecting punctuation character (such as {@code '_'})
1.2011 - * <li> it is a digit
1.2012 - * <li> it is a numeric letter (such as a Roman numeral character)
1.2013 - * <li> it is a combining mark
1.2014 - * <li> it is a non-spacing mark
1.2015 - * <li> {@link #isIdentifierIgnorable(int)
1.2016 - * isIdentifierIgnorable(codePoint)} returns {@code true} for
1.2017 - * the character
1.2018 - * </ul>
1.2019 - *
1.2020 - * @param codePoint the character (Unicode code point) to be tested.
1.2021 - * @return {@code true} if the character may be part of a
1.2022 - * Java identifier; {@code false} otherwise.
1.2023 - * @see Character#isIdentifierIgnorable(int)
1.2024 - * @see Character#isJavaIdentifierStart(int)
1.2025 - * @see Character#isLetterOrDigit(int)
1.2026 - * @see Character#isUnicodeIdentifierPart(int)
1.2027 - * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
1.2028 - * @since 1.5
1.2029 - */
1.2030 - public static boolean isJavaIdentifierPart(int codePoint) {
1.2031 - return isJavaIdentifierStart(codePoint) ||
1.2032 - ('0' <= codePoint && codePoint <= '9') || codePoint == '$';
1.2033 - }
1.2034 -
1.2035 - /**
1.2036 - * Converts the character argument to lowercase using case
1.2037 - * mapping information from the UnicodeData file.
1.2038 - * <p>
1.2039 - * Note that
1.2040 - * {@code Character.isLowerCase(Character.toLowerCase(ch))}
1.2041 - * does not always return {@code true} for some ranges of
1.2042 - * characters, particularly those that are symbols or ideographs.
1.2043 - *
1.2044 - * <p>In general, {@link String#toLowerCase()} should be used to map
1.2045 - * characters to lowercase. {@code String} case mapping methods
1.2046 - * have several benefits over {@code Character} case mapping methods.
1.2047 - * {@code String} case mapping methods can perform locale-sensitive
1.2048 - * mappings, context-sensitive mappings, and 1:M character mappings, whereas
1.2049 - * the {@code Character} case mapping methods cannot.
1.2050 - *
1.2051 - * <p><b>Note:</b> This method cannot handle <a
1.2052 - * href="#supplementary"> supplementary characters</a>. To support
1.2053 - * all Unicode characters, including supplementary characters, use
1.2054 - * the {@link #toLowerCase(int)} method.
1.2055 - *
1.2056 - * @param ch the character to be converted.
1.2057 - * @return the lowercase equivalent of the character, if any;
1.2058 - * otherwise, the character itself.
1.2059 - * @see Character#isLowerCase(char)
1.2060 - * @see String#toLowerCase()
1.2061 - */
1.2062 - public static char toLowerCase(char ch) {
1.2063 - return String.valueOf(ch).toLowerCase().charAt(0);
1.2064 - }
1.2065 -
1.2066 - /**
1.2067 - * Converts the character argument to uppercase using case mapping
1.2068 - * information from the UnicodeData file.
1.2069 - * <p>
1.2070 - * Note that
1.2071 - * {@code Character.isUpperCase(Character.toUpperCase(ch))}
1.2072 - * does not always return {@code true} for some ranges of
1.2073 - * characters, particularly those that are symbols or ideographs.
1.2074 - *
1.2075 - * <p>In general, {@link String#toUpperCase()} should be used to map
1.2076 - * characters to uppercase. {@code String} case mapping methods
1.2077 - * have several benefits over {@code Character} case mapping methods.
1.2078 - * {@code String} case mapping methods can perform locale-sensitive
1.2079 - * mappings, context-sensitive mappings, and 1:M character mappings, whereas
1.2080 - * the {@code Character} case mapping methods cannot.
1.2081 - *
1.2082 - * <p><b>Note:</b> This method cannot handle <a
1.2083 - * href="#supplementary"> supplementary characters</a>. To support
1.2084 - * all Unicode characters, including supplementary characters, use
1.2085 - * the {@link #toUpperCase(int)} method.
1.2086 - *
1.2087 - * @param ch the character to be converted.
1.2088 - * @return the uppercase equivalent of the character, if any;
1.2089 - * otherwise, the character itself.
1.2090 - * @see Character#isUpperCase(char)
1.2091 - * @see String#toUpperCase()
1.2092 - */
1.2093 - public static char toUpperCase(char ch) {
1.2094 - return String.valueOf(ch).toUpperCase().charAt(0);
1.2095 - }
1.2096 -
1.2097 - /**
1.2098 - * Returns the numeric value of the character {@code ch} in the
1.2099 - * specified radix.
1.2100 - * <p>
1.2101 - * If the radix is not in the range {@code MIN_RADIX} ≤
1.2102 - * {@code radix} ≤ {@code MAX_RADIX} or if the
1.2103 - * value of {@code ch} is not a valid digit in the specified
1.2104 - * radix, {@code -1} is returned. A character is a valid digit
1.2105 - * if at least one of the following is true:
1.2106 - * <ul>
1.2107 - * <li>The method {@code isDigit} is {@code true} of the character
1.2108 - * and the Unicode decimal digit value of the character (or its
1.2109 - * single-character decomposition) is less than the specified radix.
1.2110 - * In this case the decimal digit value is returned.
1.2111 - * <li>The character is one of the uppercase Latin letters
1.2112 - * {@code 'A'} through {@code 'Z'} and its code is less than
1.2113 - * {@code radix + 'A' - 10}.
1.2114 - * In this case, {@code ch - 'A' + 10}
1.2115 - * is returned.
1.2116 - * <li>The character is one of the lowercase Latin letters
1.2117 - * {@code 'a'} through {@code 'z'} and its code is less than
1.2118 - * {@code radix + 'a' - 10}.
1.2119 - * In this case, {@code ch - 'a' + 10}
1.2120 - * is returned.
1.2121 - * <li>The character is one of the fullwidth uppercase Latin letters A
1.2122 - * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
1.2123 - * and its code is less than
1.2124 - * {@code radix + '\u005CuFF21' - 10}.
1.2125 - * In this case, {@code ch - '\u005CuFF21' + 10}
1.2126 - * is returned.
1.2127 - * <li>The character is one of the fullwidth lowercase Latin letters a
1.2128 - * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
1.2129 - * and its code is less than
1.2130 - * {@code radix + '\u005CuFF41' - 10}.
1.2131 - * In this case, {@code ch - '\u005CuFF41' + 10}
1.2132 - * is returned.
1.2133 - * </ul>
1.2134 - *
1.2135 - * <p><b>Note:</b> This method cannot handle <a
1.2136 - * href="#supplementary"> supplementary characters</a>. To support
1.2137 - * all Unicode characters, including supplementary characters, use
1.2138 - * the {@link #digit(int, int)} method.
1.2139 - *
1.2140 - * @param ch the character to be converted.
1.2141 - * @param radix the radix.
1.2142 - * @return the numeric value represented by the character in the
1.2143 - * specified radix.
1.2144 - * @see Character#forDigit(int, int)
1.2145 - * @see Character#isDigit(char)
1.2146 - */
1.2147 - public static int digit(char ch, int radix) {
1.2148 - return digit((int)ch, radix);
1.2149 - }
1.2150 -
1.2151 - /**
1.2152 - * Returns the numeric value of the specified character (Unicode
1.2153 - * code point) in the specified radix.
1.2154 - *
1.2155 - * <p>If the radix is not in the range {@code MIN_RADIX} ≤
1.2156 - * {@code radix} ≤ {@code MAX_RADIX} or if the
1.2157 - * character is not a valid digit in the specified
1.2158 - * radix, {@code -1} is returned. A character is a valid digit
1.2159 - * if at least one of the following is true:
1.2160 - * <ul>
1.2161 - * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
1.2162 - * and the Unicode decimal digit value of the character (or its
1.2163 - * single-character decomposition) is less than the specified radix.
1.2164 - * In this case the decimal digit value is returned.
1.2165 - * <li>The character is one of the uppercase Latin letters
1.2166 - * {@code 'A'} through {@code 'Z'} and its code is less than
1.2167 - * {@code radix + 'A' - 10}.
1.2168 - * In this case, {@code codePoint - 'A' + 10}
1.2169 - * is returned.
1.2170 - * <li>The character is one of the lowercase Latin letters
1.2171 - * {@code 'a'} through {@code 'z'} and its code is less than
1.2172 - * {@code radix + 'a' - 10}.
1.2173 - * In this case, {@code codePoint - 'a' + 10}
1.2174 - * is returned.
1.2175 - * <li>The character is one of the fullwidth uppercase Latin letters A
1.2176 - * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
1.2177 - * and its code is less than
1.2178 - * {@code radix + '\u005CuFF21' - 10}.
1.2179 - * In this case,
1.2180 - * {@code codePoint - '\u005CuFF21' + 10}
1.2181 - * is returned.
1.2182 - * <li>The character is one of the fullwidth lowercase Latin letters a
1.2183 - * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
1.2184 - * and its code is less than
1.2185 - * {@code radix + '\u005CuFF41'- 10}.
1.2186 - * In this case,
1.2187 - * {@code codePoint - '\u005CuFF41' + 10}
1.2188 - * is returned.
1.2189 - * </ul>
1.2190 - *
1.2191 - * @param codePoint the character (Unicode code point) to be converted.
1.2192 - * @param radix the radix.
1.2193 - * @return the numeric value represented by the character in the
1.2194 - * specified radix.
1.2195 - * @see Character#forDigit(int, int)
1.2196 - * @see Character#isDigit(int)
1.2197 - * @since 1.5
1.2198 - */
1.2199 - @JavaScriptBody(args = { "codePoint", "radix" }, body=
1.2200 - "var x = parseInt(String.fromCharCode(codePoint), radix);\n"
1.2201 - + "return isNaN(x) ? -1 : x;"
1.2202 - )
1.2203 - public static int digit(int codePoint, int radix) {
1.2204 - throw new UnsupportedOperationException();
1.2205 - }
1.2206 -
1.2207 - /**
1.2208 - * Returns the {@code int} value that the specified Unicode
1.2209 - * character represents. For example, the character
1.2210 - * {@code '\u005Cu216C'} (the roman numeral fifty) will return
1.2211 - * an int with a value of 50.
1.2212 - * <p>
1.2213 - * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
1.2214 - * {@code '\u005Cu005A'}), lowercase
1.2215 - * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
1.2216 - * full width variant ({@code '\u005CuFF21'} through
1.2217 - * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
1.2218 - * {@code '\u005CuFF5A'}) forms have numeric values from 10
1.2219 - * through 35. This is independent of the Unicode specification,
1.2220 - * which does not assign numeric values to these {@code char}
1.2221 - * values.
1.2222 - * <p>
1.2223 - * If the character does not have a numeric value, then -1 is returned.
1.2224 - * If the character has a numeric value that cannot be represented as a
1.2225 - * nonnegative integer (for example, a fractional value), then -2
1.2226 - * is returned.
1.2227 - *
1.2228 - * <p><b>Note:</b> This method cannot handle <a
1.2229 - * href="#supplementary"> supplementary characters</a>. To support
1.2230 - * all Unicode characters, including supplementary characters, use
1.2231 - * the {@link #getNumericValue(int)} method.
1.2232 - *
1.2233 - * @param ch the character to be converted.
1.2234 - * @return the numeric value of the character, as a nonnegative {@code int}
1.2235 - * value; -2 if the character has a numeric value that is not a
1.2236 - * nonnegative integer; -1 if the character has no numeric value.
1.2237 - * @see Character#forDigit(int, int)
1.2238 - * @see Character#isDigit(char)
1.2239 - * @since 1.1
1.2240 - */
1.2241 - public static int getNumericValue(char ch) {
1.2242 - return getNumericValue((int)ch);
1.2243 - }
1.2244 -
1.2245 - /**
1.2246 - * Returns the {@code int} value that the specified
1.2247 - * character (Unicode code point) represents. For example, the character
1.2248 - * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
1.2249 - * an {@code int} with a value of 50.
1.2250 - * <p>
1.2251 - * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
1.2252 - * {@code '\u005Cu005A'}), lowercase
1.2253 - * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
1.2254 - * full width variant ({@code '\u005CuFF21'} through
1.2255 - * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
1.2256 - * {@code '\u005CuFF5A'}) forms have numeric values from 10
1.2257 - * through 35. This is independent of the Unicode specification,
1.2258 - * which does not assign numeric values to these {@code char}
1.2259 - * values.
1.2260 - * <p>
1.2261 - * If the character does not have a numeric value, then -1 is returned.
1.2262 - * If the character has a numeric value that cannot be represented as a
1.2263 - * nonnegative integer (for example, a fractional value), then -2
1.2264 - * is returned.
1.2265 - *
1.2266 - * @param codePoint the character (Unicode code point) to be converted.
1.2267 - * @return the numeric value of the character, as a nonnegative {@code int}
1.2268 - * value; -2 if the character has a numeric value that is not a
1.2269 - * nonnegative integer; -1 if the character has no numeric value.
1.2270 - * @see Character#forDigit(int, int)
1.2271 - * @see Character#isDigit(int)
1.2272 - * @since 1.5
1.2273 - */
1.2274 - public static int getNumericValue(int codePoint) {
1.2275 - throw new UnsupportedOperationException();
1.2276 - }
1.2277 -
1.2278 - /**
1.2279 - * Determines if the specified character is ISO-LATIN-1 white space.
1.2280 - * This method returns {@code true} for the following five
1.2281 - * characters only:
1.2282 - * <table>
1.2283 - * <tr><td>{@code '\t'}</td> <td>{@code U+0009}</td>
1.2284 - * <td>{@code HORIZONTAL TABULATION}</td></tr>
1.2285 - * <tr><td>{@code '\n'}</td> <td>{@code U+000A}</td>
1.2286 - * <td>{@code NEW LINE}</td></tr>
1.2287 - * <tr><td>{@code '\f'}</td> <td>{@code U+000C}</td>
1.2288 - * <td>{@code FORM FEED}</td></tr>
1.2289 - * <tr><td>{@code '\r'}</td> <td>{@code U+000D}</td>
1.2290 - * <td>{@code CARRIAGE RETURN}</td></tr>
1.2291 - * <tr><td>{@code ' '}</td> <td>{@code U+0020}</td>
1.2292 - * <td>{@code SPACE}</td></tr>
1.2293 - * </table>
1.2294 - *
1.2295 - * @param ch the character to be tested.
1.2296 - * @return {@code true} if the character is ISO-LATIN-1 white
1.2297 - * space; {@code false} otherwise.
1.2298 - * @see Character#isSpaceChar(char)
1.2299 - * @see Character#isWhitespace(char)
1.2300 - * @deprecated Replaced by isWhitespace(char).
1.2301 - */
1.2302 - @Deprecated
1.2303 - public static boolean isSpace(char ch) {
1.2304 - return (ch <= 0x0020) &&
1.2305 - (((((1L << 0x0009) |
1.2306 - (1L << 0x000A) |
1.2307 - (1L << 0x000C) |
1.2308 - (1L << 0x000D) |
1.2309 - (1L << 0x0020)) >> ch) & 1L) != 0);
1.2310 - }
1.2311 -
1.2312 -
1.2313 -
1.2314 - /**
1.2315 - * Determines if the specified character is white space according to Java.
1.2316 - * A character is a Java whitespace character if and only if it satisfies
1.2317 - * one of the following criteria:
1.2318 - * <ul>
1.2319 - * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
1.2320 - * {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
1.2321 - * but is not also a non-breaking space ({@code '\u005Cu00A0'},
1.2322 - * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
1.2323 - * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
1.2324 - * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
1.2325 - * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
1.2326 - * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
1.2327 - * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
1.2328 - * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
1.2329 - * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
1.2330 - * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
1.2331 - * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
1.2332 - * </ul>
1.2333 - *
1.2334 - * <p><b>Note:</b> This method cannot handle <a
1.2335 - * href="#supplementary"> supplementary characters</a>. To support
1.2336 - * all Unicode characters, including supplementary characters, use
1.2337 - * the {@link #isWhitespace(int)} method.
1.2338 - *
1.2339 - * @param ch the character to be tested.
1.2340 - * @return {@code true} if the character is a Java whitespace
1.2341 - * character; {@code false} otherwise.
1.2342 - * @see Character#isSpaceChar(char)
1.2343 - * @since 1.1
1.2344 - */
1.2345 - public static boolean isWhitespace(char ch) {
1.2346 - return isWhitespace((int)ch);
1.2347 - }
1.2348 -
1.2349 - /**
1.2350 - * Determines if the specified character (Unicode code point) is
1.2351 - * white space according to Java. A character is a Java
1.2352 - * whitespace character if and only if it satisfies one of the
1.2353 - * following criteria:
1.2354 - * <ul>
1.2355 - * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
1.2356 - * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
1.2357 - * but is not also a non-breaking space ({@code '\u005Cu00A0'},
1.2358 - * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
1.2359 - * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
1.2360 - * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
1.2361 - * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
1.2362 - * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
1.2363 - * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
1.2364 - * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
1.2365 - * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
1.2366 - * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
1.2367 - * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
1.2368 - * </ul>
1.2369 - * <p>
1.2370 - *
1.2371 - * @param codePoint the character (Unicode code point) to be tested.
1.2372 - * @return {@code true} if the character is a Java whitespace
1.2373 - * character; {@code false} otherwise.
1.2374 - * @see Character#isSpaceChar(int)
1.2375 - * @since 1.5
1.2376 - */
1.2377 - public static boolean isWhitespace(int codePoint) {
1.2378 - throw new UnsupportedOperationException();
1.2379 - }
1.2380 -
1.2381 - /**
1.2382 - * Determines if the specified character is an ISO control
1.2383 - * character. A character is considered to be an ISO control
1.2384 - * character if its code is in the range {@code '\u005Cu0000'}
1.2385 - * through {@code '\u005Cu001F'} or in the range
1.2386 - * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
1.2387 - *
1.2388 - * <p><b>Note:</b> This method cannot handle <a
1.2389 - * href="#supplementary"> supplementary characters</a>. To support
1.2390 - * all Unicode characters, including supplementary characters, use
1.2391 - * the {@link #isISOControl(int)} method.
1.2392 - *
1.2393 - * @param ch the character to be tested.
1.2394 - * @return {@code true} if the character is an ISO control character;
1.2395 - * {@code false} otherwise.
1.2396 - *
1.2397 - * @see Character#isSpaceChar(char)
1.2398 - * @see Character#isWhitespace(char)
1.2399 - * @since 1.1
1.2400 - */
1.2401 - public static boolean isISOControl(char ch) {
1.2402 - return isISOControl((int)ch);
1.2403 - }
1.2404 -
1.2405 - /**
1.2406 - * Determines if the referenced character (Unicode code point) is an ISO control
1.2407 - * character. A character is considered to be an ISO control
1.2408 - * character if its code is in the range {@code '\u005Cu0000'}
1.2409 - * through {@code '\u005Cu001F'} or in the range
1.2410 - * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
1.2411 - *
1.2412 - * @param codePoint the character (Unicode code point) to be tested.
1.2413 - * @return {@code true} if the character is an ISO control character;
1.2414 - * {@code false} otherwise.
1.2415 - * @see Character#isSpaceChar(int)
1.2416 - * @see Character#isWhitespace(int)
1.2417 - * @since 1.5
1.2418 - */
1.2419 - public static boolean isISOControl(int codePoint) {
1.2420 - // Optimized form of:
1.2421 - // (codePoint >= 0x00 && codePoint <= 0x1F) ||
1.2422 - // (codePoint >= 0x7F && codePoint <= 0x9F);
1.2423 - return codePoint <= 0x9F &&
1.2424 - (codePoint >= 0x7F || (codePoint >>> 5 == 0));
1.2425 - }
1.2426 -
1.2427 - /**
1.2428 - * Determines the character representation for a specific digit in
1.2429 - * the specified radix. If the value of {@code radix} is not a
1.2430 - * valid radix, or the value of {@code digit} is not a valid
1.2431 - * digit in the specified radix, the null character
1.2432 - * ({@code '\u005Cu0000'}) is returned.
1.2433 - * <p>
1.2434 - * The {@code radix} argument is valid if it is greater than or
1.2435 - * equal to {@code MIN_RADIX} and less than or equal to
1.2436 - * {@code MAX_RADIX}. The {@code digit} argument is valid if
1.2437 - * {@code 0 <= digit < radix}.
1.2438 - * <p>
1.2439 - * If the digit is less than 10, then
1.2440 - * {@code '0' + digit} is returned. Otherwise, the value
1.2441 - * {@code 'a' + digit - 10} is returned.
1.2442 - *
1.2443 - * @param digit the number to convert to a character.
1.2444 - * @param radix the radix.
1.2445 - * @return the {@code char} representation of the specified digit
1.2446 - * in the specified radix.
1.2447 - * @see Character#MIN_RADIX
1.2448 - * @see Character#MAX_RADIX
1.2449 - * @see Character#digit(char, int)
1.2450 - */
1.2451 - public static char forDigit(int digit, int radix) {
1.2452 - if ((digit >= radix) || (digit < 0)) {
1.2453 - return '\0';
1.2454 - }
1.2455 - if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
1.2456 - return '\0';
1.2457 - }
1.2458 - if (digit < 10) {
1.2459 - return (char)('0' + digit);
1.2460 - }
1.2461 - return (char)('a' - 10 + digit);
1.2462 - }
1.2463 -
1.2464 - /**
1.2465 - * Compares two {@code Character} objects numerically.
1.2466 - *
1.2467 - * @param anotherCharacter the {@code Character} to be compared.
1.2468 -
1.2469 - * @return the value {@code 0} if the argument {@code Character}
1.2470 - * is equal to this {@code Character}; a value less than
1.2471 - * {@code 0} if this {@code Character} is numerically less
1.2472 - * than the {@code Character} argument; and a value greater than
1.2473 - * {@code 0} if this {@code Character} is numerically greater
1.2474 - * than the {@code Character} argument (unsigned comparison).
1.2475 - * Note that this is strictly a numerical comparison; it is not
1.2476 - * locale-dependent.
1.2477 - * @since 1.2
1.2478 - */
1.2479 - public int compareTo(Character anotherCharacter) {
1.2480 - return compare(this.value, anotherCharacter.value);
1.2481 - }
1.2482 -
1.2483 - /**
1.2484 - * Compares two {@code char} values numerically.
1.2485 - * The value returned is identical to what would be returned by:
1.2486 - * <pre>
1.2487 - * Character.valueOf(x).compareTo(Character.valueOf(y))
1.2488 - * </pre>
1.2489 - *
1.2490 - * @param x the first {@code char} to compare
1.2491 - * @param y the second {@code char} to compare
1.2492 - * @return the value {@code 0} if {@code x == y};
1.2493 - * a value less than {@code 0} if {@code x < y}; and
1.2494 - * a value greater than {@code 0} if {@code x > y}
1.2495 - * @since 1.7
1.2496 - */
1.2497 - public static int compare(char x, char y) {
1.2498 - return x - y;
1.2499 - }
1.2500 -
1.2501 -
1.2502 - /**
1.2503 - * The number of bits used to represent a <tt>char</tt> value in unsigned
1.2504 - * binary form, constant {@code 16}.
1.2505 - *
1.2506 - * @since 1.5
1.2507 - */
1.2508 - public static final int SIZE = 16;
1.2509 -
1.2510 - /**
1.2511 - * Returns the value obtained by reversing the order of the bytes in the
1.2512 - * specified <tt>char</tt> value.
1.2513 - *
1.2514 - * @return the value obtained by reversing (or, equivalently, swapping)
1.2515 - * the bytes in the specified <tt>char</tt> value.
1.2516 - * @since 1.5
1.2517 - */
1.2518 - public static char reverseBytes(char ch) {
1.2519 - return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
1.2520 - }
1.2521 -
1.2522 -}