1.1 --- a/emul/src/main/java/java/lang/Character.java Wed Dec 05 10:03:58 2012 +0100
1.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
1.3 @@ -1,2388 +0,0 @@
1.4 -/*
1.5 - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
1.6 - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
1.7 - *
1.8 - * This code is free software; you can redistribute it and/or modify it
1.9 - * under the terms of the GNU General Public License version 2 only, as
1.10 - * published by the Free Software Foundation. Oracle designates this
1.11 - * particular file as subject to the "Classpath" exception as provided
1.12 - * by Oracle in the LICENSE file that accompanied this code.
1.13 - *
1.14 - * This code is distributed in the hope that it will be useful, but WITHOUT
1.15 - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1.16 - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
1.17 - * version 2 for more details (a copy is included in the LICENSE file that
1.18 - * accompanied this code).
1.19 - *
1.20 - * You should have received a copy of the GNU General Public License version
1.21 - * 2 along with this work; if not, write to the Free Software Foundation,
1.22 - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
1.23 - *
1.24 - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
1.25 - * or visit www.oracle.com if you need additional information or have any
1.26 - * questions.
1.27 - */
1.28 -
1.29 -package java.lang;
1.30 -
1.31 -/**
1.32 - * The {@code Character} class wraps a value of the primitive
1.33 - * type {@code char} in an object. An object of type
1.34 - * {@code Character} contains a single field whose type is
1.35 - * {@code char}.
1.36 - * <p>
1.37 - * In addition, this class provides several methods for determining
1.38 - * a character's category (lowercase letter, digit, etc.) and for converting
1.39 - * characters from uppercase to lowercase and vice versa.
1.40 - * <p>
1.41 - * Character information is based on the Unicode Standard, version 6.0.0.
1.42 - * <p>
1.43 - * The methods and data of class {@code Character} are defined by
1.44 - * the information in the <i>UnicodeData</i> file that is part of the
1.45 - * Unicode Character Database maintained by the Unicode
1.46 - * Consortium. This file specifies various properties including name
1.47 - * and general category for every defined Unicode code point or
1.48 - * character range.
1.49 - * <p>
1.50 - * The file and its description are available from the Unicode Consortium at:
1.51 - * <ul>
1.52 - * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
1.53 - * </ul>
1.54 - *
1.55 - * <h4><a name="unicode">Unicode Character Representations</a></h4>
1.56 - *
1.57 - * <p>The {@code char} data type (and therefore the value that a
1.58 - * {@code Character} object encapsulates) are based on the
1.59 - * original Unicode specification, which defined characters as
1.60 - * fixed-width 16-bit entities. The Unicode Standard has since been
1.61 - * changed to allow for characters whose representation requires more
1.62 - * than 16 bits. The range of legal <em>code point</em>s is now
1.63 - * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
1.64 - * (Refer to the <a
1.65 - * href="http://www.unicode.org/reports/tr27/#notation"><i>
1.66 - * definition</i></a> of the U+<i>n</i> notation in the Unicode
1.67 - * Standard.)
1.68 - *
1.69 - * <p><a name="BMP">The set of characters from U+0000 to U+FFFF is
1.70 - * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
1.71 - * <a name="supplementary">Characters</a> whose code points are greater
1.72 - * than U+FFFF are called <em>supplementary character</em>s. The Java
1.73 - * platform uses the UTF-16 representation in {@code char} arrays and
1.74 - * in the {@code String} and {@code StringBuffer} classes. In
1.75 - * this representation, supplementary characters are represented as a pair
1.76 - * of {@code char} values, the first from the <em>high-surrogates</em>
1.77 - * range, (\uD800-\uDBFF), the second from the
1.78 - * <em>low-surrogates</em> range (\uDC00-\uDFFF).
1.79 - *
1.80 - * <p>A {@code char} value, therefore, represents Basic
1.81 - * Multilingual Plane (BMP) code points, including the surrogate
1.82 - * code points, or code units of the UTF-16 encoding. An
1.83 - * {@code int} value represents all Unicode code points,
1.84 - * including supplementary code points. The lower (least significant)
1.85 - * 21 bits of {@code int} are used to represent Unicode code
1.86 - * points and the upper (most significant) 11 bits must be zero.
1.87 - * Unless otherwise specified, the behavior with respect to
1.88 - * supplementary characters and surrogate {@code char} values is
1.89 - * as follows:
1.90 - *
1.91 - * <ul>
1.92 - * <li>The methods that only accept a {@code char} value cannot support
1.93 - * supplementary characters. They treat {@code char} values from the
1.94 - * surrogate ranges as undefined characters. For example,
1.95 - * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
1.96 - * this specific value if followed by any low-surrogate value in a string
1.97 - * would represent a letter.
1.98 - *
1.99 - * <li>The methods that accept an {@code int} value support all
1.100 - * Unicode characters, including supplementary characters. For
1.101 - * example, {@code Character.isLetter(0x2F81A)} returns
1.102 - * {@code true} because the code point value represents a letter
1.103 - * (a CJK ideograph).
1.104 - * </ul>
1.105 - *
1.106 - * <p>In the Java SE API documentation, <em>Unicode code point</em> is
1.107 - * used for character values in the range between U+0000 and U+10FFFF,
1.108 - * and <em>Unicode code unit</em> is used for 16-bit
1.109 - * {@code char} values that are code units of the <em>UTF-16</em>
1.110 - * encoding. For more information on Unicode terminology, refer to the
1.111 - * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
1.112 - *
1.113 - * @author Lee Boynton
1.114 - * @author Guy Steele
1.115 - * @author Akira Tanaka
1.116 - * @author Martin Buchholz
1.117 - * @author Ulf Zibis
1.118 - * @since 1.0
1.119 - */
1.120 -public final
1.121 -class Character implements java.io.Serializable, Comparable<Character> {
1.122 - /**
1.123 - * The minimum radix available for conversion to and from strings.
1.124 - * The constant value of this field is the smallest value permitted
1.125 - * for the radix argument in radix-conversion methods such as the
1.126 - * {@code digit} method, the {@code forDigit} method, and the
1.127 - * {@code toString} method of class {@code Integer}.
1.128 - *
1.129 - * @see Character#digit(char, int)
1.130 - * @see Character#forDigit(int, int)
1.131 - * @see Integer#toString(int, int)
1.132 - * @see Integer#valueOf(String)
1.133 - */
1.134 - public static final int MIN_RADIX = 2;
1.135 -
1.136 - /**
1.137 - * The maximum radix available for conversion to and from strings.
1.138 - * The constant value of this field is the largest value permitted
1.139 - * for the radix argument in radix-conversion methods such as the
1.140 - * {@code digit} method, the {@code forDigit} method, and the
1.141 - * {@code toString} method of class {@code Integer}.
1.142 - *
1.143 - * @see Character#digit(char, int)
1.144 - * @see Character#forDigit(int, int)
1.145 - * @see Integer#toString(int, int)
1.146 - * @see Integer#valueOf(String)
1.147 - */
1.148 - public static final int MAX_RADIX = 36;
1.149 -
1.150 - /**
1.151 - * The constant value of this field is the smallest value of type
1.152 - * {@code char}, {@code '\u005Cu0000'}.
1.153 - *
1.154 - * @since 1.0.2
1.155 - */
1.156 - public static final char MIN_VALUE = '\u0000';
1.157 -
1.158 - /**
1.159 - * The constant value of this field is the largest value of type
1.160 - * {@code char}, {@code '\u005CuFFFF'}.
1.161 - *
1.162 - * @since 1.0.2
1.163 - */
1.164 - public static final char MAX_VALUE = '\uFFFF';
1.165 -
1.166 - /**
1.167 - * The {@code Class} instance representing the primitive type
1.168 - * {@code char}.
1.169 - *
1.170 - * @since 1.1
1.171 - */
1.172 - public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
1.173 -
1.174 - /*
1.175 - * Normative general types
1.176 - */
1.177 -
1.178 - /*
1.179 - * General character types
1.180 - */
1.181 -
1.182 - /**
1.183 - * General category "Cn" in the Unicode specification.
1.184 - * @since 1.1
1.185 - */
1.186 - public static final byte UNASSIGNED = 0;
1.187 -
1.188 - /**
1.189 - * General category "Lu" in the Unicode specification.
1.190 - * @since 1.1
1.191 - */
1.192 - public static final byte UPPERCASE_LETTER = 1;
1.193 -
1.194 - /**
1.195 - * General category "Ll" in the Unicode specification.
1.196 - * @since 1.1
1.197 - */
1.198 - public static final byte LOWERCASE_LETTER = 2;
1.199 -
1.200 - /**
1.201 - * General category "Lt" in the Unicode specification.
1.202 - * @since 1.1
1.203 - */
1.204 - public static final byte TITLECASE_LETTER = 3;
1.205 -
1.206 - /**
1.207 - * General category "Lm" in the Unicode specification.
1.208 - * @since 1.1
1.209 - */
1.210 - public static final byte MODIFIER_LETTER = 4;
1.211 -
1.212 - /**
1.213 - * General category "Lo" in the Unicode specification.
1.214 - * @since 1.1
1.215 - */
1.216 - public static final byte OTHER_LETTER = 5;
1.217 -
1.218 - /**
1.219 - * General category "Mn" in the Unicode specification.
1.220 - * @since 1.1
1.221 - */
1.222 - public static final byte NON_SPACING_MARK = 6;
1.223 -
1.224 - /**
1.225 - * General category "Me" in the Unicode specification.
1.226 - * @since 1.1
1.227 - */
1.228 - public static final byte ENCLOSING_MARK = 7;
1.229 -
1.230 - /**
1.231 - * General category "Mc" in the Unicode specification.
1.232 - * @since 1.1
1.233 - */
1.234 - public static final byte COMBINING_SPACING_MARK = 8;
1.235 -
1.236 - /**
1.237 - * General category "Nd" in the Unicode specification.
1.238 - * @since 1.1
1.239 - */
1.240 - public static final byte DECIMAL_DIGIT_NUMBER = 9;
1.241 -
1.242 - /**
1.243 - * General category "Nl" in the Unicode specification.
1.244 - * @since 1.1
1.245 - */
1.246 - public static final byte LETTER_NUMBER = 10;
1.247 -
1.248 - /**
1.249 - * General category "No" in the Unicode specification.
1.250 - * @since 1.1
1.251 - */
1.252 - public static final byte OTHER_NUMBER = 11;
1.253 -
1.254 - /**
1.255 - * General category "Zs" in the Unicode specification.
1.256 - * @since 1.1
1.257 - */
1.258 - public static final byte SPACE_SEPARATOR = 12;
1.259 -
1.260 - /**
1.261 - * General category "Zl" in the Unicode specification.
1.262 - * @since 1.1
1.263 - */
1.264 - public static final byte LINE_SEPARATOR = 13;
1.265 -
1.266 - /**
1.267 - * General category "Zp" in the Unicode specification.
1.268 - * @since 1.1
1.269 - */
1.270 - public static final byte PARAGRAPH_SEPARATOR = 14;
1.271 -
1.272 - /**
1.273 - * General category "Cc" in the Unicode specification.
1.274 - * @since 1.1
1.275 - */
1.276 - public static final byte CONTROL = 15;
1.277 -
1.278 - /**
1.279 - * General category "Cf" in the Unicode specification.
1.280 - * @since 1.1
1.281 - */
1.282 - public static final byte FORMAT = 16;
1.283 -
1.284 - /**
1.285 - * General category "Co" in the Unicode specification.
1.286 - * @since 1.1
1.287 - */
1.288 - public static final byte PRIVATE_USE = 18;
1.289 -
1.290 - /**
1.291 - * General category "Cs" in the Unicode specification.
1.292 - * @since 1.1
1.293 - */
1.294 - public static final byte SURROGATE = 19;
1.295 -
1.296 - /**
1.297 - * General category "Pd" in the Unicode specification.
1.298 - * @since 1.1
1.299 - */
1.300 - public static final byte DASH_PUNCTUATION = 20;
1.301 -
1.302 - /**
1.303 - * General category "Ps" in the Unicode specification.
1.304 - * @since 1.1
1.305 - */
1.306 - public static final byte START_PUNCTUATION = 21;
1.307 -
1.308 - /**
1.309 - * General category "Pe" in the Unicode specification.
1.310 - * @since 1.1
1.311 - */
1.312 - public static final byte END_PUNCTUATION = 22;
1.313 -
1.314 - /**
1.315 - * General category "Pc" in the Unicode specification.
1.316 - * @since 1.1
1.317 - */
1.318 - public static final byte CONNECTOR_PUNCTUATION = 23;
1.319 -
1.320 - /**
1.321 - * General category "Po" in the Unicode specification.
1.322 - * @since 1.1
1.323 - */
1.324 - public static final byte OTHER_PUNCTUATION = 24;
1.325 -
1.326 - /**
1.327 - * General category "Sm" in the Unicode specification.
1.328 - * @since 1.1
1.329 - */
1.330 - public static final byte MATH_SYMBOL = 25;
1.331 -
1.332 - /**
1.333 - * General category "Sc" in the Unicode specification.
1.334 - * @since 1.1
1.335 - */
1.336 - public static final byte CURRENCY_SYMBOL = 26;
1.337 -
1.338 - /**
1.339 - * General category "Sk" in the Unicode specification.
1.340 - * @since 1.1
1.341 - */
1.342 - public static final byte MODIFIER_SYMBOL = 27;
1.343 -
1.344 - /**
1.345 - * General category "So" in the Unicode specification.
1.346 - * @since 1.1
1.347 - */
1.348 - public static final byte OTHER_SYMBOL = 28;
1.349 -
1.350 - /**
1.351 - * General category "Pi" in the Unicode specification.
1.352 - * @since 1.4
1.353 - */
1.354 - public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
1.355 -
1.356 - /**
1.357 - * General category "Pf" in the Unicode specification.
1.358 - * @since 1.4
1.359 - */
1.360 - public static final byte FINAL_QUOTE_PUNCTUATION = 30;
1.361 -
1.362 - /**
1.363 - * Error flag. Use int (code point) to avoid confusion with U+FFFF.
1.364 - */
1.365 - static final int ERROR = 0xFFFFFFFF;
1.366 -
1.367 -
1.368 - /**
1.369 - * Undefined bidirectional character type. Undefined {@code char}
1.370 - * values have undefined directionality in the Unicode specification.
1.371 - * @since 1.4
1.372 - */
1.373 - public static final byte DIRECTIONALITY_UNDEFINED = -1;
1.374 -
1.375 - /**
1.376 - * Strong bidirectional character type "L" in the Unicode specification.
1.377 - * @since 1.4
1.378 - */
1.379 - public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
1.380 -
1.381 - /**
1.382 - * Strong bidirectional character type "R" in the Unicode specification.
1.383 - * @since 1.4
1.384 - */
1.385 - public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
1.386 -
1.387 - /**
1.388 - * Strong bidirectional character type "AL" in the Unicode specification.
1.389 - * @since 1.4
1.390 - */
1.391 - public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
1.392 -
1.393 - /**
1.394 - * Weak bidirectional character type "EN" in the Unicode specification.
1.395 - * @since 1.4
1.396 - */
1.397 - public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
1.398 -
1.399 - /**
1.400 - * Weak bidirectional character type "ES" in the Unicode specification.
1.401 - * @since 1.4
1.402 - */
1.403 - public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
1.404 -
1.405 - /**
1.406 - * Weak bidirectional character type "ET" in the Unicode specification.
1.407 - * @since 1.4
1.408 - */
1.409 - public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
1.410 -
1.411 - /**
1.412 - * Weak bidirectional character type "AN" in the Unicode specification.
1.413 - * @since 1.4
1.414 - */
1.415 - public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
1.416 -
1.417 - /**
1.418 - * Weak bidirectional character type "CS" in the Unicode specification.
1.419 - * @since 1.4
1.420 - */
1.421 - public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
1.422 -
1.423 - /**
1.424 - * Weak bidirectional character type "NSM" in the Unicode specification.
1.425 - * @since 1.4
1.426 - */
1.427 - public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
1.428 -
1.429 - /**
1.430 - * Weak bidirectional character type "BN" in the Unicode specification.
1.431 - * @since 1.4
1.432 - */
1.433 - public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
1.434 -
1.435 - /**
1.436 - * Neutral bidirectional character type "B" in the Unicode specification.
1.437 - * @since 1.4
1.438 - */
1.439 - public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
1.440 -
1.441 - /**
1.442 - * Neutral bidirectional character type "S" in the Unicode specification.
1.443 - * @since 1.4
1.444 - */
1.445 - public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
1.446 -
1.447 - /**
1.448 - * Neutral bidirectional character type "WS" in the Unicode specification.
1.449 - * @since 1.4
1.450 - */
1.451 - public static final byte DIRECTIONALITY_WHITESPACE = 12;
1.452 -
1.453 - /**
1.454 - * Neutral bidirectional character type "ON" in the Unicode specification.
1.455 - * @since 1.4
1.456 - */
1.457 - public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
1.458 -
1.459 - /**
1.460 - * Strong bidirectional character type "LRE" in the Unicode specification.
1.461 - * @since 1.4
1.462 - */
1.463 - public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
1.464 -
1.465 - /**
1.466 - * Strong bidirectional character type "LRO" in the Unicode specification.
1.467 - * @since 1.4
1.468 - */
1.469 - public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
1.470 -
1.471 - /**
1.472 - * Strong bidirectional character type "RLE" in the Unicode specification.
1.473 - * @since 1.4
1.474 - */
1.475 - public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
1.476 -
1.477 - /**
1.478 - * Strong bidirectional character type "RLO" in the Unicode specification.
1.479 - * @since 1.4
1.480 - */
1.481 - public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
1.482 -
1.483 - /**
1.484 - * Weak bidirectional character type "PDF" in the Unicode specification.
1.485 - * @since 1.4
1.486 - */
1.487 - public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
1.488 -
1.489 - /**
1.490 - * The minimum value of a
1.491 - * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
1.492 - * Unicode high-surrogate code unit</a>
1.493 - * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
1.494 - * A high-surrogate is also known as a <i>leading-surrogate</i>.
1.495 - *
1.496 - * @since 1.5
1.497 - */
1.498 - public static final char MIN_HIGH_SURROGATE = '\uD800';
1.499 -
1.500 - /**
1.501 - * The maximum value of a
1.502 - * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
1.503 - * Unicode high-surrogate code unit</a>
1.504 - * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
1.505 - * A high-surrogate is also known as a <i>leading-surrogate</i>.
1.506 - *
1.507 - * @since 1.5
1.508 - */
1.509 - public static final char MAX_HIGH_SURROGATE = '\uDBFF';
1.510 -
1.511 - /**
1.512 - * The minimum value of a
1.513 - * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
1.514 - * Unicode low-surrogate code unit</a>
1.515 - * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
1.516 - * A low-surrogate is also known as a <i>trailing-surrogate</i>.
1.517 - *
1.518 - * @since 1.5
1.519 - */
1.520 - public static final char MIN_LOW_SURROGATE = '\uDC00';
1.521 -
1.522 - /**
1.523 - * The maximum value of a
1.524 - * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
1.525 - * Unicode low-surrogate code unit</a>
1.526 - * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
1.527 - * A low-surrogate is also known as a <i>trailing-surrogate</i>.
1.528 - *
1.529 - * @since 1.5
1.530 - */
1.531 - public static final char MAX_LOW_SURROGATE = '\uDFFF';
1.532 -
1.533 - /**
1.534 - * The minimum value of a Unicode surrogate code unit in the
1.535 - * UTF-16 encoding, constant {@code '\u005CuD800'}.
1.536 - *
1.537 - * @since 1.5
1.538 - */
1.539 - public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
1.540 -
1.541 - /**
1.542 - * The maximum value of a Unicode surrogate code unit in the
1.543 - * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
1.544 - *
1.545 - * @since 1.5
1.546 - */
1.547 - public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
1.548 -
1.549 - /**
1.550 - * The minimum value of a
1.551 - * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
1.552 - * Unicode supplementary code point</a>, constant {@code U+10000}.
1.553 - *
1.554 - * @since 1.5
1.555 - */
1.556 - public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
1.557 -
1.558 - /**
1.559 - * The minimum value of a
1.560 - * <a href="http://www.unicode.org/glossary/#code_point">
1.561 - * Unicode code point</a>, constant {@code U+0000}.
1.562 - *
1.563 - * @since 1.5
1.564 - */
1.565 - public static final int MIN_CODE_POINT = 0x000000;
1.566 -
1.567 - /**
1.568 - * The maximum value of a
1.569 - * <a href="http://www.unicode.org/glossary/#code_point">
1.570 - * Unicode code point</a>, constant {@code U+10FFFF}.
1.571 - *
1.572 - * @since 1.5
1.573 - */
1.574 - public static final int MAX_CODE_POINT = 0X10FFFF;
1.575 -
1.576 -
1.577 - /**
1.578 - * Instances of this class represent particular subsets of the Unicode
1.579 - * character set. The only family of subsets defined in the
1.580 - * {@code Character} class is {@link Character.UnicodeBlock}.
1.581 - * Other portions of the Java API may define other subsets for their
1.582 - * own purposes.
1.583 - *
1.584 - * @since 1.2
1.585 - */
1.586 - public static class Subset {
1.587 -
1.588 - private String name;
1.589 -
1.590 - /**
1.591 - * Constructs a new {@code Subset} instance.
1.592 - *
1.593 - * @param name The name of this subset
1.594 - * @exception NullPointerException if name is {@code null}
1.595 - */
1.596 - protected Subset(String name) {
1.597 - if (name == null) {
1.598 - throw new NullPointerException("name");
1.599 - }
1.600 - this.name = name;
1.601 - }
1.602 -
1.603 - /**
1.604 - * Compares two {@code Subset} objects for equality.
1.605 - * This method returns {@code true} if and only if
1.606 - * {@code this} and the argument refer to the same
1.607 - * object; since this method is {@code final}, this
1.608 - * guarantee holds for all subclasses.
1.609 - */
1.610 - public final boolean equals(Object obj) {
1.611 - return (this == obj);
1.612 - }
1.613 -
1.614 - /**
1.615 - * Returns the standard hash code as defined by the
1.616 - * {@link Object#hashCode} method. This method
1.617 - * is {@code final} in order to ensure that the
1.618 - * {@code equals} and {@code hashCode} methods will
1.619 - * be consistent in all subclasses.
1.620 - */
1.621 - public final int hashCode() {
1.622 - return super.hashCode();
1.623 - }
1.624 -
1.625 - /**
1.626 - * Returns the name of this subset.
1.627 - */
1.628 - public final String toString() {
1.629 - return name;
1.630 - }
1.631 - }
1.632 -
1.633 - // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
1.634 - // for the latest specification of Unicode Blocks.
1.635 -
1.636 -
1.637 - /**
1.638 - * The value of the {@code Character}.
1.639 - *
1.640 - * @serial
1.641 - */
1.642 - private final char value;
1.643 -
1.644 - /** use serialVersionUID from JDK 1.0.2 for interoperability */
1.645 - private static final long serialVersionUID = 3786198910865385080L;
1.646 -
1.647 - /**
1.648 - * Constructs a newly allocated {@code Character} object that
1.649 - * represents the specified {@code char} value.
1.650 - *
1.651 - * @param value the value to be represented by the
1.652 - * {@code Character} object.
1.653 - */
1.654 - public Character(char value) {
1.655 - this.value = value;
1.656 - }
1.657 -
1.658 - private static class CharacterCache {
1.659 - private CharacterCache(){}
1.660 -
1.661 - static final Character cache[] = new Character[127 + 1];
1.662 -
1.663 - static {
1.664 - for (int i = 0; i < cache.length; i++)
1.665 - cache[i] = new Character((char)i);
1.666 - }
1.667 - }
1.668 -
1.669 - /**
1.670 - * Returns a <tt>Character</tt> instance representing the specified
1.671 - * <tt>char</tt> value.
1.672 - * If a new <tt>Character</tt> instance is not required, this method
1.673 - * should generally be used in preference to the constructor
1.674 - * {@link #Character(char)}, as this method is likely to yield
1.675 - * significantly better space and time performance by caching
1.676 - * frequently requested values.
1.677 - *
1.678 - * This method will always cache values in the range {@code
1.679 - * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
1.680 - * cache other values outside of this range.
1.681 - *
1.682 - * @param c a char value.
1.683 - * @return a <tt>Character</tt> instance representing <tt>c</tt>.
1.684 - * @since 1.5
1.685 - */
1.686 - public static Character valueOf(char c) {
1.687 - if (c <= 127) { // must cache
1.688 - return CharacterCache.cache[(int)c];
1.689 - }
1.690 - return new Character(c);
1.691 - }
1.692 -
1.693 - /**
1.694 - * Returns the value of this {@code Character} object.
1.695 - * @return the primitive {@code char} value represented by
1.696 - * this object.
1.697 - */
1.698 - public char charValue() {
1.699 - return value;
1.700 - }
1.701 -
1.702 - /**
1.703 - * Returns a hash code for this {@code Character}; equal to the result
1.704 - * of invoking {@code charValue()}.
1.705 - *
1.706 - * @return a hash code value for this {@code Character}
1.707 - */
1.708 - public int hashCode() {
1.709 - return (int)value;
1.710 - }
1.711 -
1.712 - /**
1.713 - * Compares this object against the specified object.
1.714 - * The result is {@code true} if and only if the argument is not
1.715 - * {@code null} and is a {@code Character} object that
1.716 - * represents the same {@code char} value as this object.
1.717 - *
1.718 - * @param obj the object to compare with.
1.719 - * @return {@code true} if the objects are the same;
1.720 - * {@code false} otherwise.
1.721 - */
1.722 - public boolean equals(Object obj) {
1.723 - if (obj instanceof Character) {
1.724 - return value == ((Character)obj).charValue();
1.725 - }
1.726 - return false;
1.727 - }
1.728 -
1.729 - /**
1.730 - * Returns a {@code String} object representing this
1.731 - * {@code Character}'s value. The result is a string of
1.732 - * length 1 whose sole component is the primitive
1.733 - * {@code char} value represented by this
1.734 - * {@code Character} object.
1.735 - *
1.736 - * @return a string representation of this object.
1.737 - */
1.738 - public String toString() {
1.739 - char buf[] = {value};
1.740 - return String.valueOf(buf);
1.741 - }
1.742 -
1.743 - /**
1.744 - * Returns a {@code String} object representing the
1.745 - * specified {@code char}. The result is a string of length
1.746 - * 1 consisting solely of the specified {@code char}.
1.747 - *
1.748 - * @param c the {@code char} to be converted
1.749 - * @return the string representation of the specified {@code char}
1.750 - * @since 1.4
1.751 - */
1.752 - public static String toString(char c) {
1.753 - return String.valueOf(c);
1.754 - }
1.755 -
1.756 - /**
1.757 - * Determines whether the specified code point is a valid
1.758 - * <a href="http://www.unicode.org/glossary/#code_point">
1.759 - * Unicode code point value</a>.
1.760 - *
1.761 - * @param codePoint the Unicode code point to be tested
1.762 - * @return {@code true} if the specified code point value is between
1.763 - * {@link #MIN_CODE_POINT} and
1.764 - * {@link #MAX_CODE_POINT} inclusive;
1.765 - * {@code false} otherwise.
1.766 - * @since 1.5
1.767 - */
1.768 - public static boolean isValidCodePoint(int codePoint) {
1.769 - // Optimized form of:
1.770 - // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
1.771 - int plane = codePoint >>> 16;
1.772 - return plane < ((MAX_CODE_POINT + 1) >>> 16);
1.773 - }
1.774 -
1.775 - /**
1.776 - * Determines whether the specified character (Unicode code point)
1.777 - * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
1.778 - * Such code points can be represented using a single {@code char}.
1.779 - *
1.780 - * @param codePoint the character (Unicode code point) to be tested
1.781 - * @return {@code true} if the specified code point is between
1.782 - * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
1.783 - * {@code false} otherwise.
1.784 - * @since 1.7
1.785 - */
1.786 - public static boolean isBmpCodePoint(int codePoint) {
1.787 - return codePoint >>> 16 == 0;
1.788 - // Optimized form of:
1.789 - // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
1.790 - // We consistently use logical shift (>>>) to facilitate
1.791 - // additional runtime optimizations.
1.792 - }
1.793 -
1.794 - /**
1.795 - * Determines whether the specified character (Unicode code point)
1.796 - * is in the <a href="#supplementary">supplementary character</a> range.
1.797 - *
1.798 - * @param codePoint the character (Unicode code point) to be tested
1.799 - * @return {@code true} if the specified code point is between
1.800 - * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
1.801 - * {@link #MAX_CODE_POINT} inclusive;
1.802 - * {@code false} otherwise.
1.803 - * @since 1.5
1.804 - */
1.805 - public static boolean isSupplementaryCodePoint(int codePoint) {
1.806 - return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
1.807 - && codePoint < MAX_CODE_POINT + 1;
1.808 - }
1.809 -
1.810 - /**
1.811 - * Determines if the given {@code char} value is a
1.812 - * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
1.813 - * Unicode high-surrogate code unit</a>
1.814 - * (also known as <i>leading-surrogate code unit</i>).
1.815 - *
1.816 - * <p>Such values do not represent characters by themselves,
1.817 - * but are used in the representation of
1.818 - * <a href="#supplementary">supplementary characters</a>
1.819 - * in the UTF-16 encoding.
1.820 - *
1.821 - * @param ch the {@code char} value to be tested.
1.822 - * @return {@code true} if the {@code char} value is between
1.823 - * {@link #MIN_HIGH_SURROGATE} and
1.824 - * {@link #MAX_HIGH_SURROGATE} inclusive;
1.825 - * {@code false} otherwise.
1.826 - * @see Character#isLowSurrogate(char)
1.827 - * @see Character.UnicodeBlock#of(int)
1.828 - * @since 1.5
1.829 - */
1.830 - public static boolean isHighSurrogate(char ch) {
1.831 - // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
1.832 - return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
1.833 - }
1.834 -
1.835 - /**
1.836 - * Determines if the given {@code char} value is a
1.837 - * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
1.838 - * Unicode low-surrogate code unit</a>
1.839 - * (also known as <i>trailing-surrogate code unit</i>).
1.840 - *
1.841 - * <p>Such values do not represent characters by themselves,
1.842 - * but are used in the representation of
1.843 - * <a href="#supplementary">supplementary characters</a>
1.844 - * in the UTF-16 encoding.
1.845 - *
1.846 - * @param ch the {@code char} value to be tested.
1.847 - * @return {@code true} if the {@code char} value is between
1.848 - * {@link #MIN_LOW_SURROGATE} and
1.849 - * {@link #MAX_LOW_SURROGATE} inclusive;
1.850 - * {@code false} otherwise.
1.851 - * @see Character#isHighSurrogate(char)
1.852 - * @since 1.5
1.853 - */
1.854 - public static boolean isLowSurrogate(char ch) {
1.855 - return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
1.856 - }
1.857 -
1.858 - /**
1.859 - * Determines if the given {@code char} value is a Unicode
1.860 - * <i>surrogate code unit</i>.
1.861 - *
1.862 - * <p>Such values do not represent characters by themselves,
1.863 - * but are used in the representation of
1.864 - * <a href="#supplementary">supplementary characters</a>
1.865 - * in the UTF-16 encoding.
1.866 - *
1.867 - * <p>A char value is a surrogate code unit if and only if it is either
1.868 - * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
1.869 - * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
1.870 - *
1.871 - * @param ch the {@code char} value to be tested.
1.872 - * @return {@code true} if the {@code char} value is between
1.873 - * {@link #MIN_SURROGATE} and
1.874 - * {@link #MAX_SURROGATE} inclusive;
1.875 - * {@code false} otherwise.
1.876 - * @since 1.7
1.877 - */
1.878 - public static boolean isSurrogate(char ch) {
1.879 - return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
1.880 - }
1.881 -
1.882 - /**
1.883 - * Determines whether the specified pair of {@code char}
1.884 - * values is a valid
1.885 - * <a href="http://www.unicode.org/glossary/#surrogate_pair">
1.886 - * Unicode surrogate pair</a>.
1.887 -
1.888 - * <p>This method is equivalent to the expression:
1.889 - * <blockquote><pre>
1.890 - * isHighSurrogate(high) && isLowSurrogate(low)
1.891 - * </pre></blockquote>
1.892 - *
1.893 - * @param high the high-surrogate code value to be tested
1.894 - * @param low the low-surrogate code value to be tested
1.895 - * @return {@code true} if the specified high and
1.896 - * low-surrogate code values represent a valid surrogate pair;
1.897 - * {@code false} otherwise.
1.898 - * @since 1.5
1.899 - */
1.900 - public static boolean isSurrogatePair(char high, char low) {
1.901 - return isHighSurrogate(high) && isLowSurrogate(low);
1.902 - }
1.903 -
1.904 - /**
1.905 - * Determines the number of {@code char} values needed to
1.906 - * represent the specified character (Unicode code point). If the
1.907 - * specified character is equal to or greater than 0x10000, then
1.908 - * the method returns 2. Otherwise, the method returns 1.
1.909 - *
1.910 - * <p>This method doesn't validate the specified character to be a
1.911 - * valid Unicode code point. The caller must validate the
1.912 - * character value using {@link #isValidCodePoint(int) isValidCodePoint}
1.913 - * if necessary.
1.914 - *
1.915 - * @param codePoint the character (Unicode code point) to be tested.
1.916 - * @return 2 if the character is a valid supplementary character; 1 otherwise.
1.917 - * @see Character#isSupplementaryCodePoint(int)
1.918 - * @since 1.5
1.919 - */
1.920 - public static int charCount(int codePoint) {
1.921 - return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
1.922 - }
1.923 -
1.924 - /**
1.925 - * Converts the specified surrogate pair to its supplementary code
1.926 - * point value. This method does not validate the specified
1.927 - * surrogate pair. The caller must validate it using {@link
1.928 - * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
1.929 - *
1.930 - * @param high the high-surrogate code unit
1.931 - * @param low the low-surrogate code unit
1.932 - * @return the supplementary code point composed from the
1.933 - * specified surrogate pair.
1.934 - * @since 1.5
1.935 - */
1.936 - public static int toCodePoint(char high, char low) {
1.937 - // Optimized form of:
1.938 - // return ((high - MIN_HIGH_SURROGATE) << 10)
1.939 - // + (low - MIN_LOW_SURROGATE)
1.940 - // + MIN_SUPPLEMENTARY_CODE_POINT;
1.941 - return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
1.942 - - (MIN_HIGH_SURROGATE << 10)
1.943 - - MIN_LOW_SURROGATE);
1.944 - }
1.945 -
1.946 - /**
1.947 - * Returns the code point at the given index of the
1.948 - * {@code CharSequence}. If the {@code char} value at
1.949 - * the given index in the {@code CharSequence} is in the
1.950 - * high-surrogate range, the following index is less than the
1.951 - * length of the {@code CharSequence}, and the
1.952 - * {@code char} value at the following index is in the
1.953 - * low-surrogate range, then the supplementary code point
1.954 - * corresponding to this surrogate pair is returned. Otherwise,
1.955 - * the {@code char} value at the given index is returned.
1.956 - *
1.957 - * @param seq a sequence of {@code char} values (Unicode code
1.958 - * units)
1.959 - * @param index the index to the {@code char} values (Unicode
1.960 - * code units) in {@code seq} to be converted
1.961 - * @return the Unicode code point at the given index
1.962 - * @exception NullPointerException if {@code seq} is null.
1.963 - * @exception IndexOutOfBoundsException if the value
1.964 - * {@code index} is negative or not less than
1.965 - * {@link CharSequence#length() seq.length()}.
1.966 - * @since 1.5
1.967 - */
1.968 - public static int codePointAt(CharSequence seq, int index) {
1.969 - char c1 = seq.charAt(index++);
1.970 - if (isHighSurrogate(c1)) {
1.971 - if (index < seq.length()) {
1.972 - char c2 = seq.charAt(index);
1.973 - if (isLowSurrogate(c2)) {
1.974 - return toCodePoint(c1, c2);
1.975 - }
1.976 - }
1.977 - }
1.978 - return c1;
1.979 - }
1.980 -
1.981 - /**
1.982 - * Returns the code point at the given index of the
1.983 - * {@code char} array. If the {@code char} value at
1.984 - * the given index in the {@code char} array is in the
1.985 - * high-surrogate range, the following index is less than the
1.986 - * length of the {@code char} array, and the
1.987 - * {@code char} value at the following index is in the
1.988 - * low-surrogate range, then the supplementary code point
1.989 - * corresponding to this surrogate pair is returned. Otherwise,
1.990 - * the {@code char} value at the given index is returned.
1.991 - *
1.992 - * @param a the {@code char} array
1.993 - * @param index the index to the {@code char} values (Unicode
1.994 - * code units) in the {@code char} array to be converted
1.995 - * @return the Unicode code point at the given index
1.996 - * @exception NullPointerException if {@code a} is null.
1.997 - * @exception IndexOutOfBoundsException if the value
1.998 - * {@code index} is negative or not less than
1.999 - * the length of the {@code char} array.
1.1000 - * @since 1.5
1.1001 - */
1.1002 - public static int codePointAt(char[] a, int index) {
1.1003 - return codePointAtImpl(a, index, a.length);
1.1004 - }
1.1005 -
1.1006 - /**
1.1007 - * Returns the code point at the given index of the
1.1008 - * {@code char} array, where only array elements with
1.1009 - * {@code index} less than {@code limit} can be used. If
1.1010 - * the {@code char} value at the given index in the
1.1011 - * {@code char} array is in the high-surrogate range, the
1.1012 - * following index is less than the {@code limit}, and the
1.1013 - * {@code char} value at the following index is in the
1.1014 - * low-surrogate range, then the supplementary code point
1.1015 - * corresponding to this surrogate pair is returned. Otherwise,
1.1016 - * the {@code char} value at the given index is returned.
1.1017 - *
1.1018 - * @param a the {@code char} array
1.1019 - * @param index the index to the {@code char} values (Unicode
1.1020 - * code units) in the {@code char} array to be converted
1.1021 - * @param limit the index after the last array element that
1.1022 - * can be used in the {@code char} array
1.1023 - * @return the Unicode code point at the given index
1.1024 - * @exception NullPointerException if {@code a} is null.
1.1025 - * @exception IndexOutOfBoundsException if the {@code index}
1.1026 - * argument is negative or not less than the {@code limit}
1.1027 - * argument, or if the {@code limit} argument is negative or
1.1028 - * greater than the length of the {@code char} array.
1.1029 - * @since 1.5
1.1030 - */
1.1031 - public static int codePointAt(char[] a, int index, int limit) {
1.1032 - if (index >= limit || limit < 0 || limit > a.length) {
1.1033 - throw new IndexOutOfBoundsException();
1.1034 - }
1.1035 - return codePointAtImpl(a, index, limit);
1.1036 - }
1.1037 -
1.1038 - // throws ArrayIndexOutofBoundsException if index out of bounds
1.1039 - static int codePointAtImpl(char[] a, int index, int limit) {
1.1040 - char c1 = a[index++];
1.1041 - if (isHighSurrogate(c1)) {
1.1042 - if (index < limit) {
1.1043 - char c2 = a[index];
1.1044 - if (isLowSurrogate(c2)) {
1.1045 - return toCodePoint(c1, c2);
1.1046 - }
1.1047 - }
1.1048 - }
1.1049 - return c1;
1.1050 - }
1.1051 -
1.1052 - /**
1.1053 - * Returns the code point preceding the given index of the
1.1054 - * {@code CharSequence}. If the {@code char} value at
1.1055 - * {@code (index - 1)} in the {@code CharSequence} is in
1.1056 - * the low-surrogate range, {@code (index - 2)} is not
1.1057 - * negative, and the {@code char} value at {@code (index - 2)}
1.1058 - * in the {@code CharSequence} is in the
1.1059 - * high-surrogate range, then the supplementary code point
1.1060 - * corresponding to this surrogate pair is returned. Otherwise,
1.1061 - * the {@code char} value at {@code (index - 1)} is
1.1062 - * returned.
1.1063 - *
1.1064 - * @param seq the {@code CharSequence} instance
1.1065 - * @param index the index following the code point that should be returned
1.1066 - * @return the Unicode code point value before the given index.
1.1067 - * @exception NullPointerException if {@code seq} is null.
1.1068 - * @exception IndexOutOfBoundsException if the {@code index}
1.1069 - * argument is less than 1 or greater than {@link
1.1070 - * CharSequence#length() seq.length()}.
1.1071 - * @since 1.5
1.1072 - */
1.1073 - public static int codePointBefore(CharSequence seq, int index) {
1.1074 - char c2 = seq.charAt(--index);
1.1075 - if (isLowSurrogate(c2)) {
1.1076 - if (index > 0) {
1.1077 - char c1 = seq.charAt(--index);
1.1078 - if (isHighSurrogate(c1)) {
1.1079 - return toCodePoint(c1, c2);
1.1080 - }
1.1081 - }
1.1082 - }
1.1083 - return c2;
1.1084 - }
1.1085 -
1.1086 - /**
1.1087 - * Returns the code point preceding the given index of the
1.1088 - * {@code char} array. If the {@code char} value at
1.1089 - * {@code (index - 1)} in the {@code char} array is in
1.1090 - * the low-surrogate range, {@code (index - 2)} is not
1.1091 - * negative, and the {@code char} value at {@code (index - 2)}
1.1092 - * in the {@code char} array is in the
1.1093 - * high-surrogate range, then the supplementary code point
1.1094 - * corresponding to this surrogate pair is returned. Otherwise,
1.1095 - * the {@code char} value at {@code (index - 1)} is
1.1096 - * returned.
1.1097 - *
1.1098 - * @param a the {@code char} array
1.1099 - * @param index the index following the code point that should be returned
1.1100 - * @return the Unicode code point value before the given index.
1.1101 - * @exception NullPointerException if {@code a} is null.
1.1102 - * @exception IndexOutOfBoundsException if the {@code index}
1.1103 - * argument is less than 1 or greater than the length of the
1.1104 - * {@code char} array
1.1105 - * @since 1.5
1.1106 - */
1.1107 - public static int codePointBefore(char[] a, int index) {
1.1108 - return codePointBeforeImpl(a, index, 0);
1.1109 - }
1.1110 -
1.1111 - /**
1.1112 - * Returns the code point preceding the given index of the
1.1113 - * {@code char} array, where only array elements with
1.1114 - * {@code index} greater than or equal to {@code start}
1.1115 - * can be used. If the {@code char} value at {@code (index - 1)}
1.1116 - * in the {@code char} array is in the
1.1117 - * low-surrogate range, {@code (index - 2)} is not less than
1.1118 - * {@code start}, and the {@code char} value at
1.1119 - * {@code (index - 2)} in the {@code char} array is in
1.1120 - * the high-surrogate range, then the supplementary code point
1.1121 - * corresponding to this surrogate pair is returned. Otherwise,
1.1122 - * the {@code char} value at {@code (index - 1)} is
1.1123 - * returned.
1.1124 - *
1.1125 - * @param a the {@code char} array
1.1126 - * @param index the index following the code point that should be returned
1.1127 - * @param start the index of the first array element in the
1.1128 - * {@code char} array
1.1129 - * @return the Unicode code point value before the given index.
1.1130 - * @exception NullPointerException if {@code a} is null.
1.1131 - * @exception IndexOutOfBoundsException if the {@code index}
1.1132 - * argument is not greater than the {@code start} argument or
1.1133 - * is greater than the length of the {@code char} array, or
1.1134 - * if the {@code start} argument is negative or not less than
1.1135 - * the length of the {@code char} array.
1.1136 - * @since 1.5
1.1137 - */
1.1138 - public static int codePointBefore(char[] a, int index, int start) {
1.1139 - if (index <= start || start < 0 || start >= a.length) {
1.1140 - throw new IndexOutOfBoundsException();
1.1141 - }
1.1142 - return codePointBeforeImpl(a, index, start);
1.1143 - }
1.1144 -
1.1145 - // throws ArrayIndexOutofBoundsException if index-1 out of bounds
1.1146 - static int codePointBeforeImpl(char[] a, int index, int start) {
1.1147 - char c2 = a[--index];
1.1148 - if (isLowSurrogate(c2)) {
1.1149 - if (index > start) {
1.1150 - char c1 = a[--index];
1.1151 - if (isHighSurrogate(c1)) {
1.1152 - return toCodePoint(c1, c2);
1.1153 - }
1.1154 - }
1.1155 - }
1.1156 - return c2;
1.1157 - }
1.1158 -
1.1159 - /**
1.1160 - * Returns the leading surrogate (a
1.1161 - * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
1.1162 - * high surrogate code unit</a>) of the
1.1163 - * <a href="http://www.unicode.org/glossary/#surrogate_pair">
1.1164 - * surrogate pair</a>
1.1165 - * representing the specified supplementary character (Unicode
1.1166 - * code point) in the UTF-16 encoding. If the specified character
1.1167 - * is not a
1.1168 - * <a href="Character.html#supplementary">supplementary character</a>,
1.1169 - * an unspecified {@code char} is returned.
1.1170 - *
1.1171 - * <p>If
1.1172 - * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
1.1173 - * is {@code true}, then
1.1174 - * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
1.1175 - * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
1.1176 - * are also always {@code true}.
1.1177 - *
1.1178 - * @param codePoint a supplementary character (Unicode code point)
1.1179 - * @return the leading surrogate code unit used to represent the
1.1180 - * character in the UTF-16 encoding
1.1181 - * @since 1.7
1.1182 - */
1.1183 - public static char highSurrogate(int codePoint) {
1.1184 - return (char) ((codePoint >>> 10)
1.1185 - + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
1.1186 - }
1.1187 -
1.1188 - /**
1.1189 - * Returns the trailing surrogate (a
1.1190 - * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
1.1191 - * low surrogate code unit</a>) of the
1.1192 - * <a href="http://www.unicode.org/glossary/#surrogate_pair">
1.1193 - * surrogate pair</a>
1.1194 - * representing the specified supplementary character (Unicode
1.1195 - * code point) in the UTF-16 encoding. If the specified character
1.1196 - * is not a
1.1197 - * <a href="Character.html#supplementary">supplementary character</a>,
1.1198 - * an unspecified {@code char} is returned.
1.1199 - *
1.1200 - * <p>If
1.1201 - * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
1.1202 - * is {@code true}, then
1.1203 - * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
1.1204 - * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
1.1205 - * are also always {@code true}.
1.1206 - *
1.1207 - * @param codePoint a supplementary character (Unicode code point)
1.1208 - * @return the trailing surrogate code unit used to represent the
1.1209 - * character in the UTF-16 encoding
1.1210 - * @since 1.7
1.1211 - */
1.1212 - public static char lowSurrogate(int codePoint) {
1.1213 - return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
1.1214 - }
1.1215 -
1.1216 - /**
1.1217 - * Converts the specified character (Unicode code point) to its
1.1218 - * UTF-16 representation. If the specified code point is a BMP
1.1219 - * (Basic Multilingual Plane or Plane 0) value, the same value is
1.1220 - * stored in {@code dst[dstIndex]}, and 1 is returned. If the
1.1221 - * specified code point is a supplementary character, its
1.1222 - * surrogate values are stored in {@code dst[dstIndex]}
1.1223 - * (high-surrogate) and {@code dst[dstIndex+1]}
1.1224 - * (low-surrogate), and 2 is returned.
1.1225 - *
1.1226 - * @param codePoint the character (Unicode code point) to be converted.
1.1227 - * @param dst an array of {@code char} in which the
1.1228 - * {@code codePoint}'s UTF-16 value is stored.
1.1229 - * @param dstIndex the start index into the {@code dst}
1.1230 - * array where the converted value is stored.
1.1231 - * @return 1 if the code point is a BMP code point, 2 if the
1.1232 - * code point is a supplementary code point.
1.1233 - * @exception IllegalArgumentException if the specified
1.1234 - * {@code codePoint} is not a valid Unicode code point.
1.1235 - * @exception NullPointerException if the specified {@code dst} is null.
1.1236 - * @exception IndexOutOfBoundsException if {@code dstIndex}
1.1237 - * is negative or not less than {@code dst.length}, or if
1.1238 - * {@code dst} at {@code dstIndex} doesn't have enough
1.1239 - * array element(s) to store the resulting {@code char}
1.1240 - * value(s). (If {@code dstIndex} is equal to
1.1241 - * {@code dst.length-1} and the specified
1.1242 - * {@code codePoint} is a supplementary character, the
1.1243 - * high-surrogate value is not stored in
1.1244 - * {@code dst[dstIndex]}.)
1.1245 - * @since 1.5
1.1246 - */
1.1247 - public static int toChars(int codePoint, char[] dst, int dstIndex) {
1.1248 - if (isBmpCodePoint(codePoint)) {
1.1249 - dst[dstIndex] = (char) codePoint;
1.1250 - return 1;
1.1251 - } else if (isValidCodePoint(codePoint)) {
1.1252 - toSurrogates(codePoint, dst, dstIndex);
1.1253 - return 2;
1.1254 - } else {
1.1255 - throw new IllegalArgumentException();
1.1256 - }
1.1257 - }
1.1258 -
1.1259 - /**
1.1260 - * Converts the specified character (Unicode code point) to its
1.1261 - * UTF-16 representation stored in a {@code char} array. If
1.1262 - * the specified code point is a BMP (Basic Multilingual Plane or
1.1263 - * Plane 0) value, the resulting {@code char} array has
1.1264 - * the same value as {@code codePoint}. If the specified code
1.1265 - * point is a supplementary code point, the resulting
1.1266 - * {@code char} array has the corresponding surrogate pair.
1.1267 - *
1.1268 - * @param codePoint a Unicode code point
1.1269 - * @return a {@code char} array having
1.1270 - * {@code codePoint}'s UTF-16 representation.
1.1271 - * @exception IllegalArgumentException if the specified
1.1272 - * {@code codePoint} is not a valid Unicode code point.
1.1273 - * @since 1.5
1.1274 - */
1.1275 - public static char[] toChars(int codePoint) {
1.1276 - if (isBmpCodePoint(codePoint)) {
1.1277 - return new char[] { (char) codePoint };
1.1278 - } else if (isValidCodePoint(codePoint)) {
1.1279 - char[] result = new char[2];
1.1280 - toSurrogates(codePoint, result, 0);
1.1281 - return result;
1.1282 - } else {
1.1283 - throw new IllegalArgumentException();
1.1284 - }
1.1285 - }
1.1286 -
1.1287 - static void toSurrogates(int codePoint, char[] dst, int index) {
1.1288 - // We write elements "backwards" to guarantee all-or-nothing
1.1289 - dst[index+1] = lowSurrogate(codePoint);
1.1290 - dst[index] = highSurrogate(codePoint);
1.1291 - }
1.1292 -
1.1293 - /**
1.1294 - * Returns the number of Unicode code points in the text range of
1.1295 - * the specified char sequence. The text range begins at the
1.1296 - * specified {@code beginIndex} and extends to the
1.1297 - * {@code char} at index {@code endIndex - 1}. Thus the
1.1298 - * length (in {@code char}s) of the text range is
1.1299 - * {@code endIndex-beginIndex}. Unpaired surrogates within
1.1300 - * the text range count as one code point each.
1.1301 - *
1.1302 - * @param seq the char sequence
1.1303 - * @param beginIndex the index to the first {@code char} of
1.1304 - * the text range.
1.1305 - * @param endIndex the index after the last {@code char} of
1.1306 - * the text range.
1.1307 - * @return the number of Unicode code points in the specified text
1.1308 - * range
1.1309 - * @exception NullPointerException if {@code seq} is null.
1.1310 - * @exception IndexOutOfBoundsException if the
1.1311 - * {@code beginIndex} is negative, or {@code endIndex}
1.1312 - * is larger than the length of the given sequence, or
1.1313 - * {@code beginIndex} is larger than {@code endIndex}.
1.1314 - * @since 1.5
1.1315 - */
1.1316 - public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
1.1317 - int length = seq.length();
1.1318 - if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
1.1319 - throw new IndexOutOfBoundsException();
1.1320 - }
1.1321 - int n = endIndex - beginIndex;
1.1322 - for (int i = beginIndex; i < endIndex; ) {
1.1323 - if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
1.1324 - isLowSurrogate(seq.charAt(i))) {
1.1325 - n--;
1.1326 - i++;
1.1327 - }
1.1328 - }
1.1329 - return n;
1.1330 - }
1.1331 -
1.1332 - /**
1.1333 - * Returns the number of Unicode code points in a subarray of the
1.1334 - * {@code char} array argument. The {@code offset}
1.1335 - * argument is the index of the first {@code char} of the
1.1336 - * subarray and the {@code count} argument specifies the
1.1337 - * length of the subarray in {@code char}s. Unpaired
1.1338 - * surrogates within the subarray count as one code point each.
1.1339 - *
1.1340 - * @param a the {@code char} array
1.1341 - * @param offset the index of the first {@code char} in the
1.1342 - * given {@code char} array
1.1343 - * @param count the length of the subarray in {@code char}s
1.1344 - * @return the number of Unicode code points in the specified subarray
1.1345 - * @exception NullPointerException if {@code a} is null.
1.1346 - * @exception IndexOutOfBoundsException if {@code offset} or
1.1347 - * {@code count} is negative, or if {@code offset +
1.1348 - * count} is larger than the length of the given array.
1.1349 - * @since 1.5
1.1350 - */
1.1351 - public static int codePointCount(char[] a, int offset, int count) {
1.1352 - if (count > a.length - offset || offset < 0 || count < 0) {
1.1353 - throw new IndexOutOfBoundsException();
1.1354 - }
1.1355 - return codePointCountImpl(a, offset, count);
1.1356 - }
1.1357 -
1.1358 - static int codePointCountImpl(char[] a, int offset, int count) {
1.1359 - int endIndex = offset + count;
1.1360 - int n = count;
1.1361 - for (int i = offset; i < endIndex; ) {
1.1362 - if (isHighSurrogate(a[i++]) && i < endIndex &&
1.1363 - isLowSurrogate(a[i])) {
1.1364 - n--;
1.1365 - i++;
1.1366 - }
1.1367 - }
1.1368 - return n;
1.1369 - }
1.1370 -
1.1371 - /**
1.1372 - * Returns the index within the given char sequence that is offset
1.1373 - * from the given {@code index} by {@code codePointOffset}
1.1374 - * code points. Unpaired surrogates within the text range given by
1.1375 - * {@code index} and {@code codePointOffset} count as
1.1376 - * one code point each.
1.1377 - *
1.1378 - * @param seq the char sequence
1.1379 - * @param index the index to be offset
1.1380 - * @param codePointOffset the offset in code points
1.1381 - * @return the index within the char sequence
1.1382 - * @exception NullPointerException if {@code seq} is null.
1.1383 - * @exception IndexOutOfBoundsException if {@code index}
1.1384 - * is negative or larger then the length of the char sequence,
1.1385 - * or if {@code codePointOffset} is positive and the
1.1386 - * subsequence starting with {@code index} has fewer than
1.1387 - * {@code codePointOffset} code points, or if
1.1388 - * {@code codePointOffset} is negative and the subsequence
1.1389 - * before {@code index} has fewer than the absolute value
1.1390 - * of {@code codePointOffset} code points.
1.1391 - * @since 1.5
1.1392 - */
1.1393 - public static int offsetByCodePoints(CharSequence seq, int index,
1.1394 - int codePointOffset) {
1.1395 - int length = seq.length();
1.1396 - if (index < 0 || index > length) {
1.1397 - throw new IndexOutOfBoundsException();
1.1398 - }
1.1399 -
1.1400 - int x = index;
1.1401 - if (codePointOffset >= 0) {
1.1402 - int i;
1.1403 - for (i = 0; x < length && i < codePointOffset; i++) {
1.1404 - if (isHighSurrogate(seq.charAt(x++)) && x < length &&
1.1405 - isLowSurrogate(seq.charAt(x))) {
1.1406 - x++;
1.1407 - }
1.1408 - }
1.1409 - if (i < codePointOffset) {
1.1410 - throw new IndexOutOfBoundsException();
1.1411 - }
1.1412 - } else {
1.1413 - int i;
1.1414 - for (i = codePointOffset; x > 0 && i < 0; i++) {
1.1415 - if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
1.1416 - isHighSurrogate(seq.charAt(x-1))) {
1.1417 - x--;
1.1418 - }
1.1419 - }
1.1420 - if (i < 0) {
1.1421 - throw new IndexOutOfBoundsException();
1.1422 - }
1.1423 - }
1.1424 - return x;
1.1425 - }
1.1426 -
1.1427 - /**
1.1428 - * Returns the index within the given {@code char} subarray
1.1429 - * that is offset from the given {@code index} by
1.1430 - * {@code codePointOffset} code points. The
1.1431 - * {@code start} and {@code count} arguments specify a
1.1432 - * subarray of the {@code char} array. Unpaired surrogates
1.1433 - * within the text range given by {@code index} and
1.1434 - * {@code codePointOffset} count as one code point each.
1.1435 - *
1.1436 - * @param a the {@code char} array
1.1437 - * @param start the index of the first {@code char} of the
1.1438 - * subarray
1.1439 - * @param count the length of the subarray in {@code char}s
1.1440 - * @param index the index to be offset
1.1441 - * @param codePointOffset the offset in code points
1.1442 - * @return the index within the subarray
1.1443 - * @exception NullPointerException if {@code a} is null.
1.1444 - * @exception IndexOutOfBoundsException
1.1445 - * if {@code start} or {@code count} is negative,
1.1446 - * or if {@code start + count} is larger than the length of
1.1447 - * the given array,
1.1448 - * or if {@code index} is less than {@code start} or
1.1449 - * larger then {@code start + count},
1.1450 - * or if {@code codePointOffset} is positive and the text range
1.1451 - * starting with {@code index} and ending with {@code start + count - 1}
1.1452 - * has fewer than {@code codePointOffset} code
1.1453 - * points,
1.1454 - * or if {@code codePointOffset} is negative and the text range
1.1455 - * starting with {@code start} and ending with {@code index - 1}
1.1456 - * has fewer than the absolute value of
1.1457 - * {@code codePointOffset} code points.
1.1458 - * @since 1.5
1.1459 - */
1.1460 - public static int offsetByCodePoints(char[] a, int start, int count,
1.1461 - int index, int codePointOffset) {
1.1462 - if (count > a.length-start || start < 0 || count < 0
1.1463 - || index < start || index > start+count) {
1.1464 - throw new IndexOutOfBoundsException();
1.1465 - }
1.1466 - return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
1.1467 - }
1.1468 -
1.1469 - static int offsetByCodePointsImpl(char[]a, int start, int count,
1.1470 - int index, int codePointOffset) {
1.1471 - int x = index;
1.1472 - if (codePointOffset >= 0) {
1.1473 - int limit = start + count;
1.1474 - int i;
1.1475 - for (i = 0; x < limit && i < codePointOffset; i++) {
1.1476 - if (isHighSurrogate(a[x++]) && x < limit &&
1.1477 - isLowSurrogate(a[x])) {
1.1478 - x++;
1.1479 - }
1.1480 - }
1.1481 - if (i < codePointOffset) {
1.1482 - throw new IndexOutOfBoundsException();
1.1483 - }
1.1484 - } else {
1.1485 - int i;
1.1486 - for (i = codePointOffset; x > start && i < 0; i++) {
1.1487 - if (isLowSurrogate(a[--x]) && x > start &&
1.1488 - isHighSurrogate(a[x-1])) {
1.1489 - x--;
1.1490 - }
1.1491 - }
1.1492 - if (i < 0) {
1.1493 - throw new IndexOutOfBoundsException();
1.1494 - }
1.1495 - }
1.1496 - return x;
1.1497 - }
1.1498 -
1.1499 - /**
1.1500 - * Determines if the specified character is a lowercase character.
1.1501 - * <p>
1.1502 - * A character is lowercase if its general category type, provided
1.1503 - * by {@code Character.getType(ch)}, is
1.1504 - * {@code LOWERCASE_LETTER}, or it has contributory property
1.1505 - * Other_Lowercase as defined by the Unicode Standard.
1.1506 - * <p>
1.1507 - * The following are examples of lowercase characters:
1.1508 - * <p><blockquote><pre>
1.1509 - * a b c d e f g h i j k l m n o p q r s t u v w x y z
1.1510 - * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
1.1511 - * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
1.1512 - * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
1.1513 - * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
1.1514 - * </pre></blockquote>
1.1515 - * <p> Many other Unicode characters are lowercase too.
1.1516 - *
1.1517 - * <p><b>Note:</b> This method cannot handle <a
1.1518 - * href="#supplementary"> supplementary characters</a>. To support
1.1519 - * all Unicode characters, including supplementary characters, use
1.1520 - * the {@link #isLowerCase(int)} method.
1.1521 - *
1.1522 - * @param ch the character to be tested.
1.1523 - * @return {@code true} if the character is lowercase;
1.1524 - * {@code false} otherwise.
1.1525 - * @see Character#isLowerCase(char)
1.1526 - * @see Character#isTitleCase(char)
1.1527 - * @see Character#toLowerCase(char)
1.1528 - * @see Character#getType(char)
1.1529 - */
1.1530 - public static boolean isLowerCase(char ch) {
1.1531 - throw new UnsupportedOperationException();
1.1532 - }
1.1533 -
1.1534 - /**
1.1535 - * Determines if the specified character is an uppercase character.
1.1536 - * <p>
1.1537 - * A character is uppercase if its general category type, provided by
1.1538 - * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}.
1.1539 - * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
1.1540 - * <p>
1.1541 - * The following are examples of uppercase characters:
1.1542 - * <p><blockquote><pre>
1.1543 - * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
1.1544 - * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
1.1545 - * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
1.1546 - * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
1.1547 - * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
1.1548 - * </pre></blockquote>
1.1549 - * <p> Many other Unicode characters are uppercase too.<p>
1.1550 - *
1.1551 - * <p><b>Note:</b> This method cannot handle <a
1.1552 - * href="#supplementary"> supplementary characters</a>. To support
1.1553 - * all Unicode characters, including supplementary characters, use
1.1554 - * the {@link #isUpperCase(int)} method.
1.1555 - *
1.1556 - * @param ch the character to be tested.
1.1557 - * @return {@code true} if the character is uppercase;
1.1558 - * {@code false} otherwise.
1.1559 - * @see Character#isLowerCase(char)
1.1560 - * @see Character#isTitleCase(char)
1.1561 - * @see Character#toUpperCase(char)
1.1562 - * @see Character#getType(char)
1.1563 - * @since 1.0
1.1564 - */
1.1565 - public static boolean isUpperCase(char ch) {
1.1566 - throw new UnsupportedOperationException();
1.1567 - }
1.1568 -
1.1569 - /**
1.1570 - * Determines if the specified character is a titlecase character.
1.1571 - * <p>
1.1572 - * A character is a titlecase character if its general
1.1573 - * category type, provided by {@code Character.getType(ch)},
1.1574 - * is {@code TITLECASE_LETTER}.
1.1575 - * <p>
1.1576 - * Some characters look like pairs of Latin letters. For example, there
1.1577 - * is an uppercase letter that looks like "LJ" and has a corresponding
1.1578 - * lowercase letter that looks like "lj". A third form, which looks like "Lj",
1.1579 - * is the appropriate form to use when rendering a word in lowercase
1.1580 - * with initial capitals, as for a book title.
1.1581 - * <p>
1.1582 - * These are some of the Unicode characters for which this method returns
1.1583 - * {@code true}:
1.1584 - * <ul>
1.1585 - * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
1.1586 - * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
1.1587 - * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
1.1588 - * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
1.1589 - * </ul>
1.1590 - * <p> Many other Unicode characters are titlecase too.<p>
1.1591 - *
1.1592 - * <p><b>Note:</b> This method cannot handle <a
1.1593 - * href="#supplementary"> supplementary characters</a>. To support
1.1594 - * all Unicode characters, including supplementary characters, use
1.1595 - * the {@link #isTitleCase(int)} method.
1.1596 - *
1.1597 - * @param ch the character to be tested.
1.1598 - * @return {@code true} if the character is titlecase;
1.1599 - * {@code false} otherwise.
1.1600 - * @see Character#isLowerCase(char)
1.1601 - * @see Character#isUpperCase(char)
1.1602 - * @see Character#toTitleCase(char)
1.1603 - * @see Character#getType(char)
1.1604 - * @since 1.0.2
1.1605 - */
1.1606 - public static boolean isTitleCase(char ch) {
1.1607 - return isTitleCase((int)ch);
1.1608 - }
1.1609 -
1.1610 - /**
1.1611 - * Determines if the specified character (Unicode code point) is a titlecase character.
1.1612 - * <p>
1.1613 - * A character is a titlecase character if its general
1.1614 - * category type, provided by {@link Character#getType(int) getType(codePoint)},
1.1615 - * is {@code TITLECASE_LETTER}.
1.1616 - * <p>
1.1617 - * Some characters look like pairs of Latin letters. For example, there
1.1618 - * is an uppercase letter that looks like "LJ" and has a corresponding
1.1619 - * lowercase letter that looks like "lj". A third form, which looks like "Lj",
1.1620 - * is the appropriate form to use when rendering a word in lowercase
1.1621 - * with initial capitals, as for a book title.
1.1622 - * <p>
1.1623 - * These are some of the Unicode characters for which this method returns
1.1624 - * {@code true}:
1.1625 - * <ul>
1.1626 - * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
1.1627 - * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
1.1628 - * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
1.1629 - * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
1.1630 - * </ul>
1.1631 - * <p> Many other Unicode characters are titlecase too.<p>
1.1632 - *
1.1633 - * @param codePoint the character (Unicode code point) to be tested.
1.1634 - * @return {@code true} if the character is titlecase;
1.1635 - * {@code false} otherwise.
1.1636 - * @see Character#isLowerCase(int)
1.1637 - * @see Character#isUpperCase(int)
1.1638 - * @see Character#toTitleCase(int)
1.1639 - * @see Character#getType(int)
1.1640 - * @since 1.5
1.1641 - */
1.1642 - public static boolean isTitleCase(int codePoint) {
1.1643 - return getType(codePoint) == Character.TITLECASE_LETTER;
1.1644 - }
1.1645 -
1.1646 - /**
1.1647 - * Determines if the specified character is a digit.
1.1648 - * <p>
1.1649 - * A character is a digit if its general category type, provided
1.1650 - * by {@code Character.getType(ch)}, is
1.1651 - * {@code DECIMAL_DIGIT_NUMBER}.
1.1652 - * <p>
1.1653 - * Some Unicode character ranges that contain digits:
1.1654 - * <ul>
1.1655 - * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
1.1656 - * ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
1.1657 - * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
1.1658 - * Arabic-Indic digits
1.1659 - * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
1.1660 - * Extended Arabic-Indic digits
1.1661 - * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
1.1662 - * Devanagari digits
1.1663 - * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
1.1664 - * Fullwidth digits
1.1665 - * </ul>
1.1666 - *
1.1667 - * Many other character ranges contain digits as well.
1.1668 - *
1.1669 - * <p><b>Note:</b> This method cannot handle <a
1.1670 - * href="#supplementary"> supplementary characters</a>. To support
1.1671 - * all Unicode characters, including supplementary characters, use
1.1672 - * the {@link #isDigit(int)} method.
1.1673 - *
1.1674 - * @param ch the character to be tested.
1.1675 - * @return {@code true} if the character is a digit;
1.1676 - * {@code false} otherwise.
1.1677 - * @see Character#digit(char, int)
1.1678 - * @see Character#forDigit(int, int)
1.1679 - * @see Character#getType(char)
1.1680 - */
1.1681 - public static boolean isDigit(char ch) {
1.1682 - return isDigit((int)ch);
1.1683 - }
1.1684 -
1.1685 - /**
1.1686 - * Determines if the specified character (Unicode code point) is a digit.
1.1687 - * <p>
1.1688 - * A character is a digit if its general category type, provided
1.1689 - * by {@link Character#getType(int) getType(codePoint)}, is
1.1690 - * {@code DECIMAL_DIGIT_NUMBER}.
1.1691 - * <p>
1.1692 - * Some Unicode character ranges that contain digits:
1.1693 - * <ul>
1.1694 - * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
1.1695 - * ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
1.1696 - * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
1.1697 - * Arabic-Indic digits
1.1698 - * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
1.1699 - * Extended Arabic-Indic digits
1.1700 - * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
1.1701 - * Devanagari digits
1.1702 - * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
1.1703 - * Fullwidth digits
1.1704 - * </ul>
1.1705 - *
1.1706 - * Many other character ranges contain digits as well.
1.1707 - *
1.1708 - * @param codePoint the character (Unicode code point) to be tested.
1.1709 - * @return {@code true} if the character is a digit;
1.1710 - * {@code false} otherwise.
1.1711 - * @see Character#forDigit(int, int)
1.1712 - * @see Character#getType(int)
1.1713 - * @since 1.5
1.1714 - */
1.1715 - public static boolean isDigit(int codePoint) {
1.1716 - return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
1.1717 - }
1.1718 -
1.1719 - /**
1.1720 - * Determines if a character is defined in Unicode.
1.1721 - * <p>
1.1722 - * A character is defined if at least one of the following is true:
1.1723 - * <ul>
1.1724 - * <li>It has an entry in the UnicodeData file.
1.1725 - * <li>It has a value in a range defined by the UnicodeData file.
1.1726 - * </ul>
1.1727 - *
1.1728 - * <p><b>Note:</b> This method cannot handle <a
1.1729 - * href="#supplementary"> supplementary characters</a>. To support
1.1730 - * all Unicode characters, including supplementary characters, use
1.1731 - * the {@link #isDefined(int)} method.
1.1732 - *
1.1733 - * @param ch the character to be tested
1.1734 - * @return {@code true} if the character has a defined meaning
1.1735 - * in Unicode; {@code false} otherwise.
1.1736 - * @see Character#isDigit(char)
1.1737 - * @see Character#isLetter(char)
1.1738 - * @see Character#isLetterOrDigit(char)
1.1739 - * @see Character#isLowerCase(char)
1.1740 - * @see Character#isTitleCase(char)
1.1741 - * @see Character#isUpperCase(char)
1.1742 - * @since 1.0.2
1.1743 - */
1.1744 - public static boolean isDefined(char ch) {
1.1745 - return isDefined((int)ch);
1.1746 - }
1.1747 -
1.1748 - /**
1.1749 - * Determines if a character (Unicode code point) is defined in Unicode.
1.1750 - * <p>
1.1751 - * A character is defined if at least one of the following is true:
1.1752 - * <ul>
1.1753 - * <li>It has an entry in the UnicodeData file.
1.1754 - * <li>It has a value in a range defined by the UnicodeData file.
1.1755 - * </ul>
1.1756 - *
1.1757 - * @param codePoint the character (Unicode code point) to be tested.
1.1758 - * @return {@code true} if the character has a defined meaning
1.1759 - * in Unicode; {@code false} otherwise.
1.1760 - * @see Character#isDigit(int)
1.1761 - * @see Character#isLetter(int)
1.1762 - * @see Character#isLetterOrDigit(int)
1.1763 - * @see Character#isLowerCase(int)
1.1764 - * @see Character#isTitleCase(int)
1.1765 - * @see Character#isUpperCase(int)
1.1766 - * @since 1.5
1.1767 - */
1.1768 - public static boolean isDefined(int codePoint) {
1.1769 - return getType(codePoint) != Character.UNASSIGNED;
1.1770 - }
1.1771 -
1.1772 - /**
1.1773 - * Determines if the specified character is a letter.
1.1774 - * <p>
1.1775 - * A character is considered to be a letter if its general
1.1776 - * category type, provided by {@code Character.getType(ch)},
1.1777 - * is any of the following:
1.1778 - * <ul>
1.1779 - * <li> {@code UPPERCASE_LETTER}
1.1780 - * <li> {@code LOWERCASE_LETTER}
1.1781 - * <li> {@code TITLECASE_LETTER}
1.1782 - * <li> {@code MODIFIER_LETTER}
1.1783 - * <li> {@code OTHER_LETTER}
1.1784 - * </ul>
1.1785 - *
1.1786 - * Not all letters have case. Many characters are
1.1787 - * letters but are neither uppercase nor lowercase nor titlecase.
1.1788 - *
1.1789 - * <p><b>Note:</b> This method cannot handle <a
1.1790 - * href="#supplementary"> supplementary characters</a>. To support
1.1791 - * all Unicode characters, including supplementary characters, use
1.1792 - * the {@link #isLetter(int)} method.
1.1793 - *
1.1794 - * @param ch the character to be tested.
1.1795 - * @return {@code true} if the character is a letter;
1.1796 - * {@code false} otherwise.
1.1797 - * @see Character#isDigit(char)
1.1798 - * @see Character#isJavaIdentifierStart(char)
1.1799 - * @see Character#isJavaLetter(char)
1.1800 - * @see Character#isJavaLetterOrDigit(char)
1.1801 - * @see Character#isLetterOrDigit(char)
1.1802 - * @see Character#isLowerCase(char)
1.1803 - * @see Character#isTitleCase(char)
1.1804 - * @see Character#isUnicodeIdentifierStart(char)
1.1805 - * @see Character#isUpperCase(char)
1.1806 - */
1.1807 - public static boolean isLetter(char ch) {
1.1808 - return isLetter((int)ch);
1.1809 - }
1.1810 -
1.1811 - /**
1.1812 - * Determines if the specified character (Unicode code point) is a letter.
1.1813 - * <p>
1.1814 - * A character is considered to be a letter if its general
1.1815 - * category type, provided by {@link Character#getType(int) getType(codePoint)},
1.1816 - * is any of the following:
1.1817 - * <ul>
1.1818 - * <li> {@code UPPERCASE_LETTER}
1.1819 - * <li> {@code LOWERCASE_LETTER}
1.1820 - * <li> {@code TITLECASE_LETTER}
1.1821 - * <li> {@code MODIFIER_LETTER}
1.1822 - * <li> {@code OTHER_LETTER}
1.1823 - * </ul>
1.1824 - *
1.1825 - * Not all letters have case. Many characters are
1.1826 - * letters but are neither uppercase nor lowercase nor titlecase.
1.1827 - *
1.1828 - * @param codePoint the character (Unicode code point) to be tested.
1.1829 - * @return {@code true} if the character is a letter;
1.1830 - * {@code false} otherwise.
1.1831 - * @see Character#isDigit(int)
1.1832 - * @see Character#isJavaIdentifierStart(int)
1.1833 - * @see Character#isLetterOrDigit(int)
1.1834 - * @see Character#isLowerCase(int)
1.1835 - * @see Character#isTitleCase(int)
1.1836 - * @see Character#isUnicodeIdentifierStart(int)
1.1837 - * @see Character#isUpperCase(int)
1.1838 - * @since 1.5
1.1839 - */
1.1840 - public static boolean isLetter(int codePoint) {
1.1841 - return ((((1 << Character.UPPERCASE_LETTER) |
1.1842 - (1 << Character.LOWERCASE_LETTER) |
1.1843 - (1 << Character.TITLECASE_LETTER) |
1.1844 - (1 << Character.MODIFIER_LETTER) |
1.1845 - (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
1.1846 - != 0;
1.1847 - }
1.1848 -
1.1849 - /**
1.1850 - * Determines if the specified character is a letter or digit.
1.1851 - * <p>
1.1852 - * A character is considered to be a letter or digit if either
1.1853 - * {@code Character.isLetter(char ch)} or
1.1854 - * {@code Character.isDigit(char ch)} returns
1.1855 - * {@code true} for the character.
1.1856 - *
1.1857 - * <p><b>Note:</b> This method cannot handle <a
1.1858 - * href="#supplementary"> supplementary characters</a>. To support
1.1859 - * all Unicode characters, including supplementary characters, use
1.1860 - * the {@link #isLetterOrDigit(int)} method.
1.1861 - *
1.1862 - * @param ch the character to be tested.
1.1863 - * @return {@code true} if the character is a letter or digit;
1.1864 - * {@code false} otherwise.
1.1865 - * @see Character#isDigit(char)
1.1866 - * @see Character#isJavaIdentifierPart(char)
1.1867 - * @see Character#isJavaLetter(char)
1.1868 - * @see Character#isJavaLetterOrDigit(char)
1.1869 - * @see Character#isLetter(char)
1.1870 - * @see Character#isUnicodeIdentifierPart(char)
1.1871 - * @since 1.0.2
1.1872 - */
1.1873 - public static boolean isLetterOrDigit(char ch) {
1.1874 - return isLetterOrDigit((int)ch);
1.1875 - }
1.1876 -
1.1877 - /**
1.1878 - * Determines if the specified character (Unicode code point) is a letter or digit.
1.1879 - * <p>
1.1880 - * A character is considered to be a letter or digit if either
1.1881 - * {@link #isLetter(int) isLetter(codePoint)} or
1.1882 - * {@link #isDigit(int) isDigit(codePoint)} returns
1.1883 - * {@code true} for the character.
1.1884 - *
1.1885 - * @param codePoint the character (Unicode code point) to be tested.
1.1886 - * @return {@code true} if the character is a letter or digit;
1.1887 - * {@code false} otherwise.
1.1888 - * @see Character#isDigit(int)
1.1889 - * @see Character#isJavaIdentifierPart(int)
1.1890 - * @see Character#isLetter(int)
1.1891 - * @see Character#isUnicodeIdentifierPart(int)
1.1892 - * @since 1.5
1.1893 - */
1.1894 - public static boolean isLetterOrDigit(int codePoint) {
1.1895 - return ((((1 << Character.UPPERCASE_LETTER) |
1.1896 - (1 << Character.LOWERCASE_LETTER) |
1.1897 - (1 << Character.TITLECASE_LETTER) |
1.1898 - (1 << Character.MODIFIER_LETTER) |
1.1899 - (1 << Character.OTHER_LETTER) |
1.1900 - (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
1.1901 - != 0;
1.1902 - }
1.1903 -
1.1904 - static int getType(int x) {
1.1905 - throw new UnsupportedOperationException();
1.1906 - }
1.1907 -
1.1908 - /**
1.1909 - * Converts the character argument to lowercase using case
1.1910 - * mapping information from the UnicodeData file.
1.1911 - * <p>
1.1912 - * Note that
1.1913 - * {@code Character.isLowerCase(Character.toLowerCase(ch))}
1.1914 - * does not always return {@code true} for some ranges of
1.1915 - * characters, particularly those that are symbols or ideographs.
1.1916 - *
1.1917 - * <p>In general, {@link String#toLowerCase()} should be used to map
1.1918 - * characters to lowercase. {@code String} case mapping methods
1.1919 - * have several benefits over {@code Character} case mapping methods.
1.1920 - * {@code String} case mapping methods can perform locale-sensitive
1.1921 - * mappings, context-sensitive mappings, and 1:M character mappings, whereas
1.1922 - * the {@code Character} case mapping methods cannot.
1.1923 - *
1.1924 - * <p><b>Note:</b> This method cannot handle <a
1.1925 - * href="#supplementary"> supplementary characters</a>. To support
1.1926 - * all Unicode characters, including supplementary characters, use
1.1927 - * the {@link #toLowerCase(int)} method.
1.1928 - *
1.1929 - * @param ch the character to be converted.
1.1930 - * @return the lowercase equivalent of the character, if any;
1.1931 - * otherwise, the character itself.
1.1932 - * @see Character#isLowerCase(char)
1.1933 - * @see String#toLowerCase()
1.1934 - */
1.1935 - public static char toLowerCase(char ch) {
1.1936 - throw new UnsupportedOperationException();
1.1937 - }
1.1938 -
1.1939 - /**
1.1940 - * Converts the character argument to uppercase using case mapping
1.1941 - * information from the UnicodeData file.
1.1942 - * <p>
1.1943 - * Note that
1.1944 - * {@code Character.isUpperCase(Character.toUpperCase(ch))}
1.1945 - * does not always return {@code true} for some ranges of
1.1946 - * characters, particularly those that are symbols or ideographs.
1.1947 - *
1.1948 - * <p>In general, {@link String#toUpperCase()} should be used to map
1.1949 - * characters to uppercase. {@code String} case mapping methods
1.1950 - * have several benefits over {@code Character} case mapping methods.
1.1951 - * {@code String} case mapping methods can perform locale-sensitive
1.1952 - * mappings, context-sensitive mappings, and 1:M character mappings, whereas
1.1953 - * the {@code Character} case mapping methods cannot.
1.1954 - *
1.1955 - * <p><b>Note:</b> This method cannot handle <a
1.1956 - * href="#supplementary"> supplementary characters</a>. To support
1.1957 - * all Unicode characters, including supplementary characters, use
1.1958 - * the {@link #toUpperCase(int)} method.
1.1959 - *
1.1960 - * @param ch the character to be converted.
1.1961 - * @return the uppercase equivalent of the character, if any;
1.1962 - * otherwise, the character itself.
1.1963 - * @see Character#isUpperCase(char)
1.1964 - * @see String#toUpperCase()
1.1965 - */
1.1966 - public static char toUpperCase(char ch) {
1.1967 - throw new UnsupportedOperationException();
1.1968 - }
1.1969 -
1.1970 - /**
1.1971 - * Returns the numeric value of the character {@code ch} in the
1.1972 - * specified radix.
1.1973 - * <p>
1.1974 - * If the radix is not in the range {@code MIN_RADIX} ≤
1.1975 - * {@code radix} ≤ {@code MAX_RADIX} or if the
1.1976 - * value of {@code ch} is not a valid digit in the specified
1.1977 - * radix, {@code -1} is returned. A character is a valid digit
1.1978 - * if at least one of the following is true:
1.1979 - * <ul>
1.1980 - * <li>The method {@code isDigit} is {@code true} of the character
1.1981 - * and the Unicode decimal digit value of the character (or its
1.1982 - * single-character decomposition) is less than the specified radix.
1.1983 - * In this case the decimal digit value is returned.
1.1984 - * <li>The character is one of the uppercase Latin letters
1.1985 - * {@code 'A'} through {@code 'Z'} and its code is less than
1.1986 - * {@code radix + 'A' - 10}.
1.1987 - * In this case, {@code ch - 'A' + 10}
1.1988 - * is returned.
1.1989 - * <li>The character is one of the lowercase Latin letters
1.1990 - * {@code 'a'} through {@code 'z'} and its code is less than
1.1991 - * {@code radix + 'a' - 10}.
1.1992 - * In this case, {@code ch - 'a' + 10}
1.1993 - * is returned.
1.1994 - * <li>The character is one of the fullwidth uppercase Latin letters A
1.1995 - * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
1.1996 - * and its code is less than
1.1997 - * {@code radix + '\u005CuFF21' - 10}.
1.1998 - * In this case, {@code ch - '\u005CuFF21' + 10}
1.1999 - * is returned.
1.2000 - * <li>The character is one of the fullwidth lowercase Latin letters a
1.2001 - * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
1.2002 - * and its code is less than
1.2003 - * {@code radix + '\u005CuFF41' - 10}.
1.2004 - * In this case, {@code ch - '\u005CuFF41' + 10}
1.2005 - * is returned.
1.2006 - * </ul>
1.2007 - *
1.2008 - * <p><b>Note:</b> This method cannot handle <a
1.2009 - * href="#supplementary"> supplementary characters</a>. To support
1.2010 - * all Unicode characters, including supplementary characters, use
1.2011 - * the {@link #digit(int, int)} method.
1.2012 - *
1.2013 - * @param ch the character to be converted.
1.2014 - * @param radix the radix.
1.2015 - * @return the numeric value represented by the character in the
1.2016 - * specified radix.
1.2017 - * @see Character#forDigit(int, int)
1.2018 - * @see Character#isDigit(char)
1.2019 - */
1.2020 - public static int digit(char ch, int radix) {
1.2021 - return digit((int)ch, radix);
1.2022 - }
1.2023 -
1.2024 - /**
1.2025 - * Returns the numeric value of the specified character (Unicode
1.2026 - * code point) in the specified radix.
1.2027 - *
1.2028 - * <p>If the radix is not in the range {@code MIN_RADIX} ≤
1.2029 - * {@code radix} ≤ {@code MAX_RADIX} or if the
1.2030 - * character is not a valid digit in the specified
1.2031 - * radix, {@code -1} is returned. A character is a valid digit
1.2032 - * if at least one of the following is true:
1.2033 - * <ul>
1.2034 - * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
1.2035 - * and the Unicode decimal digit value of the character (or its
1.2036 - * single-character decomposition) is less than the specified radix.
1.2037 - * In this case the decimal digit value is returned.
1.2038 - * <li>The character is one of the uppercase Latin letters
1.2039 - * {@code 'A'} through {@code 'Z'} and its code is less than
1.2040 - * {@code radix + 'A' - 10}.
1.2041 - * In this case, {@code codePoint - 'A' + 10}
1.2042 - * is returned.
1.2043 - * <li>The character is one of the lowercase Latin letters
1.2044 - * {@code 'a'} through {@code 'z'} and its code is less than
1.2045 - * {@code radix + 'a' - 10}.
1.2046 - * In this case, {@code codePoint - 'a' + 10}
1.2047 - * is returned.
1.2048 - * <li>The character is one of the fullwidth uppercase Latin letters A
1.2049 - * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
1.2050 - * and its code is less than
1.2051 - * {@code radix + '\u005CuFF21' - 10}.
1.2052 - * In this case,
1.2053 - * {@code codePoint - '\u005CuFF21' + 10}
1.2054 - * is returned.
1.2055 - * <li>The character is one of the fullwidth lowercase Latin letters a
1.2056 - * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
1.2057 - * and its code is less than
1.2058 - * {@code radix + '\u005CuFF41'- 10}.
1.2059 - * In this case,
1.2060 - * {@code codePoint - '\u005CuFF41' + 10}
1.2061 - * is returned.
1.2062 - * </ul>
1.2063 - *
1.2064 - * @param codePoint the character (Unicode code point) to be converted.
1.2065 - * @param radix the radix.
1.2066 - * @return the numeric value represented by the character in the
1.2067 - * specified radix.
1.2068 - * @see Character#forDigit(int, int)
1.2069 - * @see Character#isDigit(int)
1.2070 - * @since 1.5
1.2071 - */
1.2072 - public static int digit(int codePoint, int radix) {
1.2073 - throw new UnsupportedOperationException();
1.2074 - }
1.2075 -
1.2076 - /**
1.2077 - * Returns the {@code int} value that the specified Unicode
1.2078 - * character represents. For example, the character
1.2079 - * {@code '\u005Cu216C'} (the roman numeral fifty) will return
1.2080 - * an int with a value of 50.
1.2081 - * <p>
1.2082 - * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
1.2083 - * {@code '\u005Cu005A'}), lowercase
1.2084 - * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
1.2085 - * full width variant ({@code '\u005CuFF21'} through
1.2086 - * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
1.2087 - * {@code '\u005CuFF5A'}) forms have numeric values from 10
1.2088 - * through 35. This is independent of the Unicode specification,
1.2089 - * which does not assign numeric values to these {@code char}
1.2090 - * values.
1.2091 - * <p>
1.2092 - * If the character does not have a numeric value, then -1 is returned.
1.2093 - * If the character has a numeric value that cannot be represented as a
1.2094 - * nonnegative integer (for example, a fractional value), then -2
1.2095 - * is returned.
1.2096 - *
1.2097 - * <p><b>Note:</b> This method cannot handle <a
1.2098 - * href="#supplementary"> supplementary characters</a>. To support
1.2099 - * all Unicode characters, including supplementary characters, use
1.2100 - * the {@link #getNumericValue(int)} method.
1.2101 - *
1.2102 - * @param ch the character to be converted.
1.2103 - * @return the numeric value of the character, as a nonnegative {@code int}
1.2104 - * value; -2 if the character has a numeric value that is not a
1.2105 - * nonnegative integer; -1 if the character has no numeric value.
1.2106 - * @see Character#forDigit(int, int)
1.2107 - * @see Character#isDigit(char)
1.2108 - * @since 1.1
1.2109 - */
1.2110 - public static int getNumericValue(char ch) {
1.2111 - return getNumericValue((int)ch);
1.2112 - }
1.2113 -
1.2114 - /**
1.2115 - * Returns the {@code int} value that the specified
1.2116 - * character (Unicode code point) represents. For example, the character
1.2117 - * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
1.2118 - * an {@code int} with a value of 50.
1.2119 - * <p>
1.2120 - * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
1.2121 - * {@code '\u005Cu005A'}), lowercase
1.2122 - * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
1.2123 - * full width variant ({@code '\u005CuFF21'} through
1.2124 - * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
1.2125 - * {@code '\u005CuFF5A'}) forms have numeric values from 10
1.2126 - * through 35. This is independent of the Unicode specification,
1.2127 - * which does not assign numeric values to these {@code char}
1.2128 - * values.
1.2129 - * <p>
1.2130 - * If the character does not have a numeric value, then -1 is returned.
1.2131 - * If the character has a numeric value that cannot be represented as a
1.2132 - * nonnegative integer (for example, a fractional value), then -2
1.2133 - * is returned.
1.2134 - *
1.2135 - * @param codePoint the character (Unicode code point) to be converted.
1.2136 - * @return the numeric value of the character, as a nonnegative {@code int}
1.2137 - * value; -2 if the character has a numeric value that is not a
1.2138 - * nonnegative integer; -1 if the character has no numeric value.
1.2139 - * @see Character#forDigit(int, int)
1.2140 - * @see Character#isDigit(int)
1.2141 - * @since 1.5
1.2142 - */
1.2143 - public static int getNumericValue(int codePoint) {
1.2144 - throw new UnsupportedOperationException();
1.2145 - }
1.2146 -
1.2147 - /**
1.2148 - * Determines if the specified character is ISO-LATIN-1 white space.
1.2149 - * This method returns {@code true} for the following five
1.2150 - * characters only:
1.2151 - * <table>
1.2152 - * <tr><td>{@code '\t'}</td> <td>{@code U+0009}</td>
1.2153 - * <td>{@code HORIZONTAL TABULATION}</td></tr>
1.2154 - * <tr><td>{@code '\n'}</td> <td>{@code U+000A}</td>
1.2155 - * <td>{@code NEW LINE}</td></tr>
1.2156 - * <tr><td>{@code '\f'}</td> <td>{@code U+000C}</td>
1.2157 - * <td>{@code FORM FEED}</td></tr>
1.2158 - * <tr><td>{@code '\r'}</td> <td>{@code U+000D}</td>
1.2159 - * <td>{@code CARRIAGE RETURN}</td></tr>
1.2160 - * <tr><td>{@code ' '}</td> <td>{@code U+0020}</td>
1.2161 - * <td>{@code SPACE}</td></tr>
1.2162 - * </table>
1.2163 - *
1.2164 - * @param ch the character to be tested.
1.2165 - * @return {@code true} if the character is ISO-LATIN-1 white
1.2166 - * space; {@code false} otherwise.
1.2167 - * @see Character#isSpaceChar(char)
1.2168 - * @see Character#isWhitespace(char)
1.2169 - * @deprecated Replaced by isWhitespace(char).
1.2170 - */
1.2171 - @Deprecated
1.2172 - public static boolean isSpace(char ch) {
1.2173 - return (ch <= 0x0020) &&
1.2174 - (((((1L << 0x0009) |
1.2175 - (1L << 0x000A) |
1.2176 - (1L << 0x000C) |
1.2177 - (1L << 0x000D) |
1.2178 - (1L << 0x0020)) >> ch) & 1L) != 0);
1.2179 - }
1.2180 -
1.2181 -
1.2182 -
1.2183 - /**
1.2184 - * Determines if the specified character is white space according to Java.
1.2185 - * A character is a Java whitespace character if and only if it satisfies
1.2186 - * one of the following criteria:
1.2187 - * <ul>
1.2188 - * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
1.2189 - * {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
1.2190 - * but is not also a non-breaking space ({@code '\u005Cu00A0'},
1.2191 - * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
1.2192 - * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
1.2193 - * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
1.2194 - * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
1.2195 - * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
1.2196 - * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
1.2197 - * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
1.2198 - * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
1.2199 - * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
1.2200 - * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
1.2201 - * </ul>
1.2202 - *
1.2203 - * <p><b>Note:</b> This method cannot handle <a
1.2204 - * href="#supplementary"> supplementary characters</a>. To support
1.2205 - * all Unicode characters, including supplementary characters, use
1.2206 - * the {@link #isWhitespace(int)} method.
1.2207 - *
1.2208 - * @param ch the character to be tested.
1.2209 - * @return {@code true} if the character is a Java whitespace
1.2210 - * character; {@code false} otherwise.
1.2211 - * @see Character#isSpaceChar(char)
1.2212 - * @since 1.1
1.2213 - */
1.2214 - public static boolean isWhitespace(char ch) {
1.2215 - return isWhitespace((int)ch);
1.2216 - }
1.2217 -
1.2218 - /**
1.2219 - * Determines if the specified character (Unicode code point) is
1.2220 - * white space according to Java. A character is a Java
1.2221 - * whitespace character if and only if it satisfies one of the
1.2222 - * following criteria:
1.2223 - * <ul>
1.2224 - * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
1.2225 - * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
1.2226 - * but is not also a non-breaking space ({@code '\u005Cu00A0'},
1.2227 - * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
1.2228 - * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
1.2229 - * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
1.2230 - * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
1.2231 - * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
1.2232 - * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
1.2233 - * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
1.2234 - * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
1.2235 - * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
1.2236 - * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
1.2237 - * </ul>
1.2238 - * <p>
1.2239 - *
1.2240 - * @param codePoint the character (Unicode code point) to be tested.
1.2241 - * @return {@code true} if the character is a Java whitespace
1.2242 - * character; {@code false} otherwise.
1.2243 - * @see Character#isSpaceChar(int)
1.2244 - * @since 1.5
1.2245 - */
1.2246 - public static boolean isWhitespace(int codePoint) {
1.2247 - throw new UnsupportedOperationException();
1.2248 - }
1.2249 -
1.2250 - /**
1.2251 - * Determines if the specified character is an ISO control
1.2252 - * character. A character is considered to be an ISO control
1.2253 - * character if its code is in the range {@code '\u005Cu0000'}
1.2254 - * through {@code '\u005Cu001F'} or in the range
1.2255 - * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
1.2256 - *
1.2257 - * <p><b>Note:</b> This method cannot handle <a
1.2258 - * href="#supplementary"> supplementary characters</a>. To support
1.2259 - * all Unicode characters, including supplementary characters, use
1.2260 - * the {@link #isISOControl(int)} method.
1.2261 - *
1.2262 - * @param ch the character to be tested.
1.2263 - * @return {@code true} if the character is an ISO control character;
1.2264 - * {@code false} otherwise.
1.2265 - *
1.2266 - * @see Character#isSpaceChar(char)
1.2267 - * @see Character#isWhitespace(char)
1.2268 - * @since 1.1
1.2269 - */
1.2270 - public static boolean isISOControl(char ch) {
1.2271 - return isISOControl((int)ch);
1.2272 - }
1.2273 -
1.2274 - /**
1.2275 - * Determines if the referenced character (Unicode code point) is an ISO control
1.2276 - * character. A character is considered to be an ISO control
1.2277 - * character if its code is in the range {@code '\u005Cu0000'}
1.2278 - * through {@code '\u005Cu001F'} or in the range
1.2279 - * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
1.2280 - *
1.2281 - * @param codePoint the character (Unicode code point) to be tested.
1.2282 - * @return {@code true} if the character is an ISO control character;
1.2283 - * {@code false} otherwise.
1.2284 - * @see Character#isSpaceChar(int)
1.2285 - * @see Character#isWhitespace(int)
1.2286 - * @since 1.5
1.2287 - */
1.2288 - public static boolean isISOControl(int codePoint) {
1.2289 - // Optimized form of:
1.2290 - // (codePoint >= 0x00 && codePoint <= 0x1F) ||
1.2291 - // (codePoint >= 0x7F && codePoint <= 0x9F);
1.2292 - return codePoint <= 0x9F &&
1.2293 - (codePoint >= 0x7F || (codePoint >>> 5 == 0));
1.2294 - }
1.2295 -
1.2296 - /**
1.2297 - * Determines the character representation for a specific digit in
1.2298 - * the specified radix. If the value of {@code radix} is not a
1.2299 - * valid radix, or the value of {@code digit} is not a valid
1.2300 - * digit in the specified radix, the null character
1.2301 - * ({@code '\u005Cu0000'}) is returned.
1.2302 - * <p>
1.2303 - * The {@code radix} argument is valid if it is greater than or
1.2304 - * equal to {@code MIN_RADIX} and less than or equal to
1.2305 - * {@code MAX_RADIX}. The {@code digit} argument is valid if
1.2306 - * {@code 0 <= digit < radix}.
1.2307 - * <p>
1.2308 - * If the digit is less than 10, then
1.2309 - * {@code '0' + digit} is returned. Otherwise, the value
1.2310 - * {@code 'a' + digit - 10} is returned.
1.2311 - *
1.2312 - * @param digit the number to convert to a character.
1.2313 - * @param radix the radix.
1.2314 - * @return the {@code char} representation of the specified digit
1.2315 - * in the specified radix.
1.2316 - * @see Character#MIN_RADIX
1.2317 - * @see Character#MAX_RADIX
1.2318 - * @see Character#digit(char, int)
1.2319 - */
1.2320 - public static char forDigit(int digit, int radix) {
1.2321 - if ((digit >= radix) || (digit < 0)) {
1.2322 - return '\0';
1.2323 - }
1.2324 - if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
1.2325 - return '\0';
1.2326 - }
1.2327 - if (digit < 10) {
1.2328 - return (char)('0' + digit);
1.2329 - }
1.2330 - return (char)('a' - 10 + digit);
1.2331 - }
1.2332 -
1.2333 - /**
1.2334 - * Compares two {@code Character} objects numerically.
1.2335 - *
1.2336 - * @param anotherCharacter the {@code Character} to be compared.
1.2337 -
1.2338 - * @return the value {@code 0} if the argument {@code Character}
1.2339 - * is equal to this {@code Character}; a value less than
1.2340 - * {@code 0} if this {@code Character} is numerically less
1.2341 - * than the {@code Character} argument; and a value greater than
1.2342 - * {@code 0} if this {@code Character} is numerically greater
1.2343 - * than the {@code Character} argument (unsigned comparison).
1.2344 - * Note that this is strictly a numerical comparison; it is not
1.2345 - * locale-dependent.
1.2346 - * @since 1.2
1.2347 - */
1.2348 - public int compareTo(Character anotherCharacter) {
1.2349 - return compare(this.value, anotherCharacter.value);
1.2350 - }
1.2351 -
1.2352 - /**
1.2353 - * Compares two {@code char} values numerically.
1.2354 - * The value returned is identical to what would be returned by:
1.2355 - * <pre>
1.2356 - * Character.valueOf(x).compareTo(Character.valueOf(y))
1.2357 - * </pre>
1.2358 - *
1.2359 - * @param x the first {@code char} to compare
1.2360 - * @param y the second {@code char} to compare
1.2361 - * @return the value {@code 0} if {@code x == y};
1.2362 - * a value less than {@code 0} if {@code x < y}; and
1.2363 - * a value greater than {@code 0} if {@code x > y}
1.2364 - * @since 1.7
1.2365 - */
1.2366 - public static int compare(char x, char y) {
1.2367 - return x - y;
1.2368 - }
1.2369 -
1.2370 -
1.2371 - /**
1.2372 - * The number of bits used to represent a <tt>char</tt> value in unsigned
1.2373 - * binary form, constant {@code 16}.
1.2374 - *
1.2375 - * @since 1.5
1.2376 - */
1.2377 - public static final int SIZE = 16;
1.2378 -
1.2379 - /**
1.2380 - * Returns the value obtained by reversing the order of the bytes in the
1.2381 - * specified <tt>char</tt> value.
1.2382 - *
1.2383 - * @return the value obtained by reversing (or, equivalently, swapping)
1.2384 - * the bytes in the specified <tt>char</tt> value.
1.2385 - * @since 1.5
1.2386 - */
1.2387 - public static char reverseBytes(char ch) {
1.2388 - return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
1.2389 - }
1.2390 -
1.2391 -}