1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/emul/mini/src/main/java/java/lang/Character.java Wed Jan 23 20:39:23 2013 +0100
1.3 @@ -0,0 +1,2382 @@
1.4 +/*
1.5 + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
1.7 + *
1.8 + * This code is free software; you can redistribute it and/or modify it
1.9 + * under the terms of the GNU General Public License version 2 only, as
1.10 + * published by the Free Software Foundation. Oracle designates this
1.11 + * particular file as subject to the "Classpath" exception as provided
1.12 + * by Oracle in the LICENSE file that accompanied this code.
1.13 + *
1.14 + * This code is distributed in the hope that it will be useful, but WITHOUT
1.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1.16 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
1.17 + * version 2 for more details (a copy is included in the LICENSE file that
1.18 + * accompanied this code).
1.19 + *
1.20 + * You should have received a copy of the GNU General Public License version
1.21 + * 2 along with this work; if not, write to the Free Software Foundation,
1.22 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
1.23 + *
1.24 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
1.25 + * or visit www.oracle.com if you need additional information or have any
1.26 + * questions.
1.27 + */
1.28 +
1.29 +package java.lang;
1.30 +
1.31 +import org.apidesign.bck2brwsr.core.JavaScriptBody;
1.32 +
1.33 +/**
1.34 + * The {@code Character} class wraps a value of the primitive
1.35 + * type {@code char} in an object. An object of type
1.36 + * {@code Character} contains a single field whose type is
1.37 + * {@code char}.
1.38 + * <p>
1.39 + * In addition, this class provides several methods for determining
1.40 + * a character's category (lowercase letter, digit, etc.) and for converting
1.41 + * characters from uppercase to lowercase and vice versa.
1.42 + * <p>
1.43 + * Character information is based on the Unicode Standard, version 6.0.0.
1.44 + * <p>
1.45 + * The methods and data of class {@code Character} are defined by
1.46 + * the information in the <i>UnicodeData</i> file that is part of the
1.47 + * Unicode Character Database maintained by the Unicode
1.48 + * Consortium. This file specifies various properties including name
1.49 + * and general category for every defined Unicode code point or
1.50 + * character range.
1.51 + * <p>
1.52 + * The file and its description are available from the Unicode Consortium at:
1.53 + * <ul>
1.54 + * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
1.55 + * </ul>
1.56 + *
1.57 + * <h4><a name="unicode">Unicode Character Representations</a></h4>
1.58 + *
1.59 + * <p>The {@code char} data type (and therefore the value that a
1.60 + * {@code Character} object encapsulates) is based on the
1.61 + * original Unicode specification, which defined characters as
1.62 + * fixed-width 16-bit entities. The Unicode Standard has since been
1.63 + * changed to allow for characters whose representation requires more
1.64 + * than 16 bits. The range of legal <em>code point</em>s is now
1.65 + * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
1.66 + * (Refer to the <a
1.67 + * href="http://www.unicode.org/reports/tr27/#notation"><i>
1.68 + * definition</i></a> of the U+<i>n</i> notation in the Unicode
1.69 + * Standard.)
1.70 + *
1.71 + * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
1.72 + * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
1.73 + * <a name="supplementary">Characters</a> whose code points are greater
1.74 + * than U+FFFF are called <em>supplementary character</em>s. The Java
1.75 + * platform uses the UTF-16 representation in {@code char} arrays and
1.76 + * in the {@code String} and {@code StringBuffer} classes. In
1.77 + * this representation, supplementary characters are represented as a pair
1.78 + * of {@code char} values, the first from the <em>high-surrogates</em>
1.79 + * range, (\uD800-\uDBFF), the second from the
1.80 + * <em>low-surrogates</em> range (\uDC00-\uDFFF).
1.81 + *
1.82 + * <p>A {@code char} value, therefore, represents Basic
1.83 + * Multilingual Plane (BMP) code points, including the surrogate
1.84 + * code points, or code units of the UTF-16 encoding. An
1.85 + * {@code int} value represents all Unicode code points,
1.86 + * including supplementary code points. The lower (least significant)
1.87 + * 21 bits of {@code int} are used to represent Unicode code
1.88 + * points and the upper (most significant) 11 bits must be zero.
1.89 + * Unless otherwise specified, the behavior with respect to
1.90 + * supplementary characters and surrogate {@code char} values is
1.91 + * as follows:
1.92 + *
1.93 + * <ul>
1.94 + * <li>The methods that only accept a {@code char} value cannot support
1.95 + * supplementary characters. They treat {@code char} values from the
1.96 + * surrogate ranges as undefined characters. For example,
1.97 + * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
1.98 + * this specific value if followed by any low-surrogate value in a string
1.99 + * would represent a letter.
1.100 + *
1.101 + * <li>The methods that accept an {@code int} value support all
1.102 + * Unicode characters, including supplementary characters. For
1.103 + * example, {@code Character.isLetter(0x2F81A)} returns
1.104 + * {@code true} because the code point value represents a letter
1.105 + * (a CJK ideograph).
1.106 + * </ul>
1.107 + *
1.108 + * <p>In the Java SE API documentation, <em>Unicode code point</em> is
1.109 + * used for character values in the range between U+0000 and U+10FFFF,
1.110 + * and <em>Unicode code unit</em> is used for 16-bit
1.111 + * {@code char} values that are code units of the <em>UTF-16</em>
1.112 + * encoding. For more information on Unicode terminology, refer to the
1.113 + * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
1.114 + *
1.115 + * @author Lee Boynton
1.116 + * @author Guy Steele
1.117 + * @author Akira Tanaka
1.118 + * @author Martin Buchholz
1.119 + * @author Ulf Zibis
1.120 + * @since 1.0
1.121 + */
1.122 +public final
1.123 +class Character implements java.io.Serializable, Comparable<Character> {
1.124 + /**
1.125 + * The minimum radix (2) available for conversion to and from strings.
1.126 + * The constant value of this field is the smallest value permitted
1.127 + * for the radix argument in radix-conversion methods such as the
1.128 + * {@code digit} method, the {@code forDigit} method, and the
1.129 + * {@code toString} method of class {@code Integer}.
1.130 + *
1.131 + * @see Character#digit(char, int)
1.132 + * @see Character#forDigit(int, int)
1.133 + * @see Integer#toString(int, int)
1.134 + * @see Integer#valueOf(String)
1.135 + */
1.136 + public static final int MIN_RADIX = 2;
1.137 +
1.138 + /**
1.139 + * The maximum radix (36) available for conversion to and from strings.
1.140 + * The constant value of this field is the largest value permitted
1.141 + * for the radix argument in radix-conversion methods such as the
1.142 + * {@code digit} method, the {@code forDigit} method, and the
1.143 + * {@code toString} method of class {@code Integer}.
1.144 + *
1.145 + * @see Character#digit(char, int)
1.146 + * @see Character#forDigit(int, int)
1.147 + * @see Integer#toString(int, int)
1.148 + * @see Integer#valueOf(String)
1.149 + */
1.150 + public static final int MAX_RADIX = 36;
1.151 +
1.152 + /**
1.153 + * The constant value of this field is the smallest value of type
1.154 + * {@code char}, {@code '\u005Cu0000'}.
1.155 + *
1.156 + * @since 1.0.2
1.157 + */
1.158 + public static final char MIN_VALUE = '\u0000';
1.159 +
1.160 + /**
1.161 + * The constant value of this field is the largest value of type
1.162 + * {@code char}, {@code '\u005CuFFFF'}.
1.163 + *
1.164 + * @since 1.0.2
1.165 + */
1.166 + public static final char MAX_VALUE = '\uFFFF';
1.167 +
1.168 + /**
1.169 + * The {@code Class} instance representing the primitive type
1.170 + * {@code char}.
1.171 + *
1.172 + * @since 1.1
1.173 + */
1.174 + public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
1.175 +
1.176 + /*
1.177 + * Normative general types
1.178 + */
1.179 +
1.180 + /*
1.181 + * General character types
1.182 + */
1.183 +
1.184 + /**
1.185 + * General category "Cn" in the Unicode specification.
1.186 + * @since 1.1
1.187 + */
1.188 + public static final byte UNASSIGNED = 0;
1.189 +
1.190 + /**
1.191 + * General category "Lu" in the Unicode specification.
1.192 + * @since 1.1
1.193 + */
1.194 + public static final byte UPPERCASE_LETTER = 1;
1.195 +
1.196 + /**
1.197 + * General category "Ll" in the Unicode specification.
1.198 + * @since 1.1
1.199 + */
1.200 + public static final byte LOWERCASE_LETTER = 2;
1.201 +
1.202 + /**
1.203 + * General category "Lt" in the Unicode specification.
1.204 + * @since 1.1
1.205 + */
1.206 + public static final byte TITLECASE_LETTER = 3;
1.207 +
1.208 + /**
1.209 + * General category "Lm" in the Unicode specification.
1.210 + * @since 1.1
1.211 + */
1.212 + public static final byte MODIFIER_LETTER = 4;
1.213 +
1.214 + /**
1.215 + * General category "Lo" in the Unicode specification.
1.216 + * @since 1.1
1.217 + */
1.218 + public static final byte OTHER_LETTER = 5;
1.219 +
1.220 + /**
1.221 + * General category "Mn" in the Unicode specification.
1.222 + * @since 1.1
1.223 + */
1.224 + public static final byte NON_SPACING_MARK = 6;
1.225 +
1.226 + /**
1.227 + * General category "Me" in the Unicode specification.
1.228 + * @since 1.1
1.229 + */
1.230 + public static final byte ENCLOSING_MARK = 7;
1.231 +
1.232 + /**
1.233 + * General category "Mc" in the Unicode specification.
1.234 + * @since 1.1
1.235 + */
1.236 + public static final byte COMBINING_SPACING_MARK = 8;
1.237 +
1.238 + /**
1.239 + * General category "Nd" in the Unicode specification.
1.240 + * @since 1.1
1.241 + */
1.242 + public static final byte DECIMAL_DIGIT_NUMBER = 9;
1.243 +
1.244 + /**
1.245 + * General category "Nl" in the Unicode specification.
1.246 + * @since 1.1
1.247 + */
1.248 + public static final byte LETTER_NUMBER = 10;
1.249 +
1.250 + /**
1.251 + * General category "No" in the Unicode specification.
1.252 + * @since 1.1
1.253 + */
1.254 + public static final byte OTHER_NUMBER = 11;
1.255 +
1.256 + /**
1.257 + * General category "Zs" in the Unicode specification.
1.258 + * @since 1.1
1.259 + */
1.260 + public static final byte SPACE_SEPARATOR = 12;
1.261 +
1.262 + /**
1.263 + * General category "Zl" in the Unicode specification.
1.264 + * @since 1.1
1.265 + */
1.266 + public static final byte LINE_SEPARATOR = 13;
1.267 +
1.268 + /**
1.269 + * General category "Zp" in the Unicode specification.
1.270 + * @since 1.1
1.271 + */
1.272 + public static final byte PARAGRAPH_SEPARATOR = 14;
1.273 +
1.274 + /**
1.275 + * General category "Cc" in the Unicode specification.
1.276 + * @since 1.1
1.277 + */
1.278 + public static final byte CONTROL = 15;
1.279 +
1.280 + /**
1.281 + * General category "Cf" in the Unicode specification.
1.282 + * @since 1.1
1.283 + */
1.284 + public static final byte FORMAT = 16;
1.285 +
1.286 + /**
1.287 + * General category "Co" in the Unicode specification (the value 17 is skipped).
1.288 + * @since 1.1
1.289 + */
1.290 + public static final byte PRIVATE_USE = 18;
1.291 +
1.292 + /**
1.293 + * General category "Cs" in the Unicode specification.
1.294 + * @since 1.1
1.295 + */
1.296 + public static final byte SURROGATE = 19;
1.297 +
1.298 + /**
1.299 + * General category "Pd" in the Unicode specification.
1.300 + * @since 1.1
1.301 + */
1.302 + public static final byte DASH_PUNCTUATION = 20;
1.303 +
1.304 + /**
1.305 + * General category "Ps" in the Unicode specification.
1.306 + * @since 1.1
1.307 + */
1.308 + public static final byte START_PUNCTUATION = 21;
1.309 +
1.310 + /**
1.311 + * General category "Pe" in the Unicode specification.
1.312 + * @since 1.1
1.313 + */
1.314 + public static final byte END_PUNCTUATION = 22;
1.315 +
1.316 + /**
1.317 + * General category "Pc" in the Unicode specification.
1.318 + * @since 1.1
1.319 + */
1.320 + public static final byte CONNECTOR_PUNCTUATION = 23;
1.321 +
1.322 + /**
1.323 + * General category "Po" in the Unicode specification.
1.324 + * @since 1.1
1.325 + */
1.326 + public static final byte OTHER_PUNCTUATION = 24;
1.327 +
1.328 + /**
1.329 + * General category "Sm" in the Unicode specification.
1.330 + * @since 1.1
1.331 + */
1.332 + public static final byte MATH_SYMBOL = 25;
1.333 +
1.334 + /**
1.335 + * General category "Sc" in the Unicode specification.
1.336 + * @since 1.1
1.337 + */
1.338 + public static final byte CURRENCY_SYMBOL = 26;
1.339 +
1.340 + /**
1.341 + * General category "Sk" in the Unicode specification.
1.342 + * @since 1.1
1.343 + */
1.344 + public static final byte MODIFIER_SYMBOL = 27;
1.345 +
1.346 + /**
1.347 + * General category "So" in the Unicode specification.
1.348 + * @since 1.1
1.349 + */
1.350 + public static final byte OTHER_SYMBOL = 28;
1.351 +
1.352 + /**
1.353 + * General category "Pi" in the Unicode specification.
1.354 + * @since 1.4
1.355 + */
1.356 + public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
1.357 +
1.358 + /**
1.359 + * General category "Pf" in the Unicode specification.
1.360 + * @since 1.4
1.361 + */
1.362 + public static final byte FINAL_QUOTE_PUNCTUATION = 30;
1.363 +
1.364 + /**
1.365 + * Error flag (all bits set, i.e. -1). An int (code point) is used to avoid confusion with U+FFFF.
1.366 + */
1.367 + static final int ERROR = 0xFFFFFFFF;
1.368 +
1.369 +
1.370 + /**
1.371 + * Undefined bidirectional character type; the only negative DIRECTIONALITY_* value.
1.372 + * Undefined {@code char} values have undefined directionality in the Unicode specification.
1.373 + * @since 1.4
1.374 + */
1.375 + public static final byte DIRECTIONALITY_UNDEFINED = -1;
1.376 +
1.377 + /**
1.378 + * Strong bidirectional character type "L" in the Unicode specification.
1.379 + * @since 1.4
1.380 + */
1.381 + public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
1.382 +
1.383 + /**
1.384 + * Strong bidirectional character type "R" in the Unicode specification.
1.385 + * @since 1.4
1.386 + */
1.387 + public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
1.388 +
1.389 + /**
1.390 + * Strong bidirectional character type "AL" in the Unicode specification.
1.391 + * @since 1.4
1.392 + */
1.393 + public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
1.394 +
1.395 + /**
1.396 + * Weak bidirectional character type "EN" in the Unicode specification.
1.397 + * @since 1.4
1.398 + */
1.399 + public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
1.400 +
1.401 + /**
1.402 + * Weak bidirectional character type "ES" in the Unicode specification.
1.403 + * @since 1.4
1.404 + */
1.405 + public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
1.406 +
1.407 + /**
1.408 + * Weak bidirectional character type "ET" in the Unicode specification.
1.409 + * @since 1.4
1.410 + */
1.411 + public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
1.412 +
1.413 + /**
1.414 + * Weak bidirectional character type "AN" in the Unicode specification.
1.415 + * @since 1.4
1.416 + */
1.417 + public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
1.418 +
1.419 + /**
1.420 + * Weak bidirectional character type "CS" in the Unicode specification.
1.421 + * @since 1.4
1.422 + */
1.423 + public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
1.424 +
1.425 + /**
1.426 + * Weak bidirectional character type "NSM" in the Unicode specification.
1.427 + * @since 1.4
1.428 + */
1.429 + public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
1.430 +
1.431 + /**
1.432 + * Weak bidirectional character type "BN" in the Unicode specification.
1.433 + * @since 1.4
1.434 + */
1.435 + public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
1.436 +
1.437 + /**
1.438 + * Neutral bidirectional character type "B" in the Unicode specification.
1.439 + * @since 1.4
1.440 + */
1.441 + public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
1.442 +
1.443 + /**
1.444 + * Neutral bidirectional character type "S" in the Unicode specification.
1.445 + * @since 1.4
1.446 + */
1.447 + public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
1.448 +
1.449 + /**
1.450 + * Neutral bidirectional character type "WS" in the Unicode specification.
1.451 + * @since 1.4
1.452 + */
1.453 + public static final byte DIRECTIONALITY_WHITESPACE = 12;
1.454 +
1.455 + /**
1.456 + * Neutral bidirectional character type "ON" in the Unicode specification.
1.457 + * @since 1.4
1.458 + */
1.459 + public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
1.460 +
1.461 + /**
1.462 + * Strong bidirectional character type "LRE" in the Unicode specification.
1.463 + * @since 1.4
1.464 + */
1.465 + public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
1.466 +
1.467 + /**
1.468 + * Strong bidirectional character type "LRO" in the Unicode specification.
1.469 + * @since 1.4
1.470 + */
1.471 + public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
1.472 +
1.473 + /**
1.474 + * Strong bidirectional character type "RLE" in the Unicode specification.
1.475 + * @since 1.4
1.476 + */
1.477 + public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
1.478 +
1.479 + /**
1.480 + * Strong bidirectional character type "RLO" in the Unicode specification.
1.481 + * @since 1.4
1.482 + */
1.483 + public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
1.484 +
1.485 + /**
1.486 + * Weak bidirectional character type "PDF" in the Unicode specification.
1.487 + * @since 1.4
1.488 + */
1.489 + public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
1.490 +
1.491 + /**
1.492 + * The minimum value of a
1.493 + * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
1.494 + * Unicode high-surrogate code unit</a>
1.495 + * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
1.496 + * A high-surrogate is also known as a <i>leading-surrogate</i>.
1.497 + *
1.498 + * @since 1.5
1.499 + */
1.500 + public static final char MIN_HIGH_SURROGATE = '\uD800';
1.501 +
1.502 + /**
1.503 + * The maximum value of a
1.504 + * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
1.505 + * Unicode high-surrogate code unit</a>
1.506 + * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
1.507 + * A high-surrogate is also known as a <i>leading-surrogate</i>.
1.508 + *
1.509 + * @since 1.5
1.510 + */
1.511 + public static final char MAX_HIGH_SURROGATE = '\uDBFF';
1.512 +
1.513 + /**
1.514 + * The minimum value of a
1.515 + * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
1.516 + * Unicode low-surrogate code unit</a>
1.517 + * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
1.518 + * A low-surrogate is also known as a <i>trailing-surrogate</i>.
1.519 + *
1.520 + * @since 1.5
1.521 + */
1.522 + public static final char MIN_LOW_SURROGATE = '\uDC00';
1.523 +
1.524 + /**
1.525 + * The maximum value of a
1.526 + * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
1.527 + * Unicode low-surrogate code unit</a>
1.528 + * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
1.529 + * A low-surrogate is also known as a <i>trailing-surrogate</i>.
1.530 + *
1.531 + * @since 1.5
1.532 + */
1.533 + public static final char MAX_LOW_SURROGATE = '\uDFFF';
1.534 +
1.535 + /**
1.536 + * The minimum value of a Unicode surrogate code unit in the
1.537 + * UTF-16 encoding, constant {@code '\u005CuD800'} (same as {@link #MIN_HIGH_SURROGATE}).
1.538 + *
1.539 + * @since 1.5
1.540 + */
1.541 + public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
1.542 +
1.543 + /**
1.544 + * The maximum value of a Unicode surrogate code unit in the
1.545 + * UTF-16 encoding, constant {@code '\u005CuDFFF'} (same as {@link #MAX_LOW_SURROGATE}).
1.546 + *
1.547 + * @since 1.5
1.548 + */
1.549 + public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
1.550 +
1.551 + /**
1.552 + * The minimum value of a
1.553 + * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
1.554 + * Unicode supplementary code point</a>, constant {@code U+10000}.
1.555 + *
1.556 + * @since 1.5
1.557 + */
1.558 + public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
1.559 +
1.560 + /**
1.561 + * The minimum value of a
1.562 + * <a href="http://www.unicode.org/glossary/#code_point">
1.563 + * Unicode code point</a>, constant {@code U+0000}.
1.564 + *
1.565 + * @since 1.5
1.566 + */
1.567 + public static final int MIN_CODE_POINT = 0x000000;
1.568 +
1.569 + /**
1.570 + * The maximum value of a
1.571 + * <a href="http://www.unicode.org/glossary/#code_point">
1.572 + * Unicode code point</a>, constant {@code U+10FFFF}.
1.573 + *
1.574 + * @since 1.5
1.575 + */
1.576 + public static final int MAX_CODE_POINT = 0X10FFFF;
1.577 +
1.578 +
1.579 + /**
1.580 + * Instances of this class represent particular subsets of the Unicode
1.581 + * character set. The only family of subsets defined in the
1.582 + * {@code Character} class is {@link Character.UnicodeBlock}; other
1.583 + * portions of the Java API may define other subsets for their own purposes.
1.584 + *
1.585 + * @since 1.2
1.586 + */
1.587 + public static class Subset {
1.588 +
1.589 + /** The name of this subset; assigned once by the constructor, never {@code null}. */
1.590 + private final String name;
1.591 +
1.592 + /**
1.593 + * Constructs a new {@code Subset} instance.
1.594 + *
1.595 + * @param name The name of this subset
1.596 + * @exception NullPointerException if name is {@code null}
1.597 + */
1.598 + protected Subset(String name) {
1.599 + if (name == null) {
1.600 + throw new NullPointerException("name");
1.601 + }
1.602 + this.name = name;
1.603 + }
1.604 +
1.605 + /**
1.606 + * Compares two {@code Subset} objects for equality. Returns {@code true}
1.607 + * if and only if {@code this} and the argument refer to the same object;
1.608 + * since this method is {@code final}, this guarantee holds for all subclasses.
1.609 + */
1.610 + @Override
1.611 + public final boolean equals(Object obj) {
1.612 + return this == obj;
1.613 + }
1.614 +
1.615 + /**
1.616 + * Returns the standard hash code as defined by the {@link Object#hashCode}
1.617 + * method. This method is {@code final} in order to ensure that the
1.618 + * {@code equals} and {@code hashCode} methods stay consistent in all subclasses.
1.619 + */
1.620 + @Override
1.621 + public final int hashCode() {
1.622 + return super.hashCode();
1.623 + }
1.624 +
1.625 + /**
1.626 + * Returns the name of this subset.
1.627 + * @return the name that was passed to the constructor
1.628 + */
1.629 + @Override
1.630 + public final String toString() {
1.631 + return name;
1.632 + }
1.633 + }
1.634 +
1.635 + // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
1.636 + // for the latest specification of Unicode Blocks.
1.637 +
1.638 +
1.639 + /**
1.640 + * The primitive {@code char} wrapped by this {@code Character}; assigned once in the constructor.
1.641 + *
1.642 + * @serial
1.643 + */
1.644 + private final char value;
1.645 +
1.646 + /** Use the serialVersionUID from JDK 1.0.2 for serialization interoperability. */
1.647 + private static final long serialVersionUID = 3786198910865385080L;
1.648 +
1.649 + /**
1.650 + * Constructs a newly allocated {@code Character} representing the specified
1.651 + * {@code char} value; unlike {@link #valueOf(char)}, no instance is reused.
1.652 + *
1.653 + * @param value the value to be represented by the
1.654 + * {@code Character} object.
1.655 + */
1.656 + public Character(char value) {
1.657 + this.value = value;
1.658 + }
1.659 +
1.660 + private static class CharacterCache {
1.661 + private CharacterCache() {} // private: not meant to be instantiated
1.662 +
1.663 + static final Character[] cache = new Character[128];
1.664 + static { // fill every slot with the Character for its own index
1.665 + for (int code = 0; code < cache.length; code++) {
1.666 + cache[code] = new Character((char) code);
1.667 + }
1.668 + }
1.669 + }
1.670 +
1.671 + /**
1.672 + * Returns a <tt>Character</tt> instance representing the specified
1.673 + * <tt>char</tt> value. Prefer this method over the constructor
1.674 + * {@link #Character(char)} whenever a new instance is not required:
1.675 + * frequently requested values are cached, which is likely to yield
1.676 + * significantly better space and time performance.
1.677 + *
1.678 + * <p>Values in the range {@code '\u005Cu0000'} to {@code '\u005Cu007F'},
1.679 + * inclusive, are always cached; values outside of this range may be
1.680 + * cached as well.
1.681 + *
1.682 + * @param c a char value.
1.683 + * @return a <tt>Character</tt> instance representing <tt>c</tt>.
1.684 + * @since 1.5
1.685 + */
1.686 + public static Character valueOf(char c) {
1.687 + // Values above 127 are outside the mandatory cache range,
1.688 + // so they get a freshly allocated instance.
1.689 + if (c > 127) {
1.690 + return new Character(c);
1.691 + }
1.692 + return CharacterCache.cache[c];
1.693 + }
1.694 +
1.695 + /**
1.696 + * Returns the primitive {@code char} wrapped by this {@code Character}.
1.697 + *
1.698 + * @return the {@code char} value this object represents.
1.699 + */
1.700 + public char charValue() {
1.701 + return this.value;
1.702 + }
1.703 +
1.704 + /**
1.705 + * Returns a hash code for this {@code Character}: the numeric value of
1.706 + * the wrapped {@code char}, the same number {@code charValue()} yields.
1.707 + *
1.708 + * @return a hash code value for this {@code Character}
1.709 + */
1.710 + public int hashCode() {
1.711 + return value;
1.712 + }
1.713 +
1.714 + /**
1.715 + * Tests whether the specified object is equal to this one.
1.716 + * Two objects are equal exactly when the argument is a non-{@code null}
1.717 + * {@code Character} that wraps the same {@code char} value as this
1.718 + * object.
1.719 + *
1.720 + * @param obj the object to compare with.
1.721 + * @return {@code true} if {@code obj} is a {@code Character} with the
1.722 + * same value; {@code false} otherwise.
1.723 + */
1.724 + public boolean equals(Object obj) {
1.725 + if (!(obj instanceof Character)) {
1.726 + return false;
1.727 + }
1.728 + return this.value == ((Character) obj).value;
1.729 + }
1.730 +
1.731 + /**
1.732 + * Returns a {@code String} object representing this
1.733 + * {@code Character}'s value. The returned string has
1.734 + * length 1, and its only component is the primitive
1.735 + * {@code char} value that this {@code Character}
1.736 + * object represents.
1.737 + *
1.738 + * @return a string representation of this object.
1.739 + */
1.740 + public String toString() {
1.741 + // Hand String.valueOf a one-element array holding the wrapped char.
1.742 + return String.valueOf(new char[] {value});
1.743 + }
1.744 +
1.745 + /**
1.746 + * Returns a {@code String} object representing the specified
1.747 + * {@code char}: a string of length 1 made up solely of that {@code char}.
1.748 + *
1.749 + * @param c the {@code char} to be converted
1.750 + * @return the string representation of the specified {@code char}
1.751 + * @since 1.4
1.752 + */
1.753 + public static String toString(char c) {
1.754 + final String text = String.valueOf(c);
1.755 + return text;
1.756 + }
1.757 +
1.758 + /**
1.759 + * Tests whether the specified value lies within the range of valid
1.760 + * <a href="http://www.unicode.org/glossary/#code_point">
1.761 + * Unicode code point values</a>.
1.762 + *
1.763 + * @param codePoint the Unicode code point to be tested
1.764 + * @return {@code true} if the specified code point value is between
1.765 + * {@link #MIN_CODE_POINT} and
1.766 + * {@link #MAX_CODE_POINT} inclusive;
1.767 + * {@code false} otherwise.
1.768 + * @since 1.5
1.769 + */
1.770 + public static boolean isValidCodePoint(int codePoint) {
1.771 + // Optimized form of:
1.772 + // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
1.773 + // The comparison is done on the bits above the low 16 (the plane).
1.774 + return (codePoint >>> 16) < ((MAX_CODE_POINT + 1) >>> 16);
1.775 + }
1.776 +
1.777 + /**
1.778 + * Determines whether the specified character (Unicode code point)
1.779 + * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
1.780 + * Such code points can be represented using a single {@code char}.
1.781 + *
1.782 + * @param codePoint the character (Unicode code point) to be tested
1.783 + * @return {@code true} if the specified code point is between
1.784 + * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
1.785 + * {@code false} otherwise.
1.786 + * @since 1.7
1.787 + */
1.788 + public static boolean isBmpCodePoint(int codePoint) {
1.789 + return codePoint >>> 16 == 0;
1.790 + // Optimized form of:
1.791 + // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
1.792 + // We consistently use logical shift (>>>) to facilitate
1.793 + // additional runtime optimizations.
1.794 + }
1.795 +
1.796 + /**
1.797 + * Determines whether the specified character (Unicode code point)
1.798 + * is in the <a href="#supplementary">supplementary character</a> range.
1.799 + *
1.800 + * @param codePoint the character (Unicode code point) to be tested
1.801 + * @return {@code true} if the specified code point is between
1.802 + * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
1.803 + * {@link #MAX_CODE_POINT} inclusive;
1.804 + * {@code false} otherwise.
1.805 + * @since 1.5
1.806 + */
1.807 + public static boolean isSupplementaryCodePoint(int codePoint) {
1.808 + return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
1.809 + && codePoint < MAX_CODE_POINT + 1;
1.810 + }
1.811 +
1.812 + /**
1.813 + * Tests whether the given {@code char} value is a
1.814 + * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
1.815 + * Unicode high-surrogate code unit</a>
1.816 + * (also known as <i>leading-surrogate code unit</i>).
1.817 + *
1.818 + * <p>A high surrogate does not represent a character on its own; it is
1.819 + * the first half of the UTF-16 encoding of a
1.820 + * <a href="#supplementary">supplementary character</a>.
1.821 + *
1.822 + * @param ch the {@code char} value to be tested.
1.823 + * @return {@code true} if the {@code char} value is between
1.824 + * {@link #MIN_HIGH_SURROGATE} and
1.825 + * {@link #MAX_HIGH_SURROGATE} inclusive;
1.826 + * {@code false} otherwise.
1.827 + * @see Character#isLowSurrogate(char)
1.828 + * @see Character.UnicodeBlock#of(int)
1.829 + * @since 1.5
1.830 + */
1.831 + public static boolean isHighSurrogate(char ch) {
1.832 + // The exclusive bound MAX_HIGH_SURROGATE + 1 equals MIN_LOW_SURROGATE,
1.833 + // which helps the VM constant-fold the comparison.
1.834 + return MIN_HIGH_SURROGATE <= ch && ch < (MAX_HIGH_SURROGATE + 1);
1.835 + }
1.836 +
1.837 + /**
1.838 + * Tests whether the given {@code char} value is a
1.839 + * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
1.840 + * Unicode low-surrogate code unit</a>
1.841 + * (also known as <i>trailing-surrogate code unit</i>).
1.842 + *
1.843 + * <p>A low surrogate does not represent a character on its own; it is
1.844 + * the second half of the UTF-16 encoding of a
1.845 + * <a href="#supplementary">supplementary character</a>.
1.846 + *
1.847 + * @param ch the {@code char} value to be tested.
1.848 + * @return {@code true} if the {@code char} value is between
1.849 + * {@link #MIN_LOW_SURROGATE} and
1.850 + * {@link #MAX_LOW_SURROGATE} inclusive;
1.851 + * {@code false} otherwise.
1.852 + * @see Character#isHighSurrogate(char)
1.853 + * @since 1.5
1.854 + */
1.855 + public static boolean isLowSurrogate(char ch) {
1.856 + // Half-open range check: [MIN_LOW_SURROGATE, MAX_LOW_SURROGATE + 1).
1.857 + return MIN_LOW_SURROGATE <= ch && ch < (MAX_LOW_SURROGATE + 1);
1.858 + }
1.859 +
1.860 + /**
1.861 + * Tests whether the given {@code char} value is a Unicode
1.862 + * <i>surrogate code unit</i>.
1.863 + *
1.864 + * <p>A surrogate does not represent a character on its own; it is
1.865 + * one half of the UTF-16 encoding of a
1.866 + * <a href="#supplementary">supplementary character</a>.
1.867 + *
1.868 + * <p>A char value is a surrogate code unit if and only if it is either
1.869 + * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
1.870 + * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
1.871 + *
1.872 + * @param ch the {@code char} value to be tested.
1.873 + * @return {@code true} if the {@code char} value is between
1.874 + * {@link #MIN_SURROGATE} and
1.875 + * {@link #MAX_SURROGATE} inclusive;
1.876 + * {@code false} otherwise.
1.877 + * @since 1.7
1.878 + */
1.879 + public static boolean isSurrogate(char ch) {
1.880 + // Half-open range check: [MIN_SURROGATE, MAX_SURROGATE + 1).
1.881 + return MIN_SURROGATE <= ch && ch < (MAX_SURROGATE + 1);
1.882 + }
1.883 +
1.884 + /**
1.885 + * Determines whether the specified pair of {@code char}
1.886 + * values is a valid
1.887 + * <a href="http://www.unicode.org/glossary/#surrogate_pair">
1.888 + * Unicode surrogate pair</a>.
1.889 + *
1.890 + * <p>This method is equivalent to the expression:
1.891 + * <blockquote><pre>
1.892 + * isHighSurrogate(high) && isLowSurrogate(low)
1.893 + * </pre></blockquote>
1.894 + *
1.895 + * @param high the high-surrogate code value to be tested
1.896 + * @param low the low-surrogate code value to be tested
1.897 + * @return {@code true} if the specified high and
1.898 + * low-surrogate code values represent a valid surrogate pair;
1.899 + * {@code false} otherwise.
1.900 + * @since 1.5
1.901 + */
1.902 + public static boolean isSurrogatePair(char high, char low) {
1.903 + return isHighSurrogate(high) && isLowSurrogate(low);
1.904 + }
1.905 +
1.906 + /**
1.907 + * Determines the number of {@code char} values needed to
1.908 + * represent the specified character (Unicode code point). If the
1.909 + * specified character is equal to or greater than 0x10000, then
1.910 + * the method returns 2. Otherwise, the method returns 1.
1.911 + *
1.912 + * <p>This method doesn't validate the specified character to be a
1.913 + * valid Unicode code point. The caller must validate the
1.914 + * character value using {@link #isValidCodePoint(int) isValidCodePoint}
1.915 + * if necessary.
1.916 + *
1.917 + * @param codePoint the character (Unicode code point) to be tested.
1.918 + * @return 2 if the character is a valid supplementary character; 1 otherwise.
1.919 + * @see Character#isSupplementaryCodePoint(int)
1.920 + * @since 1.5
1.921 + */
1.922 + public static int charCount(int codePoint) {
1.923 + return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
1.924 + }
1.925 +
1.926 + /**
1.927 + * Converts the specified surrogate pair to its supplementary code
1.928 + * point value. This method does not validate the specified
1.929 + * surrogate pair. The caller must validate it using {@link
1.930 + * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
1.931 + *
1.932 + * @param high the high-surrogate code unit
1.933 + * @param low the low-surrogate code unit
1.934 + * @return the supplementary code point composed from the
1.935 + * specified surrogate pair.
1.936 + * @since 1.5
1.937 + */
1.938 + public static int toCodePoint(char high, char low) {
1.939 + // Optimized form of:
1.940 + // return ((high - MIN_HIGH_SURROGATE) << 10)
1.941 + // + (low - MIN_LOW_SURROGATE)
1.942 + // + MIN_SUPPLEMENTARY_CODE_POINT;
1.943 + return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
1.944 + - (MIN_HIGH_SURROGATE << 10)
1.945 + - MIN_LOW_SURROGATE);
1.946 + }
1.947 +
1.948 + /**
1.949 + * Returns the code point at the given index of the
1.950 + * {@code CharSequence}. If the {@code char} value at
1.951 + * the given index in the {@code CharSequence} is in the
1.952 + * high-surrogate range, the following index is less than the
1.953 + * length of the {@code CharSequence}, and the
1.954 + * {@code char} value at the following index is in the
1.955 + * low-surrogate range, then the supplementary code point
1.956 + * corresponding to this surrogate pair is returned. Otherwise,
1.957 + * the {@code char} value at the given index is returned.
1.958 + *
1.959 + * @param seq a sequence of {@code char} values (Unicode code
1.960 + * units)
1.961 + * @param index the index to the {@code char} values (Unicode
1.962 + * code units) in {@code seq} to be converted
1.963 + * @return the Unicode code point at the given index
1.964 + * @exception NullPointerException if {@code seq} is null.
1.965 + * @exception IndexOutOfBoundsException if the value
1.966 + * {@code index} is negative or not less than
1.967 + * {@link CharSequence#length() seq.length()}.
1.968 + * @since 1.5
1.969 + */
1.970 + public static int codePointAt(CharSequence seq, int index) {
1.971 + char c1 = seq.charAt(index++);
1.972 + if (isHighSurrogate(c1)) {
1.973 + if (index < seq.length()) {
1.974 + char c2 = seq.charAt(index);
1.975 + if (isLowSurrogate(c2)) {
1.976 + return toCodePoint(c1, c2);
1.977 + }
1.978 + }
1.979 + }
1.980 + return c1;
1.981 + }
1.982 +
1.983 + /**
1.984 + * Returns the code point at the given index of the
1.985 + * {@code char} array. If the {@code char} value at
1.986 + * the given index in the {@code char} array is in the
1.987 + * high-surrogate range, the following index is less than the
1.988 + * length of the {@code char} array, and the
1.989 + * {@code char} value at the following index is in the
1.990 + * low-surrogate range, then the supplementary code point
1.991 + * corresponding to this surrogate pair is returned. Otherwise,
1.992 + * the {@code char} value at the given index is returned.
1.993 + *
1.994 + * @param a the {@code char} array
1.995 + * @param index the index to the {@code char} values (Unicode
1.996 + * code units) in the {@code char} array to be converted
1.997 + * @return the Unicode code point at the given index
1.998 + * @exception NullPointerException if {@code a} is null.
1.999 + * @exception IndexOutOfBoundsException if the value
1.1000 + * {@code index} is negative or not less than
1.1001 + * the length of the {@code char} array.
1.1002 + * @since 1.5
1.1003 + */
1.1004 + public static int codePointAt(char[] a, int index) {
1.1005 + return codePointAtImpl(a, index, a.length);
1.1006 + }
1.1007 +
1.1008 + /**
1.1009 + * Returns the code point at the given index of the
1.1010 + * {@code char} array, where only array elements with
1.1011 + * {@code index} less than {@code limit} can be used. If
1.1012 + * the {@code char} value at the given index in the
1.1013 + * {@code char} array is in the high-surrogate range, the
1.1014 + * following index is less than the {@code limit}, and the
1.1015 + * {@code char} value at the following index is in the
1.1016 + * low-surrogate range, then the supplementary code point
1.1017 + * corresponding to this surrogate pair is returned. Otherwise,
1.1018 + * the {@code char} value at the given index is returned.
1.1019 + *
1.1020 + * @param a the {@code char} array
1.1021 + * @param index the index to the {@code char} values (Unicode
1.1022 + * code units) in the {@code char} array to be converted
1.1023 + * @param limit the index after the last array element that
1.1024 + * can be used in the {@code char} array
1.1025 + * @return the Unicode code point at the given index
1.1026 + * @exception NullPointerException if {@code a} is null.
1.1027 + * @exception IndexOutOfBoundsException if the {@code index}
1.1028 + * argument is negative or not less than the {@code limit}
1.1029 + * argument, or if the {@code limit} argument is negative or
1.1030 + * greater than the length of the {@code char} array.
1.1031 + * @since 1.5
1.1032 + */
1.1033 + public static int codePointAt(char[] a, int index, int limit) {
1.1034 + if (index >= limit || limit < 0 || limit > a.length) {
1.1035 + throw new IndexOutOfBoundsException();
1.1036 + }
1.1037 + return codePointAtImpl(a, index, limit);
1.1038 + }
1.1039 +
1.1040 + // throws ArrayIndexOutOfBoundsException if index out of bounds
1.1041 + static int codePointAtImpl(char[] a, int index, int limit) {
1.1042 + char c1 = a[index++];
1.1043 + if (isHighSurrogate(c1)) {
1.1044 + if (index < limit) {
1.1045 + char c2 = a[index];
1.1046 + if (isLowSurrogate(c2)) {
1.1047 + return toCodePoint(c1, c2);
1.1048 + }
1.1049 + }
1.1050 + }
1.1051 + return c1;
1.1052 + }
1.1053 +
1.1054 + /**
1.1055 + * Returns the code point preceding the given index of the
1.1056 + * {@code CharSequence}. If the {@code char} value at
1.1057 + * {@code (index - 1)} in the {@code CharSequence} is in
1.1058 + * the low-surrogate range, {@code (index - 2)} is not
1.1059 + * negative, and the {@code char} value at {@code (index - 2)}
1.1060 + * in the {@code CharSequence} is in the
1.1061 + * high-surrogate range, then the supplementary code point
1.1062 + * corresponding to this surrogate pair is returned. Otherwise,
1.1063 + * the {@code char} value at {@code (index - 1)} is
1.1064 + * returned.
1.1065 + *
1.1066 + * @param seq the {@code CharSequence} instance
1.1067 + * @param index the index following the code point that should be returned
1.1068 + * @return the Unicode code point value before the given index.
1.1069 + * @exception NullPointerException if {@code seq} is null.
1.1070 + * @exception IndexOutOfBoundsException if the {@code index}
1.1071 + * argument is less than 1 or greater than {@link
1.1072 + * CharSequence#length() seq.length()}.
1.1073 + * @since 1.5
1.1074 + */
1.1075 + public static int codePointBefore(CharSequence seq, int index) {
1.1076 + char c2 = seq.charAt(--index);
1.1077 + if (isLowSurrogate(c2)) {
1.1078 + if (index > 0) {
1.1079 + char c1 = seq.charAt(--index);
1.1080 + if (isHighSurrogate(c1)) {
1.1081 + return toCodePoint(c1, c2);
1.1082 + }
1.1083 + }
1.1084 + }
1.1085 + return c2;
1.1086 + }
1.1087 +
1.1088 + /**
1.1089 + * Returns the code point preceding the given index of the
1.1090 + * {@code char} array. If the {@code char} value at
1.1091 + * {@code (index - 1)} in the {@code char} array is in
1.1092 + * the low-surrogate range, {@code (index - 2)} is not
1.1093 + * negative, and the {@code char} value at {@code (index - 2)}
1.1094 + * in the {@code char} array is in the
1.1095 + * high-surrogate range, then the supplementary code point
1.1096 + * corresponding to this surrogate pair is returned. Otherwise,
1.1097 + * the {@code char} value at {@code (index - 1)} is
1.1098 + * returned.
1.1099 + *
1.1100 + * @param a the {@code char} array
1.1101 + * @param index the index following the code point that should be returned
1.1102 + * @return the Unicode code point value before the given index.
1.1103 + * @exception NullPointerException if {@code a} is null.
1.1104 + * @exception IndexOutOfBoundsException if the {@code index}
1.1105 + * argument is less than 1 or greater than the length of the
1.1106 + * {@code char} array
1.1107 + * @since 1.5
1.1108 + */
1.1109 + public static int codePointBefore(char[] a, int index) {
1.1110 + return codePointBeforeImpl(a, index, 0);
1.1111 + }
1.1112 +
1.1113 + /**
1.1114 + * Returns the code point preceding the given index of the
1.1115 + * {@code char} array, where only array elements with
1.1116 + * {@code index} greater than or equal to {@code start}
1.1117 + * can be used. If the {@code char} value at {@code (index - 1)}
1.1118 + * in the {@code char} array is in the
1.1119 + * low-surrogate range, {@code (index - 2)} is not less than
1.1120 + * {@code start}, and the {@code char} value at
1.1121 + * {@code (index - 2)} in the {@code char} array is in
1.1122 + * the high-surrogate range, then the supplementary code point
1.1123 + * corresponding to this surrogate pair is returned. Otherwise,
1.1124 + * the {@code char} value at {@code (index - 1)} is
1.1125 + * returned.
1.1126 + *
1.1127 + * @param a the {@code char} array
1.1128 + * @param index the index following the code point that should be returned
1.1129 + * @param start the index of the first array element in the
1.1130 + * {@code char} array
1.1131 + * @return the Unicode code point value before the given index.
1.1132 + * @exception NullPointerException if {@code a} is null.
1.1133 + * @exception IndexOutOfBoundsException if the {@code index}
1.1134 + * argument is not greater than the {@code start} argument or
1.1135 + * is greater than the length of the {@code char} array, or
1.1136 + * if the {@code start} argument is negative or not less than
1.1137 + * the length of the {@code char} array.
1.1138 + * @since 1.5
1.1139 + */
1.1140 + public static int codePointBefore(char[] a, int index, int start) {
1.1141 + if (index <= start || start < 0 || start >= a.length) {
1.1142 + throw new IndexOutOfBoundsException();
1.1143 + }
1.1144 + return codePointBeforeImpl(a, index, start);
1.1145 + }
1.1146 +
1.1147 + // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
1.1148 + static int codePointBeforeImpl(char[] a, int index, int start) {
1.1149 + char c2 = a[--index];
1.1150 + if (isLowSurrogate(c2)) {
1.1151 + if (index > start) {
1.1152 + char c1 = a[--index];
1.1153 + if (isHighSurrogate(c1)) {
1.1154 + return toCodePoint(c1, c2);
1.1155 + }
1.1156 + }
1.1157 + }
1.1158 + return c2;
1.1159 + }
1.1160 +
1.1161 + /**
1.1162 + * Returns the leading surrogate (a
1.1163 + * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
1.1164 + * high surrogate code unit</a>) of the
1.1165 + * <a href="http://www.unicode.org/glossary/#surrogate_pair">
1.1166 + * surrogate pair</a>
1.1167 + * representing the specified supplementary character (Unicode
1.1168 + * code point) in the UTF-16 encoding. If the specified character
1.1169 + * is not a
1.1170 + * <a href="Character.html#supplementary">supplementary character</a>,
1.1171 + * an unspecified {@code char} is returned.
1.1172 + *
1.1173 + * <p>If
1.1174 + * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
1.1175 + * is {@code true}, then
1.1176 + * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
1.1177 + * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
1.1178 + * are also always {@code true}.
1.1179 + *
1.1180 + * @param codePoint a supplementary character (Unicode code point)
1.1181 + * @return the leading surrogate code unit used to represent the
1.1182 + * character in the UTF-16 encoding
1.1183 + * @since 1.7
1.1184 + */
1.1185 + public static char highSurrogate(int codePoint) {
1.1186 + return (char) ((codePoint >>> 10)
1.1187 + + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
1.1188 + }
1.1189 +
1.1190 + /**
1.1191 + * Returns the trailing surrogate (a
1.1192 + * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
1.1193 + * low surrogate code unit</a>) of the
1.1194 + * <a href="http://www.unicode.org/glossary/#surrogate_pair">
1.1195 + * surrogate pair</a>
1.1196 + * representing the specified supplementary character (Unicode
1.1197 + * code point) in the UTF-16 encoding. If the specified character
1.1198 + * is not a
1.1199 + * <a href="Character.html#supplementary">supplementary character</a>,
1.1200 + * an unspecified {@code char} is returned.
1.1201 + *
1.1202 + * <p>If
1.1203 + * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
1.1204 + * is {@code true}, then
1.1205 + * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
1.1206 + * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
1.1207 + * are also always {@code true}.
1.1208 + *
1.1209 + * @param codePoint a supplementary character (Unicode code point)
1.1210 + * @return the trailing surrogate code unit used to represent the
1.1211 + * character in the UTF-16 encoding
1.1212 + * @since 1.7
1.1213 + */
1.1214 + public static char lowSurrogate(int codePoint) {
1.1215 + return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
1.1216 + }
1.1217 +
1.1218 + /**
1.1219 + * Converts the specified character (Unicode code point) to its
1.1220 + * UTF-16 representation. If the specified code point is a BMP
1.1221 + * (Basic Multilingual Plane or Plane 0) value, the same value is
1.1222 + * stored in {@code dst[dstIndex]}, and 1 is returned. If the
1.1223 + * specified code point is a supplementary character, its
1.1224 + * surrogate values are stored in {@code dst[dstIndex]}
1.1225 + * (high-surrogate) and {@code dst[dstIndex+1]}
1.1226 + * (low-surrogate), and 2 is returned.
1.1227 + *
1.1228 + * @param codePoint the character (Unicode code point) to be converted.
1.1229 + * @param dst an array of {@code char} in which the
1.1230 + * {@code codePoint}'s UTF-16 value is stored.
1.1231 + * @param dstIndex the start index into the {@code dst}
1.1232 + * array where the converted value is stored.
1.1233 + * @return 1 if the code point is a BMP code point, 2 if the
1.1234 + * code point is a supplementary code point.
1.1235 + * @exception IllegalArgumentException if the specified
1.1236 + * {@code codePoint} is not a valid Unicode code point.
1.1237 + * @exception NullPointerException if the specified {@code dst} is null.
1.1238 + * @exception IndexOutOfBoundsException if {@code dstIndex}
1.1239 + * is negative or not less than {@code dst.length}, or if
1.1240 + * {@code dst} at {@code dstIndex} doesn't have enough
1.1241 + * array element(s) to store the resulting {@code char}
1.1242 + * value(s). (If {@code dstIndex} is equal to
1.1243 + * {@code dst.length-1} and the specified
1.1244 + * {@code codePoint} is a supplementary character, the
1.1245 + * high-surrogate value is not stored in
1.1246 + * {@code dst[dstIndex]}.)
1.1247 + * @since 1.5
1.1248 + */
1.1249 + public static int toChars(int codePoint, char[] dst, int dstIndex) {
1.1250 + if (isBmpCodePoint(codePoint)) {
1.1251 + dst[dstIndex] = (char) codePoint;
1.1252 + return 1;
1.1253 + } else if (isValidCodePoint(codePoint)) {
1.1254 + toSurrogates(codePoint, dst, dstIndex);
1.1255 + return 2;
1.1256 + } else {
1.1257 + throw new IllegalArgumentException();
1.1258 + }
1.1259 + }
1.1260 +
1.1261 + /**
1.1262 + * Converts the specified character (Unicode code point) to its
1.1263 + * UTF-16 representation stored in a {@code char} array. If
1.1264 + * the specified code point is a BMP (Basic Multilingual Plane or
1.1265 + * Plane 0) value, the resulting {@code char} array has
1.1266 + * the same value as {@code codePoint}. If the specified code
1.1267 + * point is a supplementary code point, the resulting
1.1268 + * {@code char} array has the corresponding surrogate pair.
1.1269 + *
1.1270 + * @param codePoint a Unicode code point
1.1271 + * @return a {@code char} array having
1.1272 + * {@code codePoint}'s UTF-16 representation.
1.1273 + * @exception IllegalArgumentException if the specified
1.1274 + * {@code codePoint} is not a valid Unicode code point.
1.1275 + * @since 1.5
1.1276 + */
1.1277 + public static char[] toChars(int codePoint) {
1.1278 + if (isBmpCodePoint(codePoint)) {
1.1279 + return new char[] { (char) codePoint };
1.1280 + } else if (isValidCodePoint(codePoint)) {
1.1281 + char[] result = new char[2];
1.1282 + toSurrogates(codePoint, result, 0);
1.1283 + return result;
1.1284 + } else {
1.1285 + throw new IllegalArgumentException();
1.1286 + }
1.1287 + }
1.1288 +
1.1289 + static void toSurrogates(int codePoint, char[] dst, int index) {
1.1290 + // We write elements "backwards" to guarantee all-or-nothing
1.1291 + dst[index+1] = lowSurrogate(codePoint);
1.1292 + dst[index] = highSurrogate(codePoint);
1.1293 + }
1.1294 +
1.1295 + /**
1.1296 + * Returns the number of Unicode code points in the text range of
1.1297 + * the specified char sequence. The text range begins at the
1.1298 + * specified {@code beginIndex} and extends to the
1.1299 + * {@code char} at index {@code endIndex - 1}. Thus the
1.1300 + * length (in {@code char}s) of the text range is
1.1301 + * {@code endIndex-beginIndex}. Unpaired surrogates within
1.1302 + * the text range count as one code point each.
1.1303 + *
1.1304 + * @param seq the char sequence
1.1305 + * @param beginIndex the index to the first {@code char} of
1.1306 + * the text range.
1.1307 + * @param endIndex the index after the last {@code char} of
1.1308 + * the text range.
1.1309 + * @return the number of Unicode code points in the specified text
1.1310 + * range
1.1311 + * @exception NullPointerException if {@code seq} is null.
1.1312 + * @exception IndexOutOfBoundsException if the
1.1313 + * {@code beginIndex} is negative, or {@code endIndex}
1.1314 + * is larger than the length of the given sequence, or
1.1315 + * {@code beginIndex} is larger than {@code endIndex}.
1.1316 + * @since 1.5
1.1317 + */
1.1318 + public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
1.1319 + int length = seq.length();
1.1320 + if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
1.1321 + throw new IndexOutOfBoundsException();
1.1322 + }
1.1323 + int n = endIndex - beginIndex;
1.1324 + for (int i = beginIndex; i < endIndex; ) {
1.1325 + if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
1.1326 + isLowSurrogate(seq.charAt(i))) {
1.1327 + n--;
1.1328 + i++;
1.1329 + }
1.1330 + }
1.1331 + return n;
1.1332 + }
1.1333 +
1.1334 + /**
1.1335 + * Returns the number of Unicode code points in a subarray of the
1.1336 + * {@code char} array argument. The {@code offset}
1.1337 + * argument is the index of the first {@code char} of the
1.1338 + * subarray and the {@code count} argument specifies the
1.1339 + * length of the subarray in {@code char}s. Unpaired
1.1340 + * surrogates within the subarray count as one code point each.
1.1341 + *
1.1342 + * @param a the {@code char} array
1.1343 + * @param offset the index of the first {@code char} in the
1.1344 + * given {@code char} array
1.1345 + * @param count the length of the subarray in {@code char}s
1.1346 + * @return the number of Unicode code points in the specified subarray
1.1347 + * @exception NullPointerException if {@code a} is null.
1.1348 + * @exception IndexOutOfBoundsException if {@code offset} or
1.1349 + * {@code count} is negative, or if {@code offset +
1.1350 + * count} is larger than the length of the given array.
1.1351 + * @since 1.5
1.1352 + */
1.1353 + public static int codePointCount(char[] a, int offset, int count) {
1.1354 + if (count > a.length - offset || offset < 0 || count < 0) {
1.1355 + throw new IndexOutOfBoundsException();
1.1356 + }
1.1357 + return codePointCountImpl(a, offset, count);
1.1358 + }
1.1359 +
1.1360 + static int codePointCountImpl(char[] a, int offset, int count) {
1.1361 + int endIndex = offset + count;
1.1362 + int n = count;
1.1363 + for (int i = offset; i < endIndex; ) {
1.1364 + if (isHighSurrogate(a[i++]) && i < endIndex &&
1.1365 + isLowSurrogate(a[i])) {
1.1366 + n--;
1.1367 + i++;
1.1368 + }
1.1369 + }
1.1370 + return n;
1.1371 + }
1.1372 +
1.1373 + /**
1.1374 + * Returns the index within the given char sequence that is offset
1.1375 + * from the given {@code index} by {@code codePointOffset}
1.1376 + * code points. Unpaired surrogates within the text range given by
1.1377 + * {@code index} and {@code codePointOffset} count as
1.1378 + * one code point each.
1.1379 + *
1.1380 + * @param seq the char sequence
1.1381 + * @param index the index to be offset
1.1382 + * @param codePointOffset the offset in code points
1.1383 + * @return the index within the char sequence
1.1384 + * @exception NullPointerException if {@code seq} is null.
1.1385 + * @exception IndexOutOfBoundsException if {@code index}
1.1386 + * is negative or larger than the length of the char sequence,
1.1387 + * or if {@code codePointOffset} is positive and the
1.1388 + * subsequence starting with {@code index} has fewer than
1.1389 + * {@code codePointOffset} code points, or if
1.1390 + * {@code codePointOffset} is negative and the subsequence
1.1391 + * before {@code index} has fewer than the absolute value
1.1392 + * of {@code codePointOffset} code points.
1.1393 + * @since 1.5
1.1394 + */
1.1395 + public static int offsetByCodePoints(CharSequence seq, int index,
1.1396 + int codePointOffset) {
1.1397 + int length = seq.length();
1.1398 + if (index < 0 || index > length) {
1.1399 + throw new IndexOutOfBoundsException();
1.1400 + }
1.1401 +
1.1402 + int x = index;
1.1403 + if (codePointOffset >= 0) {
1.1404 + int i;
1.1405 + for (i = 0; x < length && i < codePointOffset; i++) {
1.1406 + if (isHighSurrogate(seq.charAt(x++)) && x < length &&
1.1407 + isLowSurrogate(seq.charAt(x))) {
1.1408 + x++;
1.1409 + }
1.1410 + }
1.1411 + if (i < codePointOffset) {
1.1412 + throw new IndexOutOfBoundsException();
1.1413 + }
1.1414 + } else {
1.1415 + int i;
1.1416 + for (i = codePointOffset; x > 0 && i < 0; i++) {
1.1417 + if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
1.1418 + isHighSurrogate(seq.charAt(x-1))) {
1.1419 + x--;
1.1420 + }
1.1421 + }
1.1422 + if (i < 0) {
1.1423 + throw new IndexOutOfBoundsException();
1.1424 + }
1.1425 + }
1.1426 + return x;
1.1427 + }
1.1428 +
1.1429 + /**
1.1430 + * Returns the index within the given {@code char} subarray
1.1431 + * that is offset from the given {@code index} by
1.1432 + * {@code codePointOffset} code points. The
1.1433 + * {@code start} and {@code count} arguments specify a
1.1434 + * subarray of the {@code char} array. Unpaired surrogates
1.1435 + * within the text range given by {@code index} and
1.1436 + * {@code codePointOffset} count as one code point each.
1.1437 + *
1.1438 + * @param a the {@code char} array
1.1439 + * @param start the index of the first {@code char} of the
1.1440 + * subarray
1.1441 + * @param count the length of the subarray in {@code char}s
1.1442 + * @param index the index to be offset
1.1443 + * @param codePointOffset the offset in code points
1.1444 + * @return the index within the subarray
1.1445 + * @exception NullPointerException if {@code a} is null.
1.1446 + * @exception IndexOutOfBoundsException
1.1447 + * if {@code start} or {@code count} is negative,
1.1448 + * or if {@code start + count} is larger than the length of
1.1449 + * the given array,
1.1450 + * or if {@code index} is less than {@code start} or
1.1451 + * larger than {@code start + count},
1.1452 + * or if {@code codePointOffset} is positive and the text range
1.1453 + * starting with {@code index} and ending with {@code start + count - 1}
1.1454 + * has fewer than {@code codePointOffset} code
1.1455 + * points,
1.1456 + * or if {@code codePointOffset} is negative and the text range
1.1457 + * starting with {@code start} and ending with {@code index - 1}
1.1458 + * has fewer than the absolute value of
1.1459 + * {@code codePointOffset} code points.
1.1460 + * @since 1.5
1.1461 + */
1.1462 + public static int offsetByCodePoints(char[] a, int start, int count,
1.1463 + int index, int codePointOffset) {
1.1464 + if (count > a.length-start || start < 0 || count < 0
1.1465 + || index < start || index > start+count) {
1.1466 + throw new IndexOutOfBoundsException();
1.1467 + }
1.1468 + return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
1.1469 + }
1.1470 +
1.1471 + static int offsetByCodePointsImpl(char[]a, int start, int count,
1.1472 + int index, int codePointOffset) {
1.1473 + int x = index;
1.1474 + if (codePointOffset >= 0) {
1.1475 + int limit = start + count;
1.1476 + int i;
1.1477 + for (i = 0; x < limit && i < codePointOffset; i++) {
1.1478 + if (isHighSurrogate(a[x++]) && x < limit &&
1.1479 + isLowSurrogate(a[x])) {
1.1480 + x++;
1.1481 + }
1.1482 + }
1.1483 + if (i < codePointOffset) {
1.1484 + throw new IndexOutOfBoundsException();
1.1485 + }
1.1486 + } else {
1.1487 + int i;
1.1488 + for (i = codePointOffset; x > start && i < 0; i++) {
1.1489 + if (isLowSurrogate(a[--x]) && x > start &&
1.1490 + isHighSurrogate(a[x-1])) {
1.1491 + x--;
1.1492 + }
1.1493 + }
1.1494 + if (i < 0) {
1.1495 + throw new IndexOutOfBoundsException();
1.1496 + }
1.1497 + }
1.1498 + return x;
1.1499 + }
1.1500 +
1.1501 + /**
1.1502 + * Determines if the specified character is a lowercase character.
1.1503 + * <p>
1.1504 + * A character is lowercase if its general category type, provided
1.1505 + * by {@code Character.getType(ch)}, is
1.1506 + * {@code LOWERCASE_LETTER}, or it has contributory property
1.1507 + * Other_Lowercase as defined by the Unicode Standard.
1.1508 + * <p>
1.1509 + * The following are examples of lowercase characters:
1.1510 + * <p><blockquote><pre>
1.1511 + * a b c d e f g h i j k l m n o p q r s t u v w x y z
1.1512 + * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
1.1513 + * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
1.1514 + * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
1.1515 + * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
1.1516 + * </pre></blockquote>
1.1517 + * <p> Many other Unicode characters are lowercase too.
1.1518 + *
1.1519 + * <p><b>Note:</b> This method cannot handle <a
1.1520 + * href="#supplementary"> supplementary characters</a>. To support
1.1521 + * all Unicode characters, including supplementary characters, use
1.1522 + * the {@link #isLowerCase(int)} method.
1.1523 + *
1.1524 + * @param ch the character to be tested.
1.1525 + * @return {@code true} if the character is lowercase;
1.1526 + * {@code false} otherwise.
1.1527 + * @see Character#isUpperCase(char)
1.1528 + * @see Character#isTitleCase(char)
1.1529 + * @see Character#toLowerCase(char)
1.1530 + * @see Character#getType(char)
1.1531 + */
1.1532 + public static boolean isLowerCase(char ch) {
1.1533 + return ch == toLowerCase(ch);
1.1534 + }
1.1535 +
1.1536 + /**
1.1537 + * Determines if the specified character is an uppercase character.
1.1538 + * <p>
1.1539 + * A character is uppercase if its general category type, provided by
1.1540 + * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
1.1541 + * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
1.1542 + * <p>
1.1543 + * The following are examples of uppercase characters:
1.1544 + * <p><blockquote><pre>
1.1545 + * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
1.1546 + * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
1.1547 + * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
1.1548 + * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
1.1549 + * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
1.1550 + * </pre></blockquote>
1.1551 + * <p> Many other Unicode characters are uppercase too.<p>
1.1552 + *
1.1553 + * <p><b>Note:</b> This method cannot handle <a
1.1554 + * href="#supplementary"> supplementary characters</a>. To support
1.1555 + * all Unicode characters, including supplementary characters, use
1.1556 + * the {@link #isUpperCase(int)} method.
1.1557 + *
1.1558 + * @param ch the character to be tested.
1.1559 + * @return {@code true} if the character is uppercase;
1.1560 + * {@code false} otherwise.
1.1561 + * @see Character#isLowerCase(char)
1.1562 + * @see Character#isTitleCase(char)
1.1563 + * @see Character#toUpperCase(char)
1.1564 + * @see Character#getType(char)
1.1565 + * @since 1.0
1.1566 + */
1.1567 + public static boolean isUpperCase(char ch) {
1.1568 + return ch == toUpperCase(ch);
1.1569 + }
1.1570 +
1.1571 + /**
1.1572 + * Determines if the specified character is a titlecase character.
1.1573 + * <p>
1.1574 + * A character is a titlecase character if its general
1.1575 + * category type, provided by {@code Character.getType(ch)},
1.1576 + * is {@code TITLECASE_LETTER}.
1.1577 + * <p>
1.1578 + * Some characters look like pairs of Latin letters. For example, there
1.1579 + * is an uppercase letter that looks like "LJ" and has a corresponding
1.1580 + * lowercase letter that looks like "lj". A third form, which looks like "Lj",
1.1581 + * is the appropriate form to use when rendering a word in lowercase
1.1582 + * with initial capitals, as for a book title.
1.1583 + * <p>
1.1584 + * These are some of the Unicode characters for which this method returns
1.1585 + * {@code true}:
1.1586 + * <ul>
1.1587 + * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
1.1588 + * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
1.1589 + * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
1.1590 + * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
1.1591 + * </ul>
1.1592 + * <p> Many other Unicode characters are titlecase too.<p>
1.1593 + *
1.1594 + * <p><b>Note:</b> This method cannot handle <a
1.1595 + * href="#supplementary"> supplementary characters</a>. To support
1.1596 + * all Unicode characters, including supplementary characters, use
1.1597 + * the {@link #isTitleCase(int)} method.
1.1598 + *
1.1599 + * @param ch the character to be tested.
1.1600 + * @return {@code true} if the character is titlecase;
1.1601 + * {@code false} otherwise.
1.1602 + * @see Character#isLowerCase(char)
1.1603 + * @see Character#isUpperCase(char)
1.1604 + * @see Character#toTitleCase(char)
1.1605 + * @see Character#getType(char)
1.1606 + * @since 1.0.2
1.1607 + */
1.1608 + public static boolean isTitleCase(char ch) {
1.1609 + return isTitleCase((int)ch);
1.1610 + }
1.1611 +
1.1612 + /**
1.1613 + * Determines if the specified character (Unicode code point) is a titlecase character.
1.1614 + * <p>
1.1615 + * A character is a titlecase character if its general
1.1616 + * category type, provided by {@link Character#getType(int) getType(codePoint)},
1.1617 + * is {@code TITLECASE_LETTER}.
1.1618 + * <p>
1.1619 + * Some characters look like pairs of Latin letters. For example, there
1.1620 + * is an uppercase letter that looks like "LJ" and has a corresponding
1.1621 + * lowercase letter that looks like "lj". A third form, which looks like "Lj",
1.1622 + * is the appropriate form to use when rendering a word in lowercase
1.1623 + * with initial capitals, as for a book title.
1.1624 + * <p>
1.1625 + * These are some of the Unicode characters for which this method returns
1.1626 + * {@code true}:
1.1627 + * <ul>
1.1628 + * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
1.1629 + * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
1.1630 + * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
1.1631 + * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
1.1632 + * </ul>
1.1633 + * <p> Many other Unicode characters are titlecase too.<p>
1.1634 + *
1.1635 + * @param codePoint the character (Unicode code point) to be tested.
1.1636 + * @return {@code true} if the character is titlecase;
1.1637 + * {@code false} otherwise.
1.1638 + * @see Character#isLowerCase(int)
1.1639 + * @see Character#isUpperCase(int)
1.1640 + * @see Character#toTitleCase(int)
1.1641 + * @see Character#getType(int)
1.1642 + * @since 1.5
1.1643 + */
1.1644 + public static boolean isTitleCase(int codePoint) {
1.1645 + return getType(codePoint) == Character.TITLECASE_LETTER;
1.1646 + }
1.1647 +
1.1648 + /**
1.1649 + * Determines if the specified character is a digit.
1.1650 + * <p>
1.1651 + * A character is a digit if its general category type, provided
1.1652 + * by {@code Character.getType(ch)}, is
1.1653 + * {@code DECIMAL_DIGIT_NUMBER}.
1.1654 + * <p>
1.1655 + * Some Unicode character ranges that contain digits:
1.1656 + * <ul>
1.1657 + * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
1.1658 + * ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
1.1659 + * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
1.1660 + * Arabic-Indic digits
1.1661 + * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
1.1662 + * Extended Arabic-Indic digits
1.1663 + * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
1.1664 + * Devanagari digits
1.1665 + * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
1.1666 + * Fullwidth digits
1.1667 + * </ul>
1.1668 + *
1.1669 + * Many other character ranges contain digits as well.
1.1670 + *
1.1671 + * <p><b>Note:</b> This method cannot handle <a
1.1672 + * href="#supplementary"> supplementary characters</a>. To support
1.1673 + * all Unicode characters, including supplementary characters, use
1.1674 + * the {@link #isDigit(int)} method.
1.1675 + *
1.1676 + * @param ch the character to be tested.
1.1677 + * @return {@code true} if the character is a digit;
1.1678 + * {@code false} otherwise.
1.1679 + * @see Character#digit(char, int)
1.1680 + * @see Character#forDigit(int, int)
1.1681 + * @see Character#getType(char)
1.1682 + */
1.1683 + public static boolean isDigit(char ch) {
1.1684 + return String.valueOf(ch).matches("\\d");
1.1685 + }
1.1686 +
1.1687 + /**
1.1688 + * Determines if the specified character (Unicode code point) is a digit.
1.1689 + * <p>
1.1690 + * A character is a digit if its general category type, provided
1.1691 + * by {@link Character#getType(int) getType(codePoint)}, is
1.1692 + * {@code DECIMAL_DIGIT_NUMBER}.
1.1693 + * <p>
1.1694 + * Some Unicode character ranges that contain digits:
1.1695 + * <ul>
1.1696 + * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
1.1697 + * ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
1.1698 + * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
1.1699 + * Arabic-Indic digits
1.1700 + * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
1.1701 + * Extended Arabic-Indic digits
1.1702 + * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
1.1703 + * Devanagari digits
1.1704 + * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
1.1705 + * Fullwidth digits
1.1706 + * </ul>
1.1707 + *
1.1708 + * Many other character ranges contain digits as well.
1.1709 + *
1.1710 + * @param codePoint the character (Unicode code point) to be tested.
1.1711 + * @return {@code true} if the character is a digit;
1.1712 + * {@code false} otherwise.
1.1713 + * @see Character#forDigit(int, int)
1.1714 + * @see Character#getType(int)
1.1715 + * @since 1.5
1.1716 + */
1.1717 + public static boolean isDigit(int codePoint) {
1.1718 + return fromCodeChars(codePoint).matches("\\d");
1.1719 + }
1.1720 +
1.1721 + @JavaScriptBody(args = "c", body = "return String.fromCharCode(c);")
1.1722 + private native static String fromCodeChars(int codePoint);
1.1723 +
1.1724 + /**
1.1725 + * Determines if a character is defined in Unicode.
1.1726 + * <p>
1.1727 + * A character is defined if at least one of the following is true:
1.1728 + * <ul>
1.1729 + * <li>It has an entry in the UnicodeData file.
1.1730 + * <li>It has a value in a range defined by the UnicodeData file.
1.1731 + * </ul>
1.1732 + *
1.1733 + * <p><b>Note:</b> This method cannot handle <a
1.1734 + * href="#supplementary"> supplementary characters</a>. To support
1.1735 + * all Unicode characters, including supplementary characters, use
1.1736 + * the {@link #isDefined(int)} method.
1.1737 + *
1.1738 + * @param ch the character to be tested
1.1739 + * @return {@code true} if the character has a defined meaning
1.1740 + * in Unicode; {@code false} otherwise.
1.1741 + * @see Character#isDigit(char)
1.1742 + * @see Character#isLetter(char)
1.1743 + * @see Character#isLetterOrDigit(char)
1.1744 + * @see Character#isLowerCase(char)
1.1745 + * @see Character#isTitleCase(char)
1.1746 + * @see Character#isUpperCase(char)
1.1747 + * @since 1.0.2
1.1748 + */
1.1749 + public static boolean isDefined(char ch) {
1.1750 + return isDefined((int)ch);
1.1751 + }
1.1752 +
1.1753 + /**
1.1754 + * Determines if a character (Unicode code point) is defined in Unicode.
1.1755 + * <p>
1.1756 + * A character is defined if at least one of the following is true:
1.1757 + * <ul>
1.1758 + * <li>It has an entry in the UnicodeData file.
1.1759 + * <li>It has a value in a range defined by the UnicodeData file.
1.1760 + * </ul>
1.1761 + *
1.1762 + * @param codePoint the character (Unicode code point) to be tested.
1.1763 + * @return {@code true} if the character has a defined meaning
1.1764 + * in Unicode; {@code false} otherwise.
1.1765 + * @see Character#isDigit(int)
1.1766 + * @see Character#isLetter(int)
1.1767 + * @see Character#isLetterOrDigit(int)
1.1768 + * @see Character#isLowerCase(int)
1.1769 + * @see Character#isTitleCase(int)
1.1770 + * @see Character#isUpperCase(int)
1.1771 + * @since 1.5
1.1772 + */
1.1773 + public static boolean isDefined(int codePoint) {
1.1774 + return getType(codePoint) != Character.UNASSIGNED;
1.1775 + }
1.1776 +
1.1777 + /**
1.1778 + * Determines if the specified character is a letter.
1.1779 + * <p>
1.1780 + * A character is considered to be a letter if its general
1.1781 + * category type, provided by {@code Character.getType(ch)},
1.1782 + * is any of the following:
1.1783 + * <ul>
1.1784 + * <li> {@code UPPERCASE_LETTER}
1.1785 + * <li> {@code LOWERCASE_LETTER}
1.1786 + * <li> {@code TITLECASE_LETTER}
1.1787 + * <li> {@code MODIFIER_LETTER}
1.1788 + * <li> {@code OTHER_LETTER}
1.1789 + * </ul>
1.1790 + *
1.1791 + * Not all letters have case. Many characters are
1.1792 + * letters but are neither uppercase nor lowercase nor titlecase.
1.1793 + *
1.1794 + * <p><b>Note:</b> This method cannot handle <a
1.1795 + * href="#supplementary"> supplementary characters</a>. To support
1.1796 + * all Unicode characters, including supplementary characters, use
1.1797 + * the {@link #isLetter(int)} method.
1.1798 + *
1.1799 + * @param ch the character to be tested.
1.1800 + * @return {@code true} if the character is a letter;
1.1801 + * {@code false} otherwise.
1.1802 + * @see Character#isDigit(char)
1.1803 + * @see Character#isJavaIdentifierStart(char)
1.1804 + * @see Character#isJavaLetter(char)
1.1805 + * @see Character#isJavaLetterOrDigit(char)
1.1806 + * @see Character#isLetterOrDigit(char)
1.1807 + * @see Character#isLowerCase(char)
1.1808 + * @see Character#isTitleCase(char)
1.1809 + * @see Character#isUnicodeIdentifierStart(char)
1.1810 + * @see Character#isUpperCase(char)
1.1811 + */
1.1812 + public static boolean isLetter(char ch) {
1.1813 + return String.valueOf(ch).matches("\\w") && !isDigit(ch);
1.1814 + }
1.1815 +
1.1816 + /**
1.1817 + * Determines if the specified character (Unicode code point) is a letter.
1.1818 + * <p>
1.1819 + * A character is considered to be a letter if its general
1.1820 + * category type, provided by {@link Character#getType(int) getType(codePoint)},
1.1821 + * is any of the following:
1.1822 + * <ul>
1.1823 + * <li> {@code UPPERCASE_LETTER}
1.1824 + * <li> {@code LOWERCASE_LETTER}
1.1825 + * <li> {@code TITLECASE_LETTER}
1.1826 + * <li> {@code MODIFIER_LETTER}
1.1827 + * <li> {@code OTHER_LETTER}
1.1828 + * </ul>
1.1829 + *
1.1830 + * Not all letters have case. Many characters are
1.1831 + * letters but are neither uppercase nor lowercase nor titlecase.
1.1832 + *
1.1833 + * @param codePoint the character (Unicode code point) to be tested.
1.1834 + * @return {@code true} if the character is a letter;
1.1835 + * {@code false} otherwise.
1.1836 + * @see Character#isDigit(int)
1.1837 + * @see Character#isJavaIdentifierStart(int)
1.1838 + * @see Character#isLetterOrDigit(int)
1.1839 + * @see Character#isLowerCase(int)
1.1840 + * @see Character#isTitleCase(int)
1.1841 + * @see Character#isUnicodeIdentifierStart(int)
1.1842 + * @see Character#isUpperCase(int)
1.1843 + * @since 1.5
1.1844 + */
1.1845 + public static boolean isLetter(int codePoint) {
1.1846 + return fromCodeChars(codePoint).matches("\\w") && !isDigit(codePoint);
1.1847 + }
1.1848 +
1.1849 + /**
1.1850 + * Determines if the specified character is a letter or digit.
1.1851 + * <p>
1.1852 + * A character is considered to be a letter or digit if either
1.1853 + * {@code Character.isLetter(char ch)} or
1.1854 + * {@code Character.isDigit(char ch)} returns
1.1855 + * {@code true} for the character.
1.1856 + *
1.1857 + * <p><b>Note:</b> This method cannot handle <a
1.1858 + * href="#supplementary"> supplementary characters</a>. To support
1.1859 + * all Unicode characters, including supplementary characters, use
1.1860 + * the {@link #isLetterOrDigit(int)} method.
1.1861 + *
1.1862 + * @param ch the character to be tested.
1.1863 + * @return {@code true} if the character is a letter or digit;
1.1864 + * {@code false} otherwise.
1.1865 + * @see Character#isDigit(char)
1.1866 + * @see Character#isJavaIdentifierPart(char)
1.1867 + * @see Character#isJavaLetter(char)
1.1868 + * @see Character#isJavaLetterOrDigit(char)
1.1869 + * @see Character#isLetter(char)
1.1870 + * @see Character#isUnicodeIdentifierPart(char)
1.1871 + * @since 1.0.2
1.1872 + */
1.1873 + public static boolean isLetterOrDigit(char ch) {
1.1874 + return String.valueOf(ch).matches("\\w");
1.1875 + }
1.1876 +
1.1877 + /**
1.1878 + * Determines if the specified character (Unicode code point) is a letter or digit.
1.1879 + * <p>
1.1880 + * A character is considered to be a letter or digit if either
1.1881 + * {@link #isLetter(int) isLetter(codePoint)} or
1.1882 + * {@link #isDigit(int) isDigit(codePoint)} returns
1.1883 + * {@code true} for the character.
1.1884 + *
1.1885 + * @param codePoint the character (Unicode code point) to be tested.
1.1886 + * @return {@code true} if the character is a letter or digit;
1.1887 + * {@code false} otherwise.
1.1888 + * @see Character#isDigit(int)
1.1889 + * @see Character#isJavaIdentifierPart(int)
1.1890 + * @see Character#isLetter(int)
1.1891 + * @see Character#isUnicodeIdentifierPart(int)
1.1892 + * @since 1.5
1.1893 + */
1.1894 + public static boolean isLetterOrDigit(int codePoint) {
1.1895 + return fromCodeChars(codePoint).matches("\\w");
1.1896 + }
1.1897 +
1.1898 + static int getType(int x) {
1.1899 + throw new UnsupportedOperationException();
1.1900 + }
1.1901 +
1.1902 + /**
1.1903 + * Converts the character argument to lowercase using case
1.1904 + * mapping information from the UnicodeData file.
1.1905 + * <p>
1.1906 + * Note that
1.1907 + * {@code Character.isLowerCase(Character.toLowerCase(ch))}
1.1908 + * does not always return {@code true} for some ranges of
1.1909 + * characters, particularly those that are symbols or ideographs.
1.1910 + *
1.1911 + * <p>In general, {@link String#toLowerCase()} should be used to map
1.1912 + * characters to lowercase. {@code String} case mapping methods
1.1913 + * have several benefits over {@code Character} case mapping methods.
1.1914 + * {@code String} case mapping methods can perform locale-sensitive
1.1915 + * mappings, context-sensitive mappings, and 1:M character mappings, whereas
1.1916 + * the {@code Character} case mapping methods cannot.
1.1917 + *
1.1918 + * <p><b>Note:</b> This method cannot handle <a
1.1919 + * href="#supplementary"> supplementary characters</a>. To support
1.1920 + * all Unicode characters, including supplementary characters, use
1.1921 + * the {@link #toLowerCase(int)} method.
1.1922 + *
1.1923 + * @param ch the character to be converted.
1.1924 + * @return the lowercase equivalent of the character, if any;
1.1925 + * otherwise, the character itself.
1.1926 + * @see Character#isLowerCase(char)
1.1927 + * @see String#toLowerCase()
1.1928 + */
1.1929 + public static char toLowerCase(char ch) {
1.1930 + return String.valueOf(ch).toLowerCase().charAt(0);
1.1931 + }
1.1932 +
1.1933 + /**
1.1934 + * Converts the character argument to uppercase using case mapping
1.1935 + * information from the UnicodeData file.
1.1936 + * <p>
1.1937 + * Note that
1.1938 + * {@code Character.isUpperCase(Character.toUpperCase(ch))}
1.1939 + * does not always return {@code true} for some ranges of
1.1940 + * characters, particularly those that are symbols or ideographs.
1.1941 + *
1.1942 + * <p>In general, {@link String#toUpperCase()} should be used to map
1.1943 + * characters to uppercase. {@code String} case mapping methods
1.1944 + * have several benefits over {@code Character} case mapping methods.
1.1945 + * {@code String} case mapping methods can perform locale-sensitive
1.1946 + * mappings, context-sensitive mappings, and 1:M character mappings, whereas
1.1947 + * the {@code Character} case mapping methods cannot.
1.1948 + *
1.1949 + * <p><b>Note:</b> This method cannot handle <a
1.1950 + * href="#supplementary"> supplementary characters</a>. To support
1.1951 + * all Unicode characters, including supplementary characters, use
1.1952 + * the {@link #toUpperCase(int)} method.
1.1953 + *
1.1954 + * @param ch the character to be converted.
1.1955 + * @return the uppercase equivalent of the character, if any;
1.1956 + * otherwise, the character itself.
1.1957 + * @see Character#isUpperCase(char)
1.1958 + * @see String#toUpperCase()
1.1959 + */
1.1960 + public static char toUpperCase(char ch) {
1.1961 + return String.valueOf(ch).toUpperCase().charAt(0);
1.1962 + }
1.1963 +
1.1964 + /**
1.1965 + * Returns the numeric value of the character {@code ch} in the
1.1966 + * specified radix.
1.1967 + * <p>
1.1968 + * If the radix is not in the range {@code MIN_RADIX} ≤
1.1969 + * {@code radix} ≤ {@code MAX_RADIX} or if the
1.1970 + * value of {@code ch} is not a valid digit in the specified
1.1971 + * radix, {@code -1} is returned. A character is a valid digit
1.1972 + * if at least one of the following is true:
1.1973 + * <ul>
1.1974 + * <li>The method {@code isDigit} is {@code true} of the character
1.1975 + * and the Unicode decimal digit value of the character (or its
1.1976 + * single-character decomposition) is less than the specified radix.
1.1977 + * In this case the decimal digit value is returned.
1.1978 + * <li>The character is one of the uppercase Latin letters
1.1979 + * {@code 'A'} through {@code 'Z'} and its code is less than
1.1980 + * {@code radix + 'A' - 10}.
1.1981 + * In this case, {@code ch - 'A' + 10}
1.1982 + * is returned.
1.1983 + * <li>The character is one of the lowercase Latin letters
1.1984 + * {@code 'a'} through {@code 'z'} and its code is less than
1.1985 + * {@code radix + 'a' - 10}.
1.1986 + * In this case, {@code ch - 'a' + 10}
1.1987 + * is returned.
1.1988 + * <li>The character is one of the fullwidth uppercase Latin letters A
1.1989 + * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
1.1990 + * and its code is less than
1.1991 + * {@code radix + '\u005CuFF21' - 10}.
1.1992 + * In this case, {@code ch - '\u005CuFF21' + 10}
1.1993 + * is returned.
1.1994 + * <li>The character is one of the fullwidth lowercase Latin letters a
1.1995 + * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
1.1996 + * and its code is less than
1.1997 + * {@code radix + '\u005CuFF41' - 10}.
1.1998 + * In this case, {@code ch - '\u005CuFF41' + 10}
1.1999 + * is returned.
1.2000 + * </ul>
1.2001 + *
1.2002 + * <p><b>Note:</b> This method cannot handle <a
1.2003 + * href="#supplementary"> supplementary characters</a>. To support
1.2004 + * all Unicode characters, including supplementary characters, use
1.2005 + * the {@link #digit(int, int)} method.
1.2006 + *
1.2007 + * @param ch the character to be converted.
1.2008 + * @param radix the radix.
1.2009 + * @return the numeric value represented by the character in the
1.2010 + * specified radix.
1.2011 + * @see Character#forDigit(int, int)
1.2012 + * @see Character#isDigit(char)
1.2013 + */
1.2014 + public static int digit(char ch, int radix) {
1.2015 + return digit((int)ch, radix);
1.2016 + }
1.2017 +
1.2018 + /**
1.2019 + * Returns the numeric value of the specified character (Unicode
1.2020 + * code point) in the specified radix.
1.2021 + *
1.2022 + * <p>If the radix is not in the range {@code MIN_RADIX} ≤
1.2023 + * {@code radix} ≤ {@code MAX_RADIX} or if the
1.2024 + * character is not a valid digit in the specified
1.2025 + * radix, {@code -1} is returned. A character is a valid digit
1.2026 + * if at least one of the following is true:
1.2027 + * <ul>
1.2028 + * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
1.2029 + * and the Unicode decimal digit value of the character (or its
1.2030 + * single-character decomposition) is less than the specified radix.
1.2031 + * In this case the decimal digit value is returned.
1.2032 + * <li>The character is one of the uppercase Latin letters
1.2033 + * {@code 'A'} through {@code 'Z'} and its code is less than
1.2034 + * {@code radix + 'A' - 10}.
1.2035 + * In this case, {@code codePoint - 'A' + 10}
1.2036 + * is returned.
1.2037 + * <li>The character is one of the lowercase Latin letters
1.2038 + * {@code 'a'} through {@code 'z'} and its code is less than
1.2039 + * {@code radix + 'a' - 10}.
1.2040 + * In this case, {@code codePoint - 'a' + 10}
1.2041 + * is returned.
1.2042 + * <li>The character is one of the fullwidth uppercase Latin letters A
1.2043 + * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
1.2044 + * and its code is less than
1.2045 + * {@code radix + '\u005CuFF21' - 10}.
1.2046 + * In this case,
1.2047 + * {@code codePoint - '\u005CuFF21' + 10}
1.2048 + * is returned.
1.2049 + * <li>The character is one of the fullwidth lowercase Latin letters a
1.2050 + * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
1.2051 + * and its code is less than
1.2052 + * {@code radix + '\u005CuFF41'- 10}.
1.2053 + * In this case,
1.2054 + * {@code codePoint - '\u005CuFF41' + 10}
1.2055 + * is returned.
1.2056 + * </ul>
1.2057 + *
1.2058 + * @param codePoint the character (Unicode code point) to be converted.
1.2059 + * @param radix the radix.
1.2060 + * @return the numeric value represented by the character in the
1.2061 + * specified radix.
1.2062 + * @see Character#forDigit(int, int)
1.2063 + * @see Character#isDigit(int)
1.2064 + * @since 1.5
1.2065 + */
1.2066 + public static int digit(int codePoint, int radix) {
1.2067 + throw new UnsupportedOperationException();
1.2068 + }
1.2069 +
1.2070 + /**
1.2071 + * Returns the {@code int} value that the specified Unicode
1.2072 + * character represents. For example, the character
1.2073 + * {@code '\u005Cu216C'} (the roman numeral fifty) will return
1.2074 + * an int with a value of 50.
1.2075 + * <p>
1.2076 + * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
1.2077 + * {@code '\u005Cu005A'}), lowercase
1.2078 + * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
1.2079 + * full width variant ({@code '\u005CuFF21'} through
1.2080 + * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
1.2081 + * {@code '\u005CuFF5A'}) forms have numeric values from 10
1.2082 + * through 35. This is independent of the Unicode specification,
1.2083 + * which does not assign numeric values to these {@code char}
1.2084 + * values.
1.2085 + * <p>
1.2086 + * If the character does not have a numeric value, then -1 is returned.
1.2087 + * If the character has a numeric value that cannot be represented as a
1.2088 + * nonnegative integer (for example, a fractional value), then -2
1.2089 + * is returned.
1.2090 + *
1.2091 + * <p><b>Note:</b> This method cannot handle <a
1.2092 + * href="#supplementary"> supplementary characters</a>. To support
1.2093 + * all Unicode characters, including supplementary characters, use
1.2094 + * the {@link #getNumericValue(int)} method.
1.2095 + *
1.2096 + * @param ch the character to be converted.
1.2097 + * @return the numeric value of the character, as a nonnegative {@code int}
1.2098 + * value; -2 if the character has a numeric value that is not a
1.2099 + * nonnegative integer; -1 if the character has no numeric value.
1.2100 + * @see Character#forDigit(int, int)
1.2101 + * @see Character#isDigit(char)
1.2102 + * @since 1.1
1.2103 + */
1.2104 + public static int getNumericValue(char ch) {
1.2105 + return getNumericValue((int)ch);
1.2106 + }
1.2107 +
1.2108 + /**
1.2109 + * Returns the {@code int} value that the specified
1.2110 + * character (Unicode code point) represents. For example, the character
1.2111 + * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
1.2112 + * an {@code int} with a value of 50.
1.2113 + * <p>
1.2114 + * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
1.2115 + * {@code '\u005Cu005A'}), lowercase
1.2116 + * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
1.2117 + * full width variant ({@code '\u005CuFF21'} through
1.2118 + * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
1.2119 + * {@code '\u005CuFF5A'}) forms have numeric values from 10
1.2120 + * through 35. This is independent of the Unicode specification,
1.2121 + * which does not assign numeric values to these {@code char}
1.2122 + * values.
1.2123 + * <p>
1.2124 + * If the character does not have a numeric value, then -1 is returned.
1.2125 + * If the character has a numeric value that cannot be represented as a
1.2126 + * nonnegative integer (for example, a fractional value), then -2
1.2127 + * is returned.
1.2128 + *
1.2129 + * @param codePoint the character (Unicode code point) to be converted.
1.2130 + * @return the numeric value of the character, as a nonnegative {@code int}
1.2131 + * value; -2 if the character has a numeric value that is not a
1.2132 + * nonnegative integer; -1 if the character has no numeric value.
1.2133 + * @see Character#forDigit(int, int)
1.2134 + * @see Character#isDigit(int)
1.2135 + * @since 1.5
1.2136 + */
1.2137 + public static int getNumericValue(int codePoint) {
1.2138 + throw new UnsupportedOperationException();
1.2139 + }
1.2140 +
1.2141 + /**
1.2142 + * Determines if the specified character is ISO-LATIN-1 white space.
1.2143 + * This method returns {@code true} for the following five
1.2144 + * characters only:
1.2145 + * <table>
1.2146 + * <tr><td>{@code '\t'}</td> <td>{@code U+0009}</td>
1.2147 + * <td>{@code HORIZONTAL TABULATION}</td></tr>
1.2148 + * <tr><td>{@code '\n'}</td> <td>{@code U+000A}</td>
1.2149 + * <td>{@code NEW LINE}</td></tr>
1.2150 + * <tr><td>{@code '\f'}</td> <td>{@code U+000C}</td>
1.2151 + * <td>{@code FORM FEED}</td></tr>
1.2152 + * <tr><td>{@code '\r'}</td> <td>{@code U+000D}</td>
1.2153 + * <td>{@code CARRIAGE RETURN}</td></tr>
1.2154 + * <tr><td>{@code ' '}</td> <td>{@code U+0020}</td>
1.2155 + * <td>{@code SPACE}</td></tr>
1.2156 + * </table>
1.2157 + *
1.2158 + * @param ch the character to be tested.
1.2159 + * @return {@code true} if the character is ISO-LATIN-1 white
1.2160 + * space; {@code false} otherwise.
1.2161 + * @see Character#isSpaceChar(char)
1.2162 + * @see Character#isWhitespace(char)
1.2163 + * @deprecated Replaced by isWhitespace(char).
1.2164 + */
1.2165 + @Deprecated
1.2166 + public static boolean isSpace(char ch) {
1.2167 + return (ch <= 0x0020) &&
1.2168 + (((((1L << 0x0009) |
1.2169 + (1L << 0x000A) |
1.2170 + (1L << 0x000C) |
1.2171 + (1L << 0x000D) |
1.2172 + (1L << 0x0020)) >> ch) & 1L) != 0);
1.2173 + }
1.2174 +
1.2175 +
1.2176 +
1.2177 + /**
1.2178 + * Determines if the specified character is white space according to Java.
1.2179 + * A character is a Java whitespace character if and only if it satisfies
1.2180 + * one of the following criteria:
1.2181 + * <ul>
1.2182 + * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
1.2183 + * {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
1.2184 + * but is not also a non-breaking space ({@code '\u005Cu00A0'},
1.2185 + * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
1.2186 + * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
1.2187 + * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
1.2188 + * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
1.2189 + * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
1.2190 + * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
1.2191 + * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
1.2192 + * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
1.2193 + * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
1.2194 + * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
1.2195 + * </ul>
1.2196 + *
1.2197 + * <p><b>Note:</b> This method cannot handle <a
1.2198 + * href="#supplementary"> supplementary characters</a>. To support
1.2199 + * all Unicode characters, including supplementary characters, use
1.2200 + * the {@link #isWhitespace(int)} method.
1.2201 + *
1.2202 + * @param ch the character to be tested.
1.2203 + * @return {@code true} if the character is a Java whitespace
1.2204 + * character; {@code false} otherwise.
1.2205 + * @see Character#isSpaceChar(char)
1.2206 + * @since 1.1
1.2207 + */
1.2208 + public static boolean isWhitespace(char ch) {
1.2209 + return isWhitespace((int)ch);
1.2210 + }
1.2211 +
1.2212 + /**
1.2213 + * Determines if the specified character (Unicode code point) is
1.2214 + * white space according to Java. A character is a Java
1.2215 + * whitespace character if and only if it satisfies one of the
1.2216 + * following criteria:
1.2217 + * <ul>
1.2218 + * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
1.2219 + * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
1.2220 + * but is not also a non-breaking space ({@code '\u005Cu00A0'},
1.2221 + * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
1.2222 + * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
1.2223 + * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
1.2224 + * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
1.2225 + * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
1.2226 + * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
1.2227 + * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
1.2228 + * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
1.2229 + * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
1.2230 + * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
1.2231 + * </ul>
1.2232 + * <p>
1.2233 + *
1.2234 + * @param codePoint the character (Unicode code point) to be tested.
1.2235 + * @return {@code true} if the character is a Java whitespace
1.2236 + * character; {@code false} otherwise.
1.2237 + * @see Character#isSpaceChar(int)
1.2238 + * @since 1.5
1.2239 + */
1.2240 + public static boolean isWhitespace(int codePoint) {
1.2241 + throw new UnsupportedOperationException();
1.2242 + }
1.2243 +
1.2244 + /**
1.2245 + * Determines if the specified character is an ISO control
1.2246 + * character. A character is considered to be an ISO control
1.2247 + * character if its code is in the range {@code '\u005Cu0000'}
1.2248 + * through {@code '\u005Cu001F'} or in the range
1.2249 + * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
1.2250 + *
1.2251 + * <p><b>Note:</b> This method cannot handle <a
1.2252 + * href="#supplementary"> supplementary characters</a>. To support
1.2253 + * all Unicode characters, including supplementary characters, use
1.2254 + * the {@link #isISOControl(int)} method.
1.2255 + *
1.2256 + * @param ch the character to be tested.
1.2257 + * @return {@code true} if the character is an ISO control character;
1.2258 + * {@code false} otherwise.
1.2259 + *
1.2260 + * @see Character#isSpaceChar(char)
1.2261 + * @see Character#isWhitespace(char)
1.2262 + * @since 1.1
1.2263 + */
1.2264 + public static boolean isISOControl(char ch) {
1.2265 + return isISOControl((int)ch);
1.2266 + }
1.2267 +
1.2268 + /**
1.2269 + * Determines if the referenced character (Unicode code point) is an ISO control
1.2270 + * character. A character is considered to be an ISO control
1.2271 + * character if its code is in the range {@code '\u005Cu0000'}
1.2272 + * through {@code '\u005Cu001F'} or in the range
1.2273 + * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
1.2274 + *
1.2275 + * @param codePoint the character (Unicode code point) to be tested.
1.2276 + * @return {@code true} if the character is an ISO control character;
1.2277 + * {@code false} otherwise.
1.2278 + * @see Character#isSpaceChar(int)
1.2279 + * @see Character#isWhitespace(int)
1.2280 + * @since 1.5
1.2281 + */
1.2282 + public static boolean isISOControl(int codePoint) {
1.2283 + // Optimized form of:
1.2284 + // (codePoint >= 0x00 && codePoint <= 0x1F) ||
1.2285 + // (codePoint >= 0x7F && codePoint <= 0x9F);
1.2286 + return codePoint <= 0x9F &&
1.2287 + (codePoint >= 0x7F || (codePoint >>> 5 == 0));
1.2288 + }
1.2289 +
1.2290 + /**
1.2291 + * Determines the character representation for a specific digit in
1.2292 + * the specified radix. If the value of {@code radix} is not a
1.2293 + * valid radix, or the value of {@code digit} is not a valid
1.2294 + * digit in the specified radix, the null character
1.2295 + * ({@code '\u005Cu0000'}) is returned.
1.2296 + * <p>
1.2297 + * The {@code radix} argument is valid if it is greater than or
1.2298 + * equal to {@code MIN_RADIX} and less than or equal to
1.2299 + * {@code MAX_RADIX}. The {@code digit} argument is valid if
1.2300 + * {@code 0 <= digit < radix}.
1.2301 + * <p>
1.2302 + * If the digit is less than 10, then
1.2303 + * {@code '0' + digit} is returned. Otherwise, the value
1.2304 + * {@code 'a' + digit - 10} is returned.
1.2305 + *
1.2306 + * @param digit the number to convert to a character.
1.2307 + * @param radix the radix.
1.2308 + * @return the {@code char} representation of the specified digit
1.2309 + * in the specified radix.
1.2310 + * @see Character#MIN_RADIX
1.2311 + * @see Character#MAX_RADIX
1.2312 + * @see Character#digit(char, int)
1.2313 + */
1.2314 + public static char forDigit(int digit, int radix) {
1.2315 + if ((digit >= radix) || (digit < 0)) {
1.2316 + return '\0';
1.2317 + }
1.2318 + if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
1.2319 + return '\0';
1.2320 + }
1.2321 + if (digit < 10) {
1.2322 + return (char)('0' + digit);
1.2323 + }
1.2324 + return (char)('a' - 10 + digit);
1.2325 + }
1.2326 +
1.2327 + /**
1.2328 + * Compares two {@code Character} objects numerically.
1.2329 + *
1.2330 + * @param anotherCharacter the {@code Character} to be compared.
1.2331 +
1.2332 + * @return the value {@code 0} if the argument {@code Character}
1.2333 + * is equal to this {@code Character}; a value less than
1.2334 + * {@code 0} if this {@code Character} is numerically less
1.2335 + * than the {@code Character} argument; and a value greater than
1.2336 + * {@code 0} if this {@code Character} is numerically greater
1.2337 + * than the {@code Character} argument (unsigned comparison).
1.2338 + * Note that this is strictly a numerical comparison; it is not
1.2339 + * locale-dependent.
1.2340 + * @since 1.2
1.2341 + */
1.2342 + public int compareTo(Character anotherCharacter) {
1.2343 + return compare(this.value, anotherCharacter.value);
1.2344 + }
1.2345 +
1.2346 + /**
1.2347 + * Compares two {@code char} values numerically.
1.2348 + * The value returned is identical to what would be returned by:
1.2349 + * <pre>
1.2350 + * Character.valueOf(x).compareTo(Character.valueOf(y))
1.2351 + * </pre>
1.2352 + *
1.2353 + * @param x the first {@code char} to compare
1.2354 + * @param y the second {@code char} to compare
1.2355 + * @return the value {@code 0} if {@code x == y};
1.2356 + * a value less than {@code 0} if {@code x < y}; and
1.2357 + * a value greater than {@code 0} if {@code x > y}
1.2358 + * @since 1.7
1.2359 + */
1.2360 + public static int compare(char x, char y) {
1.2361 + return x - y;
1.2362 + }
1.2363 +
1.2364 +
1.2365 + /**
1.2366 + * The number of bits used to represent a <tt>char</tt> value in unsigned
1.2367 + * binary form, constant {@code 16}.
1.2368 + *
1.2369 + * @since 1.5
1.2370 + */
1.2371 + public static final int SIZE = 16;
1.2372 +
1.2373 + /**
1.2374 + * Returns the value obtained by reversing the order of the bytes in the
1.2375 + * specified <tt>char</tt> value.
1.2376 + *
1.2377 + * @return the value obtained by reversing (or, equivalently, swapping)
1.2378 + * the bytes in the specified <tt>char</tt> value.
1.2379 + * @since 1.5
1.2380 + */
1.2381 + public static char reverseBytes(char ch) {
1.2382 + return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
1.2383 + }
1.2384 +
1.2385 +}