hg/bck2brwsr: emul/src/main/java/java/lang/Character.java@9f3c454e74d4 (annotated)

jaroslav@68	1	/*
jaroslav@68	2	* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
jaroslav@68	3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
jaroslav@68	4	*
jaroslav@68	5	* This code is free software; you can redistribute it and/or modify it
jaroslav@68	6	* under the terms of the GNU General Public License version 2 only, as
jaroslav@68	7	* published by the Free Software Foundation. Oracle designates this
jaroslav@68	8	* particular file as subject to the "Classpath" exception as provided
jaroslav@68	9	* by Oracle in the LICENSE file that accompanied this code.
jaroslav@68	10	*
jaroslav@68	11	* This code is distributed in the hope that it will be useful, but WITHOUT
jaroslav@68	12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
jaroslav@68	13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
jaroslav@68	14	* version 2 for more details (a copy is included in the LICENSE file that
jaroslav@68	15	* accompanied this code).
jaroslav@68	16	*
jaroslav@68	17	* You should have received a copy of the GNU General Public License version
jaroslav@68	18	* 2 along with this work; if not, write to the Free Software Foundation,
jaroslav@68	19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
jaroslav@68	20	*
jaroslav@68	21	* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
jaroslav@68	22	* or visit www.oracle.com if you need additional information or have any
jaroslav@68	23	* questions.
jaroslav@68	24	*/
jaroslav@68	25
jaroslav@68	26	package java.lang;
jaroslav@68	27
jaroslav@68	28	/**
jaroslav@68	29	* The {@code Character} class wraps a value of the primitive
jaroslav@68	30	* type {@code char} in an object. An object of type
jaroslav@68	31	* {@code Character} contains a single field whose type is
jaroslav@68	32	* {@code char}.
jaroslav@68	33	* <p>
jaroslav@68	34	* In addition, this class provides several methods for determining
jaroslav@68	35	* a character's category (lowercase letter, digit, etc.) and for converting
jaroslav@68	36	* characters from uppercase to lowercase and vice versa.
jaroslav@68	37	* <p>
jaroslav@68	38	* Character information is based on the Unicode Standard, version 6.0.0.
jaroslav@68	39	* <p>
jaroslav@68	40	* The methods and data of class {@code Character} are defined by
jaroslav@68	41	* the information in the <i>UnicodeData</i> file that is part of the
jaroslav@68	42	* Unicode Character Database maintained by the Unicode
jaroslav@68	43	* Consortium. This file specifies various properties including name
jaroslav@68	44	* and general category for every defined Unicode code point or
jaroslav@68	45	* character range.
jaroslav@68	46	* <p>
jaroslav@68	47	* The file and its description are available from the Unicode Consortium at:
jaroslav@68	48	* <ul>
jaroslav@68	49	* <li><a href="http://www.unicode.org">http://www.unicode.org</a>
jaroslav@68	50	* </ul>
jaroslav@68	51	*
jaroslav@68	52	* <h4><a name="unicode">Unicode Character Representations</a></h4>
jaroslav@68	53	*
jaroslav@68	54	* <p>The {@code char} data type (and therefore the value that a
jaroslav@68	55	* {@code Character} object encapsulates) are based on the
jaroslav@68	56	* original Unicode specification, which defined characters as
jaroslav@68	57	* fixed-width 16-bit entities. The Unicode Standard has since been
jaroslav@68	58	* changed to allow for characters whose representation requires more
jaroslav@68	59	* than 16 bits. The range of legal <em>code point</em>s is now
jaroslav@68	60	* U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
jaroslav@68	61	* (Refer to the <a
jaroslav@68	62	* href="http://www.unicode.org/reports/tr27/#notation"><i>
jaroslav@68	63	* definition</i></a> of the U+<i>n</i> notation in the Unicode
jaroslav@68	64	* Standard.)
jaroslav@68	65	*
jaroslav@68	66	* <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
jaroslav@68	67	* sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
jaroslav@68	68	* <a name="supplementary">Characters</a> whose code points are greater
jaroslav@68	69	* than U+FFFF are called <em>supplementary character</em>s. The Java
jaroslav@68	70	* platform uses the UTF-16 representation in {@code char} arrays and
jaroslav@68	71	* in the {@code String} and {@code StringBuffer} classes. In
jaroslav@68	72	* this representation, supplementary characters are represented as a pair
jaroslav@68	73	* of {@code char} values, the first from the <em>high-surrogates</em>
jaroslav@68	74	* range, (\uD800-\uDBFF), the second from the
jaroslav@68	75	* <em>low-surrogates</em> range (\uDC00-\uDFFF).
jaroslav@68	76	*
jaroslav@68	77	* <p>A {@code char} value, therefore, represents Basic
jaroslav@68	78	* Multilingual Plane (BMP) code points, including the surrogate
jaroslav@68	79	* code points, or code units of the UTF-16 encoding. An
jaroslav@68	80	* {@code int} value represents all Unicode code points,
jaroslav@68	81	* including supplementary code points. The lower (least significant)
jaroslav@68	82	* 21 bits of {@code int} are used to represent Unicode code
jaroslav@68	83	* points and the upper (most significant) 11 bits must be zero.
jaroslav@68	84	* Unless otherwise specified, the behavior with respect to
jaroslav@68	85	* supplementary characters and surrogate {@code char} values is
jaroslav@68	86	* as follows:
jaroslav@68	87	*
jaroslav@68	88	* <ul>
jaroslav@68	89	* <li>The methods that only accept a {@code char} value cannot support
jaroslav@68	90	* supplementary characters. They treat {@code char} values from the
jaroslav@68	91	* surrogate ranges as undefined characters. For example,
jaroslav@68	92	* {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
jaroslav@68	93	* this specific value if followed by any low-surrogate value in a string
jaroslav@68	94	* would represent a letter.
jaroslav@68	95	*
jaroslav@68	96	* <li>The methods that accept an {@code int} value support all
jaroslav@68	97	* Unicode characters, including supplementary characters. For
jaroslav@68	98	* example, {@code Character.isLetter(0x2F81A)} returns
jaroslav@68	99	* {@code true} because the code point value represents a letter
jaroslav@68	100	* (a CJK ideograph).
jaroslav@68	101	* </ul>
jaroslav@68	102	*
jaroslav@68	103	* <p>In the Java SE API documentation, <em>Unicode code point</em> is
jaroslav@68	104	* used for character values in the range between U+0000 and U+10FFFF,
jaroslav@68	105	* and <em>Unicode code unit</em> is used for 16-bit
jaroslav@68	106	* {@code char} values that are code units of the <em>UTF-16</em>
jaroslav@68	107	* encoding. For more information on Unicode terminology, refer to the
jaroslav@68	108	* <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
jaroslav@68	109	*
jaroslav@68	110	* @author Lee Boynton
jaroslav@68	111	* @author Guy Steele
jaroslav@68	112	* @author Akira Tanaka
jaroslav@68	113	* @author Martin Buchholz
jaroslav@68	114	* @author Ulf Zibis
jaroslav@68	115	* @since 1.0
jaroslav@68	116	*/
jaroslav@68	117	public final
jaroslav@68	118	class Character implements java.io.Serializable, Comparable<Character> {
jaroslav@68	119	/**
jaroslav@68	120	* The minimum radix available for conversion to and from strings.
jaroslav@68	121	* The constant value of this field is the smallest value permitted
jaroslav@68	122	* for the radix argument in radix-conversion methods such as the
jaroslav@68	123	* {@code digit} method, the {@code forDigit} method, and the
jaroslav@68	124	* {@code toString} method of class {@code Integer}.
jaroslav@68	125	*
jaroslav@68	126	* @see Character#digit(char, int)
jaroslav@68	127	* @see Character#forDigit(int, int)
jaroslav@68	128	* @see Integer#toString(int, int)
jaroslav@68	129	* @see Integer#valueOf(String)
jaroslav@68	130	*/
jaroslav@68	131	public static final int MIN_RADIX = 2;
jaroslav@68	132
jaroslav@68	133	/**
jaroslav@68	134	* The maximum radix available for conversion to and from strings.
jaroslav@68	135	* The constant value of this field is the largest value permitted
jaroslav@68	136	* for the radix argument in radix-conversion methods such as the
jaroslav@68	137	* {@code digit} method, the {@code forDigit} method, and the
jaroslav@68	138	* {@code toString} method of class {@code Integer}.
jaroslav@68	139	*
jaroslav@68	140	* @see Character#digit(char, int)
jaroslav@68	141	* @see Character#forDigit(int, int)
jaroslav@68	142	* @see Integer#toString(int, int)
jaroslav@68	143	* @see Integer#valueOf(String)
jaroslav@68	144	*/
jaroslav@68	145	public static final int MAX_RADIX = 36;
jaroslav@68	146
jaroslav@68	147	/**
jaroslav@68	148	* The constant value of this field is the smallest value of type
jaroslav@68	149	* {@code char}, {@code '\u005Cu0000'}.
jaroslav@68	150	*
jaroslav@68	151	* @since 1.0.2
jaroslav@68	152	*/
jaroslav@68	153	public static final char MIN_VALUE = '\u0000';
jaroslav@68	154
jaroslav@68	155	/**
jaroslav@68	156	* The constant value of this field is the largest value of type
jaroslav@68	157	* {@code char}, {@code '\u005CuFFFF'}.
jaroslav@68	158	*
jaroslav@68	159	* @since 1.0.2
jaroslav@68	160	*/
jaroslav@68	161	public static final char MAX_VALUE = '\uFFFF';
jaroslav@68	162
jaroslav@68	163	/**
jaroslav@68	164	* The {@code Class} instance representing the primitive type
jaroslav@68	165	* {@code char}.
jaroslav@68	166	*
jaroslav@68	167	* @since 1.1
jaroslav@68	168	*/
jaroslav@68	169	public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
jaroslav@68	170
jaroslav@68	171	/*
jaroslav@68	172	* Normative general types
jaroslav@68	173	*/
jaroslav@68	174
jaroslav@68	175	/*
jaroslav@68	176	* General character types
jaroslav@68	177	*/
jaroslav@68	178
jaroslav@68	179	/**
jaroslav@68	180	* General category "Cn" in the Unicode specification.
jaroslav@68	181	* @since 1.1
jaroslav@68	182	*/
jaroslav@68	183	public static final byte UNASSIGNED = 0;
jaroslav@68	184
jaroslav@68	185	/**
jaroslav@68	186	* General category "Lu" in the Unicode specification.
jaroslav@68	187	* @since 1.1
jaroslav@68	188	*/
jaroslav@68	189	public static final byte UPPERCASE_LETTER = 1;
jaroslav@68	190
jaroslav@68	191	/**
jaroslav@68	192	* General category "Ll" in the Unicode specification.
jaroslav@68	193	* @since 1.1
jaroslav@68	194	*/
jaroslav@68	195	public static final byte LOWERCASE_LETTER = 2;
jaroslav@68	196
jaroslav@68	197	/**
jaroslav@68	198	* General category "Lt" in the Unicode specification.
jaroslav@68	199	* @since 1.1
jaroslav@68	200	*/
jaroslav@68	201	public static final byte TITLECASE_LETTER = 3;
jaroslav@68	202
jaroslav@68	203	/**
jaroslav@68	204	* General category "Lm" in the Unicode specification.
jaroslav@68	205	* @since 1.1
jaroslav@68	206	*/
jaroslav@68	207	public static final byte MODIFIER_LETTER = 4;
jaroslav@68	208
jaroslav@68	209	/**
jaroslav@68	210	* General category "Lo" in the Unicode specification.
jaroslav@68	211	* @since 1.1
jaroslav@68	212	*/
jaroslav@68	213	public static final byte OTHER_LETTER = 5;
jaroslav@68	214
jaroslav@68	215	/**
jaroslav@68	216	* General category "Mn" in the Unicode specification.
jaroslav@68	217	* @since 1.1
jaroslav@68	218	*/
jaroslav@68	219	public static final byte NON_SPACING_MARK = 6;
jaroslav@68	220
jaroslav@68	221	/**
jaroslav@68	222	* General category "Me" in the Unicode specification.
jaroslav@68	223	* @since 1.1
jaroslav@68	224	*/
jaroslav@68	225	public static final byte ENCLOSING_MARK = 7;
jaroslav@68	226
jaroslav@68	227	/**
jaroslav@68	228	* General category "Mc" in the Unicode specification.
jaroslav@68	229	* @since 1.1
jaroslav@68	230	*/
jaroslav@68	231	public static final byte COMBINING_SPACING_MARK = 8;
jaroslav@68	232
jaroslav@68	233	/**
jaroslav@68	234	* General category "Nd" in the Unicode specification.
jaroslav@68	235	* @since 1.1
jaroslav@68	236	*/
jaroslav@68	237	public static final byte DECIMAL_DIGIT_NUMBER = 9;
jaroslav@68	238
jaroslav@68	239	/**
jaroslav@68	240	* General category "Nl" in the Unicode specification.
jaroslav@68	241	* @since 1.1
jaroslav@68	242	*/
jaroslav@68	243	public static final byte LETTER_NUMBER = 10;
jaroslav@68	244
jaroslav@68	245	/**
jaroslav@68	246	* General category "No" in the Unicode specification.
jaroslav@68	247	* @since 1.1
jaroslav@68	248	*/
jaroslav@68	249	public static final byte OTHER_NUMBER = 11;
jaroslav@68	250
jaroslav@68	251	/**
jaroslav@68	252	* General category "Zs" in the Unicode specification.
jaroslav@68	253	* @since 1.1
jaroslav@68	254	*/
jaroslav@68	255	public static final byte SPACE_SEPARATOR = 12;
jaroslav@68	256
jaroslav@68	257	/**
jaroslav@68	258	* General category "Zl" in the Unicode specification.
jaroslav@68	259	* @since 1.1
jaroslav@68	260	*/
jaroslav@68	261	public static final byte LINE_SEPARATOR = 13;
jaroslav@68	262
jaroslav@68	263	/**
jaroslav@68	264	* General category "Zp" in the Unicode specification.
jaroslav@68	265	* @since 1.1
jaroslav@68	266	*/
jaroslav@68	267	public static final byte PARAGRAPH_SEPARATOR = 14;
jaroslav@68	268
jaroslav@68	269	/**
jaroslav@68	270	* General category "Cc" in the Unicode specification.
jaroslav@68	271	* @since 1.1
jaroslav@68	272	*/
jaroslav@68	273	public static final byte CONTROL = 15;
jaroslav@68	274
jaroslav@68	275	/**
jaroslav@68	276	* General category "Cf" in the Unicode specification.
jaroslav@68	277	* @since 1.1
jaroslav@68	278	*/
jaroslav@68	279	public static final byte FORMAT = 16;
jaroslav@68	280
jaroslav@68	281	/**
jaroslav@68	282	* General category "Co" in the Unicode specification.
jaroslav@68	283	* @since 1.1
jaroslav@68	284	*/
jaroslav@68	285	public static final byte PRIVATE_USE = 18;
jaroslav@68	286
jaroslav@68	287	/**
jaroslav@68	288	* General category "Cs" in the Unicode specification.
jaroslav@68	289	* @since 1.1
jaroslav@68	290	*/
jaroslav@68	291	public static final byte SURROGATE = 19;
jaroslav@68	292
jaroslav@68	293	/**
jaroslav@68	294	* General category "Pd" in the Unicode specification.
jaroslav@68	295	* @since 1.1
jaroslav@68	296	*/
jaroslav@68	297	public static final byte DASH_PUNCTUATION = 20;
jaroslav@68	298
jaroslav@68	299	/**
jaroslav@68	300	* General category "Ps" in the Unicode specification.
jaroslav@68	301	* @since 1.1
jaroslav@68	302	*/
jaroslav@68	303	public static final byte START_PUNCTUATION = 21;
jaroslav@68	304
jaroslav@68	305	/**
jaroslav@68	306	* General category "Pe" in the Unicode specification.
jaroslav@68	307	* @since 1.1
jaroslav@68	308	*/
jaroslav@68	309	public static final byte END_PUNCTUATION = 22;
jaroslav@68	310
jaroslav@68	311	/**
jaroslav@68	312	* General category "Pc" in the Unicode specification.
jaroslav@68	313	* @since 1.1
jaroslav@68	314	*/
jaroslav@68	315	public static final byte CONNECTOR_PUNCTUATION = 23;
jaroslav@68	316
jaroslav@68	317	/**
jaroslav@68	318	* General category "Po" in the Unicode specification.
jaroslav@68	319	* @since 1.1
jaroslav@68	320	*/
jaroslav@68	321	public static final byte OTHER_PUNCTUATION = 24;
jaroslav@68	322
jaroslav@68	323	/**
jaroslav@68	324	* General category "Sm" in the Unicode specification.
jaroslav@68	325	* @since 1.1
jaroslav@68	326	*/
jaroslav@68	327	public static final byte MATH_SYMBOL = 25;
jaroslav@68	328
jaroslav@68	329	/**
jaroslav@68	330	* General category "Sc" in the Unicode specification.
jaroslav@68	331	* @since 1.1
jaroslav@68	332	*/
jaroslav@68	333	public static final byte CURRENCY_SYMBOL = 26;
jaroslav@68	334
jaroslav@68	335	/**
jaroslav@68	336	* General category "Sk" in the Unicode specification.
jaroslav@68	337	* @since 1.1
jaroslav@68	338	*/
jaroslav@68	339	public static final byte MODIFIER_SYMBOL = 27;
jaroslav@68	340
jaroslav@68	341	/**
jaroslav@68	342	* General category "So" in the Unicode specification.
jaroslav@68	343	* @since 1.1
jaroslav@68	344	*/
jaroslav@68	345	public static final byte OTHER_SYMBOL = 28;
jaroslav@68	346
jaroslav@68	347	/**
jaroslav@68	348	* General category "Pi" in the Unicode specification.
jaroslav@68	349	* @since 1.4
jaroslav@68	350	*/
jaroslav@68	351	public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
jaroslav@68	352
jaroslav@68	353	/**
jaroslav@68	354	* General category "Pf" in the Unicode specification.
jaroslav@68	355	* @since 1.4
jaroslav@68	356	*/
jaroslav@68	357	public static final byte FINAL_QUOTE_PUNCTUATION = 30;
jaroslav@68	358
jaroslav@68	359	/**
jaroslav@68	360	* Error flag. Use int (code point) to avoid confusion with U+FFFF.
jaroslav@68	361	*/
jaroslav@68	362	static final int ERROR = 0xFFFFFFFF;
jaroslav@68	363
jaroslav@68	364
jaroslav@68	365	/**
jaroslav@68	366	* Undefined bidirectional character type. Undefined {@code char}
jaroslav@68	367	* values have undefined directionality in the Unicode specification.
jaroslav@68	368	* @since 1.4
jaroslav@68	369	*/
jaroslav@68	370	public static final byte DIRECTIONALITY_UNDEFINED = -1;
jaroslav@68	371
jaroslav@68	372	/**
jaroslav@68	373	* Strong bidirectional character type "L" in the Unicode specification.
jaroslav@68	374	* @since 1.4
jaroslav@68	375	*/
jaroslav@68	376	public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
jaroslav@68	377
jaroslav@68	378	/**
jaroslav@68	379	* Strong bidirectional character type "R" in the Unicode specification.
jaroslav@68	380	* @since 1.4
jaroslav@68	381	*/
jaroslav@68	382	public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
jaroslav@68	383
jaroslav@68	384	/**
jaroslav@68	385	* Strong bidirectional character type "AL" in the Unicode specification.
jaroslav@68	386	* @since 1.4
jaroslav@68	387	*/
jaroslav@68	388	public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
jaroslav@68	389
jaroslav@68	390	/**
jaroslav@68	391	* Weak bidirectional character type "EN" in the Unicode specification.
jaroslav@68	392	* @since 1.4
jaroslav@68	393	*/
jaroslav@68	394	public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
jaroslav@68	395
jaroslav@68	396	/**
jaroslav@68	397	* Weak bidirectional character type "ES" in the Unicode specification.
jaroslav@68	398	* @since 1.4
jaroslav@68	399	*/
jaroslav@68	400	public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
jaroslav@68	401
jaroslav@68	402	/**
jaroslav@68	403	* Weak bidirectional character type "ET" in the Unicode specification.
jaroslav@68	404	* @since 1.4
jaroslav@68	405	*/
jaroslav@68	406	public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
jaroslav@68	407
jaroslav@68	408	/**
jaroslav@68	409	* Weak bidirectional character type "AN" in the Unicode specification.
jaroslav@68	410	* @since 1.4
jaroslav@68	411	*/
jaroslav@68	412	public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
jaroslav@68	413
jaroslav@68	414	/**
jaroslav@68	415	* Weak bidirectional character type "CS" in the Unicode specification.
jaroslav@68	416	* @since 1.4
jaroslav@68	417	*/
jaroslav@68	418	public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
jaroslav@68	419
jaroslav@68	420	/**
jaroslav@68	421	* Weak bidirectional character type "NSM" in the Unicode specification.
jaroslav@68	422	* @since 1.4
jaroslav@68	423	*/
jaroslav@68	424	public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
jaroslav@68	425
jaroslav@68	426	/**
jaroslav@68	427	* Weak bidirectional character type "BN" in the Unicode specification.
jaroslav@68	428	* @since 1.4
jaroslav@68	429	*/
jaroslav@68	430	public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
jaroslav@68	431
jaroslav@68	432	/**
jaroslav@68	433	* Neutral bidirectional character type "B" in the Unicode specification.
jaroslav@68	434	* @since 1.4
jaroslav@68	435	*/
jaroslav@68	436	public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
jaroslav@68	437
jaroslav@68	438	/**
jaroslav@68	439	* Neutral bidirectional character type "S" in the Unicode specification.
jaroslav@68	440	* @since 1.4
jaroslav@68	441	*/
jaroslav@68	442	public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
jaroslav@68	443
jaroslav@68	444	/**
jaroslav@68	445	* Neutral bidirectional character type "WS" in the Unicode specification.
jaroslav@68	446	* @since 1.4
jaroslav@68	447	*/
jaroslav@68	448	public static final byte DIRECTIONALITY_WHITESPACE = 12;
jaroslav@68	449
jaroslav@68	450	/**
jaroslav@68	451	* Neutral bidirectional character type "ON" in the Unicode specification.
jaroslav@68	452	* @since 1.4
jaroslav@68	453	*/
jaroslav@68	454	public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
jaroslav@68	455
jaroslav@68	456	/**
jaroslav@68	457	* Strong bidirectional character type "LRE" in the Unicode specification.
jaroslav@68	458	* @since 1.4
jaroslav@68	459	*/
jaroslav@68	460	public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
jaroslav@68	461
jaroslav@68	462	/**
jaroslav@68	463	* Strong bidirectional character type "LRO" in the Unicode specification.
jaroslav@68	464	* @since 1.4
jaroslav@68	465	*/
jaroslav@68	466	public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
jaroslav@68	467
jaroslav@68	468	/**
jaroslav@68	469	* Strong bidirectional character type "RLE" in the Unicode specification.
jaroslav@68	470	* @since 1.4
jaroslav@68	471	*/
jaroslav@68	472	public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
jaroslav@68	473
jaroslav@68	474	/**
jaroslav@68	475	* Strong bidirectional character type "RLO" in the Unicode specification.
jaroslav@68	476	* @since 1.4
jaroslav@68	477	*/
jaroslav@68	478	public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
jaroslav@68	479
jaroslav@68	480	/**
jaroslav@68	481	* Weak bidirectional character type "PDF" in the Unicode specification.
jaroslav@68	482	* @since 1.4
jaroslav@68	483	*/
jaroslav@68	484	public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
jaroslav@68	485
jaroslav@68	486	/**
jaroslav@68	487	* The minimum value of a
jaroslav@68	488	* <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
jaroslav@68	489	* Unicode high-surrogate code unit</a>
jaroslav@68	490	* in the UTF-16 encoding, constant {@code '\u005CuD800'}.
jaroslav@68	491	* A high-surrogate is also known as a <i>leading-surrogate</i>.
jaroslav@68	492	*
jaroslav@68	493	* @since 1.5
jaroslav@68	494	*/
jaroslav@68	495	public static final char MIN_HIGH_SURROGATE = '\uD800';
jaroslav@68	496
jaroslav@68	497	/**
jaroslav@68	498	* The maximum value of a
jaroslav@68	499	* <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
jaroslav@68	500	* Unicode high-surrogate code unit</a>
jaroslav@68	501	* in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
jaroslav@68	502	* A high-surrogate is also known as a <i>leading-surrogate</i>.
jaroslav@68	503	*
jaroslav@68	504	* @since 1.5
jaroslav@68	505	*/
jaroslav@68	506	public static final char MAX_HIGH_SURROGATE = '\uDBFF';
jaroslav@68	507
jaroslav@68	508	/**
jaroslav@68	509	* The minimum value of a
jaroslav@68	510	* <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
jaroslav@68	511	* Unicode low-surrogate code unit</a>
jaroslav@68	512	* in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
jaroslav@68	513	* A low-surrogate is also known as a <i>trailing-surrogate</i>.
jaroslav@68	514	*
jaroslav@68	515	* @since 1.5
jaroslav@68	516	*/
jaroslav@68	517	public static final char MIN_LOW_SURROGATE = '\uDC00';
jaroslav@68	518
jaroslav@68	519	/**
jaroslav@68	520	* The maximum value of a
jaroslav@68	521	* <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
jaroslav@68	522	* Unicode low-surrogate code unit</a>
jaroslav@68	523	* in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
jaroslav@68	524	* A low-surrogate is also known as a <i>trailing-surrogate</i>.
jaroslav@68	525	*
jaroslav@68	526	* @since 1.5
jaroslav@68	527	*/
jaroslav@68	528	public static final char MAX_LOW_SURROGATE = '\uDFFF';
jaroslav@68	529
jaroslav@68	530	/**
jaroslav@68	531	* The minimum value of a Unicode surrogate code unit in the
jaroslav@68	532	* UTF-16 encoding, constant {@code '\u005CuD800'}.
jaroslav@68	533	*
jaroslav@68	534	* @since 1.5
jaroslav@68	535	*/
jaroslav@68	536	public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
jaroslav@68	537
jaroslav@68	538	/**
jaroslav@68	539	* The maximum value of a Unicode surrogate code unit in the
jaroslav@68	540	* UTF-16 encoding, constant {@code '\u005CuDFFF'}.
jaroslav@68	541	*
jaroslav@68	542	* @since 1.5
jaroslav@68	543	*/
jaroslav@68	544	public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
jaroslav@68	545
jaroslav@68	546	/**
jaroslav@68	547	* The minimum value of a
jaroslav@68	548	* <a href="http://www.unicode.org/glossary/#supplementary_code_point">
jaroslav@68	549	* Unicode supplementary code point</a>, constant {@code U+10000}.
jaroslav@68	550	*
jaroslav@68	551	* @since 1.5
jaroslav@68	552	*/
jaroslav@68	553	public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
jaroslav@68	554
jaroslav@68	555	/**
jaroslav@68	556	* The minimum value of a
jaroslav@68	557	* <a href="http://www.unicode.org/glossary/#code_point">
jaroslav@68	558	* Unicode code point</a>, constant {@code U+0000}.
jaroslav@68	559	*
jaroslav@68	560	* @since 1.5
jaroslav@68	561	*/
jaroslav@68	562	public static final int MIN_CODE_POINT = 0x000000;
jaroslav@68	563
jaroslav@68	564	/**
jaroslav@68	565	* The maximum value of a
jaroslav@68	566	* <a href="http://www.unicode.org/glossary/#code_point">
jaroslav@68	567	* Unicode code point</a>, constant {@code U+10FFFF}.
jaroslav@68	568	*
jaroslav@68	569	* @since 1.5
jaroslav@68	570	*/
jaroslav@68	571	public static final int MAX_CODE_POINT = 0X10FFFF;
jaroslav@68	572
jaroslav@68	573
jaroslav@68	574	/**
jaroslav@68	575	* Instances of this class represent particular subsets of the Unicode
jaroslav@68	576	* character set. The only family of subsets defined in the
jaroslav@68	577	* {@code Character} class is {@link Character.UnicodeBlock}.
jaroslav@68	578	* Other portions of the Java API may define other subsets for their
jaroslav@68	579	* own purposes.
jaroslav@68	580	*
jaroslav@68	581	* @since 1.2
jaroslav@68	582	*/
jaroslav@68	583	public static class Subset {
jaroslav@68	584
jaroslav@68	585	private String name;
jaroslav@68	586
jaroslav@68	587	/**
jaroslav@68	588	* Constructs a new {@code Subset} instance.
jaroslav@68	589	*
jaroslav@68	590	* @param name The name of this subset
jaroslav@68	591	* @exception NullPointerException if name is {@code null}
jaroslav@68	592	*/
jaroslav@68	593	protected Subset(String name) {
jaroslav@68	594	if (name == null) {
jaroslav@68	595	throw new NullPointerException("name");
jaroslav@68	596	}
jaroslav@68	597	this.name = name;
jaroslav@68	598	}
jaroslav@68	599
jaroslav@68	600	/**
jaroslav@68	601	* Compares two {@code Subset} objects for equality.
jaroslav@68	602	* This method returns {@code true} if and only if
jaroslav@68	603	* {@code this} and the argument refer to the same
jaroslav@68	604	* object; since this method is {@code final}, this
jaroslav@68	605	* guarantee holds for all subclasses.
jaroslav@68	606	*/
jaroslav@68	607	public final boolean equals(Object obj) {
jaroslav@68	608	return (this == obj);
jaroslav@68	609	}
jaroslav@68	610
jaroslav@68	611	/**
jaroslav@68	612	* Returns the standard hash code as defined by the
jaroslav@68	613	* {@link Object#hashCode} method. This method
jaroslav@68	614	* is {@code final} in order to ensure that the
jaroslav@68	615	* {@code equals} and {@code hashCode} methods will
jaroslav@68	616	* be consistent in all subclasses.
jaroslav@68	617	*/
jaroslav@68	618	public final int hashCode() {
jaroslav@68	619	return super.hashCode();
jaroslav@68	620	}
jaroslav@68	621
jaroslav@68	622	/**
jaroslav@68	623	* Returns the name of this subset.
jaroslav@68	624	*/
jaroslav@68	625	public final String toString() {
jaroslav@68	626	return name;
jaroslav@68	627	}
jaroslav@68	628	}
jaroslav@68	629
jaroslav@68	630	// See http://www.unicode.org/Public/UNIDATA/Blocks.txt
jaroslav@68	631	// for the latest specification of Unicode Blocks.
jaroslav@68	632
jaroslav@68	633
jaroslav@68	634	/**
jaroslav@68	635	* The value of the {@code Character}.
jaroslav@68	636	*
jaroslav@68	637	* @serial
jaroslav@68	638	*/
jaroslav@68	639	private final char value;
jaroslav@68	640
jaroslav@68	641	/** use serialVersionUID from JDK 1.0.2 for interoperability */
jaroslav@68	642	private static final long serialVersionUID = 3786198910865385080L;
jaroslav@68	643
jaroslav@68	644	/**
jaroslav@68	645	* Constructs a newly allocated {@code Character} object that
jaroslav@68	646	* represents the specified {@code char} value.
jaroslav@68	647	*
jaroslav@68	648	* @param value the value to be represented by the
jaroslav@68	649	* {@code Character} object.
jaroslav@68	650	*/
public Character(char value) {
    // Record the wrapped primitive in the instance field.
    this.value = value;
}
jaroslav@68	654
jaroslav@68	655	private static class CharacterCache {
jaroslav@68	656	private CharacterCache(){}
jaroslav@68	657
jaroslav@68	658	static final Character cache[] = new Character[127 + 1];
jaroslav@68	659
jaroslav@68	660	static {
jaroslav@68	661	for (int i = 0; i < cache.length; i++)
jaroslav@68	662	cache[i] = new Character((char)i);
jaroslav@68	663	}
jaroslav@68	664	}
jaroslav@68	665
jaroslav@68	666	/**
jaroslav@68	667	* Returns a <tt>Character</tt> instance representing the specified
jaroslav@68	668	* <tt>char</tt> value.
jaroslav@68	669	* If a new <tt>Character</tt> instance is not required, this method
jaroslav@68	670	* should generally be used in preference to the constructor
jaroslav@68	671	* {@link #Character(char)}, as this method is likely to yield
jaroslav@68	672	* significantly better space and time performance by caching
jaroslav@68	673	* frequently requested values.
jaroslav@68	674	*
jaroslav@68	675	* This method will always cache values in the range {@code
jaroslav@68	676	* '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
jaroslav@68	677	* cache other values outside of this range.
jaroslav@68	678	*
jaroslav@68	679	* @param c a char value.
jaroslav@68	680	* @return a <tt>Character</tt> instance representing <tt>c</tt>.
jaroslav@68	681	* @since 1.5
jaroslav@68	682	*/
jaroslav@68	683	public static Character valueOf(char c) {
jaroslav@68	684	if (c <= 127) { // must cache
jaroslav@68	685	return CharacterCache.cache[(int)c];
jaroslav@68	686	}
jaroslav@68	687	return new Character(c);
jaroslav@68	688	}
jaroslav@68	689
jaroslav@68	690	/**
jaroslav@68	691	* Returns the value of this {@code Character} object.
jaroslav@68	692	* @return the primitive {@code char} value represented by
jaroslav@68	693	* this object.
jaroslav@68	694	*/
jaroslav@68	695	public char charValue() {
jaroslav@68	696	return value;
jaroslav@68	697	}
jaroslav@68	698
jaroslav@68	699	/**
jaroslav@68	700	* Returns a hash code for this {@code Character}; equal to the result
jaroslav@68	701	* of invoking {@code charValue()}.
jaroslav@68	702	*
jaroslav@68	703	* @return a hash code value for this {@code Character}
jaroslav@68	704	*/
jaroslav@68	705	public int hashCode() {
jaroslav@68	706	return (int)value;
jaroslav@68	707	}
jaroslav@68	708
jaroslav@68	709	/**
jaroslav@68	710	* Compares this object against the specified object.
jaroslav@68	711	* The result is {@code true} if and only if the argument is not
jaroslav@68	712	* {@code null} and is a {@code Character} object that
jaroslav@68	713	* represents the same {@code char} value as this object.
jaroslav@68	714	*
jaroslav@68	715	* @param obj the object to compare with.
jaroslav@68	716	* @return {@code true} if the objects are the same;
jaroslav@68	717	* {@code false} otherwise.
jaroslav@68	718	*/
jaroslav@68	719	public boolean equals(Object obj) {
jaroslav@68	720	if (obj instanceof Character) {
jaroslav@68	721	return value == ((Character)obj).charValue();
jaroslav@68	722	}
jaroslav@68	723	return false;
jaroslav@68	724	}
jaroslav@68	725
jaroslav@68	726	/**
jaroslav@68	727	* Returns a {@code String} object representing this
jaroslav@68	728	* {@code Character}'s value. The result is a string of
jaroslav@68	729	* length 1 whose sole component is the primitive
jaroslav@68	730	* {@code char} value represented by this
jaroslav@68	731	* {@code Character} object.
jaroslav@68	732	*
jaroslav@68	733	* @return a string representation of this object.
jaroslav@68	734	*/
jaroslav@68	735	public String toString() {
jaroslav@68	736	char buf[] = {value};
jaroslav@68	737	return String.valueOf(buf);
jaroslav@68	738	}
jaroslav@68	739
jaroslav@68	740	/**
jaroslav@68	741	* Returns a {@code String} object representing the
jaroslav@68	742	* specified {@code char}. The result is a string of length
jaroslav@68	743	* 1 consisting solely of the specified {@code char}.
jaroslav@68	744	*
jaroslav@68	745	* @param c the {@code char} to be converted
jaroslav@68	746	* @return the string representation of the specified {@code char}
jaroslav@68	747	* @since 1.4
jaroslav@68	748	*/
jaroslav@68	749	public static String toString(char c) {
jaroslav@68	750	return String.valueOf(c);
jaroslav@68	751	}
jaroslav@68	752
jaroslav@68	753	/**
jaroslav@68	754	* Determines whether the specified code point is a valid
jaroslav@68	755	* <a href="http://www.unicode.org/glossary/#code_point">
jaroslav@68	756	* Unicode code point value</a>.
jaroslav@68	757	*
jaroslav@68	758	* @param codePoint the Unicode code point to be tested
jaroslav@68	759	* @return {@code true} if the specified code point value is between
jaroslav@68	760	* {@link #MIN_CODE_POINT} and
jaroslav@68	761	* {@link #MAX_CODE_POINT} inclusive;
jaroslav@68	762	* {@code false} otherwise.
jaroslav@68	763	* @since 1.5
jaroslav@68	764	*/
jaroslav@68	765	public static boolean isValidCodePoint(int codePoint) {
jaroslav@68	766	// Optimized form of:
jaroslav@68	767	// codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
jaroslav@68	768	int plane = codePoint >>> 16;
jaroslav@68	769	return plane < ((MAX_CODE_POINT + 1) >>> 16);
jaroslav@68	770	}
jaroslav@68	771
jaroslav@68	772	/**
jaroslav@68	773	* Determines whether the specified character (Unicode code point)
jaroslav@68	774	* is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
jaroslav@68	775	* Such code points can be represented using a single {@code char}.
jaroslav@68	776	*
jaroslav@68	777	* @param codePoint the character (Unicode code point) to be tested
jaroslav@68	778	* @return {@code true} if the specified code point is between
jaroslav@68	779	* {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
jaroslav@68	780	* {@code false} otherwise.
jaroslav@68	781	* @since 1.7
jaroslav@68	782	*/
jaroslav@68	783	public static boolean isBmpCodePoint(int codePoint) {
jaroslav@68	784	return codePoint >>> 16 == 0;
jaroslav@68	785	// Optimized form of:
jaroslav@68	786	// codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
jaroslav@68	787	// We consistently use logical shift (>>>) to facilitate
jaroslav@68	788	// additional runtime optimizations.
jaroslav@68	789	}
jaroslav@68	790
jaroslav@68	791	/**
jaroslav@68	792	* Determines whether the specified character (Unicode code point)
jaroslav@68	793	* is in the <a href="#supplementary">supplementary character</a> range.
jaroslav@68	794	*
jaroslav@68	795	* @param codePoint the character (Unicode code point) to be tested
jaroslav@68	796	* @return {@code true} if the specified code point is between
jaroslav@68	797	* {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
jaroslav@68	798	* {@link #MAX_CODE_POINT} inclusive;
jaroslav@68	799	* {@code false} otherwise.
jaroslav@68	800	* @since 1.5
jaroslav@68	801	*/
jaroslav@68	802	public static boolean isSupplementaryCodePoint(int codePoint) {
jaroslav@68	803	return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
jaroslav@68	804	&& codePoint < MAX_CODE_POINT + 1;
jaroslav@68	805	}
jaroslav@68	806
jaroslav@68	807	/**
jaroslav@68	808	* Determines if the given {@code char} value is a
jaroslav@68	809	* <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
jaroslav@68	810	* Unicode high-surrogate code unit</a>
jaroslav@68	811	* (also known as <i>leading-surrogate code unit</i>).
jaroslav@68	812	*
jaroslav@68	813	* <p>Such values do not represent characters by themselves,
jaroslav@68	814	* but are used in the representation of
jaroslav@68	815	* <a href="#supplementary">supplementary characters</a>
jaroslav@68	816	* in the UTF-16 encoding.
jaroslav@68	817	*
jaroslav@68	818	* @param ch the {@code char} value to be tested.
jaroslav@68	819	* @return {@code true} if the {@code char} value is between
jaroslav@68	820	* {@link #MIN_HIGH_SURROGATE} and
jaroslav@68	821	* {@link #MAX_HIGH_SURROGATE} inclusive;
jaroslav@68	822	* {@code false} otherwise.
jaroslav@68	823	* @see Character#isLowSurrogate(char)
jaroslav@68	824	* @see Character.UnicodeBlock#of(int)
jaroslav@68	825	* @since 1.5
jaroslav@68	826	*/
jaroslav@68	827	public static boolean isHighSurrogate(char ch) {
jaroslav@68	828	// Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
jaroslav@68	829	return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
jaroslav@68	830	}
jaroslav@68	831
jaroslav@68	832	/**
jaroslav@68	833	* Determines if the given {@code char} value is a
jaroslav@68	834	* <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
jaroslav@68	835	* Unicode low-surrogate code unit</a>
jaroslav@68	836	* (also known as <i>trailing-surrogate code unit</i>).
jaroslav@68	837	*
jaroslav@68	838	* <p>Such values do not represent characters by themselves,
jaroslav@68	839	* but are used in the representation of
jaroslav@68	840	* <a href="#supplementary">supplementary characters</a>
jaroslav@68	841	* in the UTF-16 encoding.
jaroslav@68	842	*
jaroslav@68	843	* @param ch the {@code char} value to be tested.
jaroslav@68	844	* @return {@code true} if the {@code char} value is between
jaroslav@68	845	* {@link #MIN_LOW_SURROGATE} and
jaroslav@68	846	* {@link #MAX_LOW_SURROGATE} inclusive;
jaroslav@68	847	* {@code false} otherwise.
jaroslav@68	848	* @see Character#isHighSurrogate(char)
jaroslav@68	849	* @since 1.5
jaroslav@68	850	*/
jaroslav@68	851	public static boolean isLowSurrogate(char ch) {
jaroslav@68	852	return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
jaroslav@68	853	}
jaroslav@68	854
jaroslav@68	855	/**
jaroslav@68	856	* Determines if the given {@code char} value is a Unicode
jaroslav@68	857	* <i>surrogate code unit</i>.
jaroslav@68	858	*
jaroslav@68	859	* <p>Such values do not represent characters by themselves,
jaroslav@68	860	* but are used in the representation of
jaroslav@68	861	* <a href="#supplementary">supplementary characters</a>
jaroslav@68	862	* in the UTF-16 encoding.
jaroslav@68	863	*
jaroslav@68	864	* <p>A char value is a surrogate code unit if and only if it is either
jaroslav@68	865	* a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
jaroslav@68	866	* a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
jaroslav@68	867	*
jaroslav@68	868	* @param ch the {@code char} value to be tested.
jaroslav@68	869	* @return {@code true} if the {@code char} value is between
jaroslav@68	870	* {@link #MIN_SURROGATE} and
jaroslav@68	871	* {@link #MAX_SURROGATE} inclusive;
jaroslav@68	872	* {@code false} otherwise.
jaroslav@68	873	* @since 1.7
jaroslav@68	874	*/
jaroslav@68	875	public static boolean isSurrogate(char ch) {
jaroslav@68	876	return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
jaroslav@68	877	}
jaroslav@68	878
jaroslav@68	879	/**
jaroslav@68	880	* Determines whether the specified pair of {@code char}
jaroslav@68	881	* values is a valid
jaroslav@68	882	* <a href="http://www.unicode.org/glossary/#surrogate_pair">
jaroslav@68	883	* Unicode surrogate pair</a>.
jaroslav@68	884
jaroslav@68	885	* <p>This method is equivalent to the expression:
jaroslav@68	886	* <blockquote><pre>
jaroslav@68	887	* isHighSurrogate(high) && isLowSurrogate(low)
jaroslav@68	888	* </pre></blockquote>
jaroslav@68	889	*
jaroslav@68	890	* @param high the high-surrogate code value to be tested
jaroslav@68	891	* @param low the low-surrogate code value to be tested
jaroslav@68	892	* @return {@code true} if the specified high and
jaroslav@68	893	* low-surrogate code values represent a valid surrogate pair;
jaroslav@68	894	* {@code false} otherwise.
jaroslav@68	895	* @since 1.5
jaroslav@68	896	*/
jaroslav@68	897	public static boolean isSurrogatePair(char high, char low) {
jaroslav@68	898	return isHighSurrogate(high) && isLowSurrogate(low);
jaroslav@68	899	}
jaroslav@68	900
jaroslav@68	901	/**
jaroslav@68	902	* Determines the number of {@code char} values needed to
jaroslav@68	903	* represent the specified character (Unicode code point). If the
jaroslav@68	904	* specified character is equal to or greater than 0x10000, then
jaroslav@68	905	* the method returns 2. Otherwise, the method returns 1.
jaroslav@68	906	*
jaroslav@68	907	* <p>This method doesn't validate the specified character to be a
jaroslav@68	908	* valid Unicode code point. The caller must validate the
jaroslav@68	909	* character value using {@link #isValidCodePoint(int) isValidCodePoint}
jaroslav@68	910	* if necessary.
jaroslav@68	911	*
jaroslav@68	912	* @param codePoint the character (Unicode code point) to be tested.
jaroslav@68	913	* @return 2 if the character is a valid supplementary character; 1 otherwise.
jaroslav@68	914	* @see Character#isSupplementaryCodePoint(int)
jaroslav@68	915	* @since 1.5
jaroslav@68	916	*/
jaroslav@68	917	public static int charCount(int codePoint) {
jaroslav@68	918	return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
jaroslav@68	919	}
jaroslav@68	920
jaroslav@68	921	/**
jaroslav@68	922	* Converts the specified surrogate pair to its supplementary code
jaroslav@68	923	* point value. This method does not validate the specified
jaroslav@68	924	* surrogate pair. The caller must validate it using {@link
jaroslav@68	925	* #isSurrogatePair(char, char) isSurrogatePair} if necessary.
jaroslav@68	926	*
jaroslav@68	927	* @param high the high-surrogate code unit
jaroslav@68	928	* @param low the low-surrogate code unit
jaroslav@68	929	* @return the supplementary code point composed from the
jaroslav@68	930	* specified surrogate pair.
jaroslav@68	931	* @since 1.5
jaroslav@68	932	*/
jaroslav@68	933	public static int toCodePoint(char high, char low) {
jaroslav@68	934	// Optimized form of:
jaroslav@68	935	// return ((high - MIN_HIGH_SURROGATE) << 10)
jaroslav@68	936	// + (low - MIN_LOW_SURROGATE)
jaroslav@68	937	// + MIN_SUPPLEMENTARY_CODE_POINT;
jaroslav@68	938	return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
jaroslav@68	939	- (MIN_HIGH_SURROGATE << 10)
jaroslav@68	940	- MIN_LOW_SURROGATE);
jaroslav@68	941	}
jaroslav@68	942
jaroslav@68	943	/**
jaroslav@68	944	* Returns the code point at the given index of the
jaroslav@68	945	* {@code CharSequence}. If the {@code char} value at
jaroslav@68	946	* the given index in the {@code CharSequence} is in the
jaroslav@68	947	* high-surrogate range, the following index is less than the
jaroslav@68	948	* length of the {@code CharSequence}, and the
jaroslav@68	949	* {@code char} value at the following index is in the
jaroslav@68	950	* low-surrogate range, then the supplementary code point
jaroslav@68	951	* corresponding to this surrogate pair is returned. Otherwise,
jaroslav@68	952	* the {@code char} value at the given index is returned.
jaroslav@68	953	*
jaroslav@68	954	* @param seq a sequence of {@code char} values (Unicode code
jaroslav@68	955	* units)
jaroslav@68	956	* @param index the index to the {@code char} values (Unicode
jaroslav@68	957	* code units) in {@code seq} to be converted
jaroslav@68	958	* @return the Unicode code point at the given index
jaroslav@68	959	* @exception NullPointerException if {@code seq} is null.
jaroslav@68	960	* @exception IndexOutOfBoundsException if the value
jaroslav@68	961	* {@code index} is negative or not less than
jaroslav@68	962	* {@link CharSequence#length() seq.length()}.
jaroslav@68	963	* @since 1.5
jaroslav@68	964	*/
jaroslav@68	965	public static int codePointAt(CharSequence seq, int index) {
jaroslav@68	966	char c1 = seq.charAt(index++);
jaroslav@68	967	if (isHighSurrogate(c1)) {
jaroslav@68	968	if (index < seq.length()) {
jaroslav@68	969	char c2 = seq.charAt(index);
jaroslav@68	970	if (isLowSurrogate(c2)) {
jaroslav@68	971	return toCodePoint(c1, c2);
jaroslav@68	972	}
jaroslav@68	973	}
jaroslav@68	974	}
jaroslav@68	975	return c1;
jaroslav@68	976	}
jaroslav@68	977
jaroslav@68	978	/**
jaroslav@68	979	* Returns the code point at the given index of the
jaroslav@68	980	* {@code char} array. If the {@code char} value at
jaroslav@68	981	* the given index in the {@code char} array is in the
jaroslav@68	982	* high-surrogate range, the following index is less than the
jaroslav@68	983	* length of the {@code char} array, and the
jaroslav@68	984	* {@code char} value at the following index is in the
jaroslav@68	985	* low-surrogate range, then the supplementary code point
jaroslav@68	986	* corresponding to this surrogate pair is returned. Otherwise,
jaroslav@68	987	* the {@code char} value at the given index is returned.
jaroslav@68	988	*
jaroslav@68	989	* @param a the {@code char} array
jaroslav@68	990	* @param index the index to the {@code char} values (Unicode
jaroslav@68	991	* code units) in the {@code char} array to be converted
jaroslav@68	992	* @return the Unicode code point at the given index
jaroslav@68	993	* @exception NullPointerException if {@code a} is null.
jaroslav@68	994	* @exception IndexOutOfBoundsException if the value
jaroslav@68	995	* {@code index} is negative or not less than
jaroslav@68	996	* the length of the {@code char} array.
jaroslav@68	997	* @since 1.5
jaroslav@68	998	*/
jaroslav@68	999	public static int codePointAt(char[] a, int index) {
jaroslav@68	1000	return codePointAtImpl(a, index, a.length);
jaroslav@68	1001	}
jaroslav@68	1002
jaroslav@68	1003	/**
jaroslav@68	1004	* Returns the code point at the given index of the
jaroslav@68	1005	* {@code char} array, where only array elements with
jaroslav@68	1006	* {@code index} less than {@code limit} can be used. If
jaroslav@68	1007	* the {@code char} value at the given index in the
jaroslav@68	1008	* {@code char} array is in the high-surrogate range, the
jaroslav@68	1009	* following index is less than the {@code limit}, and the
jaroslav@68	1010	* {@code char} value at the following index is in the
jaroslav@68	1011	* low-surrogate range, then the supplementary code point
jaroslav@68	1012	* corresponding to this surrogate pair is returned. Otherwise,
jaroslav@68	1013	* the {@code char} value at the given index is returned.
jaroslav@68	1014	*
jaroslav@68	1015	* @param a the {@code char} array
jaroslav@68	1016	* @param index the index to the {@code char} values (Unicode
jaroslav@68	1017	* code units) in the {@code char} array to be converted
jaroslav@68	1018	* @param limit the index after the last array element that
jaroslav@68	1019	* can be used in the {@code char} array
jaroslav@68	1020	* @return the Unicode code point at the given index
jaroslav@68	1021	* @exception NullPointerException if {@code a} is null.
jaroslav@68	1022	* @exception IndexOutOfBoundsException if the {@code index}
jaroslav@68	1023	* argument is negative or not less than the {@code limit}
jaroslav@68	1024	* argument, or if the {@code limit} argument is negative or
jaroslav@68	1025	* greater than the length of the {@code char} array.
jaroslav@68	1026	* @since 1.5
jaroslav@68	1027	*/
jaroslav@68	1028	public static int codePointAt(char[] a, int index, int limit) {
jaroslav@68	1029	if (index >= limit \|\| limit < 0 \|\| limit > a.length) {
jaroslav@68	1030	throw new IndexOutOfBoundsException();
jaroslav@68	1031	}
jaroslav@68	1032	return codePointAtImpl(a, index, limit);
jaroslav@68	1033	}
jaroslav@68	1034
jaroslav@68	1035	// throws ArrayIndexOutofBoundsException if index out of bounds
jaroslav@68	1036	static int codePointAtImpl(char[] a, int index, int limit) {
jaroslav@68	1037	char c1 = a[index++];
jaroslav@68	1038	if (isHighSurrogate(c1)) {
jaroslav@68	1039	if (index < limit) {
jaroslav@68	1040	char c2 = a[index];
jaroslav@68	1041	if (isLowSurrogate(c2)) {
jaroslav@68	1042	return toCodePoint(c1, c2);
jaroslav@68	1043	}
jaroslav@68	1044	}
jaroslav@68	1045	}
jaroslav@68	1046	return c1;
jaroslav@68	1047	}
jaroslav@68	1048
jaroslav@68	1049	/**
jaroslav@68	1050	* Returns the code point preceding the given index of the
jaroslav@68	1051	* {@code CharSequence}. If the {@code char} value at
jaroslav@68	1052	* {@code (index - 1)} in the {@code CharSequence} is in
jaroslav@68	1053	* the low-surrogate range, {@code (index - 2)} is not
jaroslav@68	1054	* negative, and the {@code char} value at {@code (index - 2)}
jaroslav@68	1055	* in the {@code CharSequence} is in the
jaroslav@68	1056	* high-surrogate range, then the supplementary code point
jaroslav@68	1057	* corresponding to this surrogate pair is returned. Otherwise,
jaroslav@68	1058	* the {@code char} value at {@code (index - 1)} is
jaroslav@68	1059	* returned.
jaroslav@68	1060	*
jaroslav@68	1061	* @param seq the {@code CharSequence} instance
jaroslav@68	1062	* @param index the index following the code point that should be returned
jaroslav@68	1063	* @return the Unicode code point value before the given index.
jaroslav@68	1064	* @exception NullPointerException if {@code seq} is null.
jaroslav@68	1065	* @exception IndexOutOfBoundsException if the {@code index}
jaroslav@68	1066	* argument is less than 1 or greater than {@link
jaroslav@68	1067	* CharSequence#length() seq.length()}.
jaroslav@68	1068	* @since 1.5
jaroslav@68	1069	*/
jaroslav@68	1070	public static int codePointBefore(CharSequence seq, int index) {
jaroslav@68	1071	char c2 = seq.charAt(--index);
jaroslav@68	1072	if (isLowSurrogate(c2)) {
jaroslav@68	1073	if (index > 0) {
jaroslav@68	1074	char c1 = seq.charAt(--index);
jaroslav@68	1075	if (isHighSurrogate(c1)) {
jaroslav@68	1076	return toCodePoint(c1, c2);
jaroslav@68	1077	}
jaroslav@68	1078	}
jaroslav@68	1079	}
jaroslav@68	1080	return c2;
jaroslav@68	1081	}
jaroslav@68	1082
jaroslav@68	1083	/**
jaroslav@68	1084	* Returns the code point preceding the given index of the
jaroslav@68	1085	* {@code char} array. If the {@code char} value at
jaroslav@68	1086	* {@code (index - 1)} in the {@code char} array is in
jaroslav@68	1087	* the low-surrogate range, {@code (index - 2)} is not
jaroslav@68	1088	* negative, and the {@code char} value at {@code (index - 2)}
jaroslav@68	1089	* in the {@code char} array is in the
jaroslav@68	1090	* high-surrogate range, then the supplementary code point
jaroslav@68	1091	* corresponding to this surrogate pair is returned. Otherwise,
jaroslav@68	1092	* the {@code char} value at {@code (index - 1)} is
jaroslav@68	1093	* returned.
jaroslav@68	1094	*
jaroslav@68	1095	* @param a the {@code char} array
jaroslav@68	1096	* @param index the index following the code point that should be returned
jaroslav@68	1097	* @return the Unicode code point value before the given index.
jaroslav@68	1098	* @exception NullPointerException if {@code a} is null.
jaroslav@68	1099	* @exception IndexOutOfBoundsException if the {@code index}
jaroslav@68	1100	* argument is less than 1 or greater than the length of the
jaroslav@68	1101	* {@code char} array
jaroslav@68	1102	* @since 1.5
jaroslav@68	1103	*/
jaroslav@68	1104	public static int codePointBefore(char[] a, int index) {
jaroslav@68	1105	return codePointBeforeImpl(a, index, 0);
jaroslav@68	1106	}
jaroslav@68	1107
jaroslav@68	1108	/**
jaroslav@68	1109	* Returns the code point preceding the given index of the
jaroslav@68	1110	* {@code char} array, where only array elements with
jaroslav@68	1111	* {@code index} greater than or equal to {@code start}
jaroslav@68	1112	* can be used. If the {@code char} value at {@code (index - 1)}
jaroslav@68	1113	* in the {@code char} array is in the
jaroslav@68	1114	* low-surrogate range, {@code (index - 2)} is not less than
jaroslav@68	1115	* {@code start}, and the {@code char} value at
jaroslav@68	1116	* {@code (index - 2)} in the {@code char} array is in
jaroslav@68	1117	* the high-surrogate range, then the supplementary code point
jaroslav@68	1118	* corresponding to this surrogate pair is returned. Otherwise,
jaroslav@68	1119	* the {@code char} value at {@code (index - 1)} is
jaroslav@68	1120	* returned.
jaroslav@68	1121	*
jaroslav@68	1122	* @param a the {@code char} array
jaroslav@68	1123	* @param index the index following the code point that should be returned
jaroslav@68	1124	* @param start the index of the first array element in the
jaroslav@68	1125	* {@code char} array
jaroslav@68	1126	* @return the Unicode code point value before the given index.
jaroslav@68	1127	* @exception NullPointerException if {@code a} is null.
jaroslav@68	1128	* @exception IndexOutOfBoundsException if the {@code index}
jaroslav@68	1129	* argument is not greater than the {@code start} argument or
jaroslav@68	1130	* is greater than the length of the {@code char} array, or
jaroslav@68	1131	* if the {@code start} argument is negative or not less than
jaroslav@68	1132	* the length of the {@code char} array.
jaroslav@68	1133	* @since 1.5
jaroslav@68	1134	*/
jaroslav@68	1135	public static int codePointBefore(char[] a, int index, int start) {
jaroslav@68	1136	if (index <= start \|\| start < 0 \|\| start >= a.length) {
jaroslav@68	1137	throw new IndexOutOfBoundsException();
jaroslav@68	1138	}
jaroslav@68	1139	return codePointBeforeImpl(a, index, start);
jaroslav@68	1140	}
jaroslav@68	1141
jaroslav@68	1142	// throws ArrayIndexOutofBoundsException if index-1 out of bounds
jaroslav@68	1143	static int codePointBeforeImpl(char[] a, int index, int start) {
jaroslav@68	1144	char c2 = a[--index];
jaroslav@68	1145	if (isLowSurrogate(c2)) {
jaroslav@68	1146	if (index > start) {
jaroslav@68	1147	char c1 = a[--index];
jaroslav@68	1148	if (isHighSurrogate(c1)) {
jaroslav@68	1149	return toCodePoint(c1, c2);
jaroslav@68	1150	}
jaroslav@68	1151	}
jaroslav@68	1152	}
jaroslav@68	1153	return c2;
jaroslav@68	1154	}
jaroslav@68	1155
jaroslav@68	1156	/**
jaroslav@68	1157	* Returns the leading surrogate (a
jaroslav@68	1158	* <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
jaroslav@68	1159	* high surrogate code unit</a>) of the
jaroslav@68	1160	* <a href="http://www.unicode.org/glossary/#surrogate_pair">
jaroslav@68	1161	* surrogate pair</a>
jaroslav@68	1162	* representing the specified supplementary character (Unicode
jaroslav@68	1163	* code point) in the UTF-16 encoding. If the specified character
jaroslav@68	1164	* is not a
jaroslav@68	1165	* <a href="Character.html#supplementary">supplementary character</a>,
jaroslav@68	1166	* an unspecified {@code char} is returned.
jaroslav@68	1167	*
jaroslav@68	1168	* <p>If
jaroslav@68	1169	* {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
jaroslav@68	1170	* is {@code true}, then
jaroslav@68	1171	* {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
jaroslav@68	1172	* {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
jaroslav@68	1173	* are also always {@code true}.
jaroslav@68	1174	*
jaroslav@68	1175	* @param codePoint a supplementary character (Unicode code point)
jaroslav@68	1176	* @return the leading surrogate code unit used to represent the
jaroslav@68	1177	* character in the UTF-16 encoding
jaroslav@68	1178	* @since 1.7
jaroslav@68	1179	*/
jaroslav@68	1180	public static char highSurrogate(int codePoint) {
jaroslav@68	1181	return (char) ((codePoint >>> 10)
jaroslav@68	1182	+ (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
jaroslav@68	1183	}
jaroslav@68	1184
jaroslav@68	1185	/**
jaroslav@68	1186	* Returns the trailing surrogate (a
jaroslav@68	1187	* <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
jaroslav@68	1188	* low surrogate code unit</a>) of the
jaroslav@68	1189	* <a href="http://www.unicode.org/glossary/#surrogate_pair">
jaroslav@68	1190	* surrogate pair</a>
jaroslav@68	1191	* representing the specified supplementary character (Unicode
jaroslav@68	1192	* code point) in the UTF-16 encoding. If the specified character
jaroslav@68	1193	* is not a
jaroslav@68	1194	* <a href="Character.html#supplementary">supplementary character</a>,
jaroslav@68	1195	* an unspecified {@code char} is returned.
jaroslav@68	1196	*
jaroslav@68	1197	* <p>If
jaroslav@68	1198	* {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
jaroslav@68	1199	* is {@code true}, then
jaroslav@68	1200	* {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
jaroslav@68	1201	* {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
jaroslav@68	1202	* are also always {@code true}.
jaroslav@68	1203	*
jaroslav@68	1204	* @param codePoint a supplementary character (Unicode code point)
jaroslav@68	1205	* @return the trailing surrogate code unit used to represent the
jaroslav@68	1206	* character in the UTF-16 encoding
jaroslav@68	1207	* @since 1.7
jaroslav@68	1208	*/
jaroslav@68	1209	public static char lowSurrogate(int codePoint) {
jaroslav@68	1210	return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
jaroslav@68	1211	}
jaroslav@68	1212
jaroslav@68	1213	/**
jaroslav@68	1214	* Converts the specified character (Unicode code point) to its
jaroslav@68	1215	* UTF-16 representation. If the specified code point is a BMP
jaroslav@68	1216	* (Basic Multilingual Plane or Plane 0) value, the same value is
jaroslav@68	1217	* stored in {@code dst[dstIndex]}, and 1 is returned. If the
jaroslav@68	1218	* specified code point is a supplementary character, its
jaroslav@68	1219	* surrogate values are stored in {@code dst[dstIndex]}
jaroslav@68	1220	* (high-surrogate) and {@code dst[dstIndex+1]}
jaroslav@68	1221	* (low-surrogate), and 2 is returned.
jaroslav@68	1222	*
jaroslav@68	1223	* @param codePoint the character (Unicode code point) to be converted.
jaroslav@68	1224	* @param dst an array of {@code char} in which the
jaroslav@68	1225	* {@code codePoint}'s UTF-16 value is stored.
jaroslav@68	1226	* @param dstIndex the start index into the {@code dst}
jaroslav@68	1227	* array where the converted value is stored.
jaroslav@68	1228	* @return 1 if the code point is a BMP code point, 2 if the
jaroslav@68	1229	* code point is a supplementary code point.
jaroslav@68	1230	* @exception IllegalArgumentException if the specified
jaroslav@68	1231	* {@code codePoint} is not a valid Unicode code point.
jaroslav@68	1232	* @exception NullPointerException if the specified {@code dst} is null.
jaroslav@68	1233	* @exception IndexOutOfBoundsException if {@code dstIndex}
jaroslav@68	1234	* is negative or not less than {@code dst.length}, or if
jaroslav@68	1235	* {@code dst} at {@code dstIndex} doesn't have enough
jaroslav@68	1236	* array element(s) to store the resulting {@code char}
jaroslav@68	1237	* value(s). (If {@code dstIndex} is equal to
jaroslav@68	1238	* {@code dst.length-1} and the specified
jaroslav@68	1239	* {@code codePoint} is a supplementary character, the
jaroslav@68	1240	* high-surrogate value is not stored in
jaroslav@68	1241	* {@code dst[dstIndex]}.)
jaroslav@68	1242	* @since 1.5
jaroslav@68	1243	*/
jaroslav@68	1244	public static int toChars(int codePoint, char[] dst, int dstIndex) {
jaroslav@68	1245	if (isBmpCodePoint(codePoint)) {
jaroslav@68	1246	dst[dstIndex] = (char) codePoint;
jaroslav@68	1247	return 1;
jaroslav@68	1248	} else if (isValidCodePoint(codePoint)) {
jaroslav@68	1249	toSurrogates(codePoint, dst, dstIndex);
jaroslav@68	1250	return 2;
jaroslav@68	1251	} else {
jaroslav@68	1252	throw new IllegalArgumentException();
jaroslav@68	1253	}
jaroslav@68	1254	}
jaroslav@68	1255
jaroslav@68	1256	/**
jaroslav@68	1257	* Converts the specified character (Unicode code point) to its
jaroslav@68	1258	* UTF-16 representation stored in a {@code char} array. If
jaroslav@68	1259	* the specified code point is a BMP (Basic Multilingual Plane or
jaroslav@68	1260	* Plane 0) value, the resulting {@code char} array has
jaroslav@68	1261	* the same value as {@code codePoint}. If the specified code
jaroslav@68	1262	* point is a supplementary code point, the resulting
jaroslav@68	1263	* {@code char} array has the corresponding surrogate pair.
jaroslav@68	1264	*
jaroslav@68	1265	* @param codePoint a Unicode code point
jaroslav@68	1266	* @return a {@code char} array having
jaroslav@68	1267	* {@code codePoint}'s UTF-16 representation.
jaroslav@68	1268	* @exception IllegalArgumentException if the specified
jaroslav@68	1269	* {@code codePoint} is not a valid Unicode code point.
jaroslav@68	1270	* @since 1.5
jaroslav@68	1271	*/
jaroslav@68	1272	public static char[] toChars(int codePoint) {
jaroslav@68	1273	if (isBmpCodePoint(codePoint)) {
jaroslav@68	1274	return new char[] { (char) codePoint };
jaroslav@68	1275	} else if (isValidCodePoint(codePoint)) {
jaroslav@68	1276	char[] result = new char[2];
jaroslav@68	1277	toSurrogates(codePoint, result, 0);
jaroslav@68	1278	return result;
jaroslav@68	1279	} else {
jaroslav@68	1280	throw new IllegalArgumentException();
jaroslav@68	1281	}
jaroslav@68	1282	}
jaroslav@68	1283
jaroslav@68	1284	static void toSurrogates(int codePoint, char[] dst, int index) {
jaroslav@68	1285	// We write elements "backwards" to guarantee all-or-nothing
jaroslav@68	1286	dst[index+1] = lowSurrogate(codePoint);
jaroslav@68	1287	dst[index] = highSurrogate(codePoint);
jaroslav@68	1288	}
jaroslav@68	1289
jaroslav@68	1290	/**
jaroslav@68	1291	* Returns the number of Unicode code points in the text range of
jaroslav@68	1292	* the specified char sequence. The text range begins at the
jaroslav@68	1293	* specified {@code beginIndex} and extends to the
jaroslav@68	1294	* {@code char} at index {@code endIndex - 1}. Thus the
jaroslav@68	1295	* length (in {@code char}s) of the text range is
jaroslav@68	1296	* {@code endIndex-beginIndex}. Unpaired surrogates within
jaroslav@68	1297	* the text range count as one code point each.
jaroslav@68	1298	*
jaroslav@68	1299	* @param seq the char sequence
jaroslav@68	1300	* @param beginIndex the index to the first {@code char} of
jaroslav@68	1301	* the text range.
jaroslav@68	1302	* @param endIndex the index after the last {@code char} of
jaroslav@68	1303	* the text range.
jaroslav@68	1304	* @return the number of Unicode code points in the specified text
jaroslav@68	1305	* range
jaroslav@68	1306	* @exception NullPointerException if {@code seq} is null.
jaroslav@68	1307	* @exception IndexOutOfBoundsException if the
jaroslav@68	1308	* {@code beginIndex} is negative, or {@code endIndex}
jaroslav@68	1309	* is larger than the length of the given sequence, or
jaroslav@68	1310	* {@code beginIndex} is larger than {@code endIndex}.
jaroslav@68	1311	* @since 1.5
jaroslav@68	1312	*/
jaroslav@68	1313	public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
jaroslav@68	1314	int length = seq.length();
jaroslav@68	1315	if (beginIndex < 0 \|\| endIndex > length \|\| beginIndex > endIndex) {
jaroslav@68	1316	throw new IndexOutOfBoundsException();
jaroslav@68	1317	}
jaroslav@68	1318	int n = endIndex - beginIndex;
jaroslav@68	1319	for (int i = beginIndex; i < endIndex; ) {
jaroslav@68	1320	if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
jaroslav@68	1321	isLowSurrogate(seq.charAt(i))) {
jaroslav@68	1322	n--;
jaroslav@68	1323	i++;
jaroslav@68	1324	}
jaroslav@68	1325	}
jaroslav@68	1326	return n;
jaroslav@68	1327	}
jaroslav@68	1328
jaroslav@68	1329	/**
jaroslav@68	1330	* Returns the number of Unicode code points in a subarray of the
jaroslav@68	1331	* {@code char} array argument. The {@code offset}
jaroslav@68	1332	* argument is the index of the first {@code char} of the
jaroslav@68	1333	* subarray and the {@code count} argument specifies the
jaroslav@68	1334	* length of the subarray in {@code char}s. Unpaired
jaroslav@68	1335	* surrogates within the subarray count as one code point each.
jaroslav@68	1336	*
jaroslav@68	1337	* @param a the {@code char} array
jaroslav@68	1338	* @param offset the index of the first {@code char} in the
jaroslav@68	1339	* given {@code char} array
jaroslav@68	1340	* @param count the length of the subarray in {@code char}s
jaroslav@68	1341	* @return the number of Unicode code points in the specified subarray
jaroslav@68	1342	* @exception NullPointerException if {@code a} is null.
jaroslav@68	1343	* @exception IndexOutOfBoundsException if {@code offset} or
jaroslav@68	1344	* {@code count} is negative, or if {@code offset +
jaroslav@68	1345	* count} is larger than the length of the given array.
jaroslav@68	1346	* @since 1.5
jaroslav@68	1347	*/
jaroslav@68	1348	public static int codePointCount(char[] a, int offset, int count) {
jaroslav@68	1349	if (count > a.length - offset \|\| offset < 0 \|\| count < 0) {
jaroslav@68	1350	throw new IndexOutOfBoundsException();
jaroslav@68	1351	}
jaroslav@68	1352	return codePointCountImpl(a, offset, count);
jaroslav@68	1353	}
jaroslav@68	1354
jaroslav@68	1355	static int codePointCountImpl(char[] a, int offset, int count) {
jaroslav@68	1356	int endIndex = offset + count;
jaroslav@68	1357	int n = count;
jaroslav@68	1358	for (int i = offset; i < endIndex; ) {
jaroslav@68	1359	if (isHighSurrogate(a[i++]) && i < endIndex &&
jaroslav@68	1360	isLowSurrogate(a[i])) {
jaroslav@68	1361	n--;
jaroslav@68	1362	i++;
jaroslav@68	1363	}
jaroslav@68	1364	}
jaroslav@68	1365	return n;
jaroslav@68	1366	}
jaroslav@68	1367
jaroslav@68	1368	/**
jaroslav@68	1369	* Returns the index within the given char sequence that is offset
jaroslav@68	1370	* from the given {@code index} by {@code codePointOffset}
jaroslav@68	1371	* code points. Unpaired surrogates within the text range given by
jaroslav@68	1372	* {@code index} and {@code codePointOffset} count as
jaroslav@68	1373	* one code point each.
jaroslav@68	1374	*
jaroslav@68	1375	* @param seq the char sequence
jaroslav@68	1376	* @param index the index to be offset
jaroslav@68	1377	* @param codePointOffset the offset in code points
jaroslav@68	1378	* @return the index within the char sequence
jaroslav@68	1379	* @exception NullPointerException if {@code seq} is null.
jaroslav@68	1380	* @exception IndexOutOfBoundsException if {@code index}
jaroslav@68	1381	* is negative or larger then the length of the char sequence,
jaroslav@68	1382	* or if {@code codePointOffset} is positive and the
jaroslav@68	1383	* subsequence starting with {@code index} has fewer than
jaroslav@68	1384	* {@code codePointOffset} code points, or if
jaroslav@68	1385	* {@code codePointOffset} is negative and the subsequence
jaroslav@68	1386	* before {@code index} has fewer than the absolute value
jaroslav@68	1387	* of {@code codePointOffset} code points.
jaroslav@68	1388	* @since 1.5
jaroslav@68	1389	*/
jaroslav@68	1390	public static int offsetByCodePoints(CharSequence seq, int index,
jaroslav@68	1391	int codePointOffset) {
jaroslav@68	1392	int length = seq.length();
jaroslav@68	1393	if (index < 0 \|\| index > length) {
jaroslav@68	1394	throw new IndexOutOfBoundsException();
jaroslav@68	1395	}
jaroslav@68	1396
jaroslav@68	1397	int x = index;
jaroslav@68	1398	if (codePointOffset >= 0) {
jaroslav@68	1399	int i;
jaroslav@68	1400	for (i = 0; x < length && i < codePointOffset; i++) {
jaroslav@68	1401	if (isHighSurrogate(seq.charAt(x++)) && x < length &&
jaroslav@68	1402	isLowSurrogate(seq.charAt(x))) {
jaroslav@68	1403	x++;
jaroslav@68	1404	}
jaroslav@68	1405	}
jaroslav@68	1406	if (i < codePointOffset) {
jaroslav@68	1407	throw new IndexOutOfBoundsException();
jaroslav@68	1408	}
jaroslav@68	1409	} else {
jaroslav@68	1410	int i;
jaroslav@68	1411	for (i = codePointOffset; x > 0 && i < 0; i++) {
jaroslav@68	1412	if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
jaroslav@68	1413	isHighSurrogate(seq.charAt(x-1))) {
jaroslav@68	1414	x--;
jaroslav@68	1415	}
jaroslav@68	1416	}
jaroslav@68	1417	if (i < 0) {
jaroslav@68	1418	throw new IndexOutOfBoundsException();
jaroslav@68	1419	}
jaroslav@68	1420	}
jaroslav@68	1421	return x;
jaroslav@68	1422	}
jaroslav@68	1423
jaroslav@68	1424	/**
jaroslav@68	1425	* Returns the index within the given {@code char} subarray
jaroslav@68	1426	* that is offset from the given {@code index} by
jaroslav@68	1427	* {@code codePointOffset} code points. The
jaroslav@68	1428	* {@code start} and {@code count} arguments specify a
jaroslav@68	1429	* subarray of the {@code char} array. Unpaired surrogates
jaroslav@68	1430	* within the text range given by {@code index} and
jaroslav@68	1431	* {@code codePointOffset} count as one code point each.
jaroslav@68	1432	*
jaroslav@68	1433	* @param a the {@code char} array
jaroslav@68	1434	* @param start the index of the first {@code char} of the
jaroslav@68	1435	* subarray
jaroslav@68	1436	* @param count the length of the subarray in {@code char}s
jaroslav@68	1437	* @param index the index to be offset
jaroslav@68	1438	* @param codePointOffset the offset in code points
jaroslav@68	1439	* @return the index within the subarray
jaroslav@68	1440	* @exception NullPointerException if {@code a} is null.
jaroslav@68	1441	* @exception IndexOutOfBoundsException
jaroslav@68	1442	* if {@code start} or {@code count} is negative,
jaroslav@68	1443	* or if {@code start + count} is larger than the length of
jaroslav@68	1444	* the given array,
jaroslav@68	1445	* or if {@code index} is less than {@code start} or
jaroslav@68	1446	* larger then {@code start + count},
jaroslav@68	1447	* or if {@code codePointOffset} is positive and the text range
jaroslav@68	1448	* starting with {@code index} and ending with {@code start + count - 1}
jaroslav@68	1449	* has fewer than {@code codePointOffset} code
jaroslav@68	1450	* points,
jaroslav@68	1451	* or if {@code codePointOffset} is negative and the text range
jaroslav@68	1452	* starting with {@code start} and ending with {@code index - 1}
jaroslav@68	1453	* has fewer than the absolute value of
jaroslav@68	1454	* {@code codePointOffset} code points.
jaroslav@68	1455	* @since 1.5
jaroslav@68	1456	*/
jaroslav@68	1457	public static int offsetByCodePoints(char[] a, int start, int count,
jaroslav@68	1458	int index, int codePointOffset) {
jaroslav@68	1459	if (count > a.length-start \|\| start < 0 \|\| count < 0
jaroslav@68	1460	\|\| index < start \|\| index > start+count) {
jaroslav@68	1461	throw new IndexOutOfBoundsException();
jaroslav@68	1462	}
jaroslav@68	1463	return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
jaroslav@68	1464	}
jaroslav@68	1465
jaroslav@68	1466	static int offsetByCodePointsImpl(char[]a, int start, int count,
jaroslav@68	1467	int index, int codePointOffset) {
jaroslav@68	1468	int x = index;
jaroslav@68	1469	if (codePointOffset >= 0) {
jaroslav@68	1470	int limit = start + count;
jaroslav@68	1471	int i;
jaroslav@68	1472	for (i = 0; x < limit && i < codePointOffset; i++) {
jaroslav@68	1473	if (isHighSurrogate(a[x++]) && x < limit &&
jaroslav@68	1474	isLowSurrogate(a[x])) {
jaroslav@68	1475	x++;
jaroslav@68	1476	}
jaroslav@68	1477	}
jaroslav@68	1478	if (i < codePointOffset) {
jaroslav@68	1479	throw new IndexOutOfBoundsException();
jaroslav@68	1480	}
jaroslav@68	1481	} else {
jaroslav@68	1482	int i;
jaroslav@68	1483	for (i = codePointOffset; x > start && i < 0; i++) {
jaroslav@68	1484	if (isLowSurrogate(a[--x]) && x > start &&
jaroslav@68	1485	isHighSurrogate(a[x-1])) {
jaroslav@68	1486	x--;
jaroslav@68	1487	}
jaroslav@68	1488	}
jaroslav@68	1489	if (i < 0) {
jaroslav@68	1490	throw new IndexOutOfBoundsException();
jaroslav@68	1491	}
jaroslav@68	1492	}
jaroslav@68	1493	return x;
jaroslav@68	1494	}
jaroslav@68	1495
jaroslav@68	1496	/**
jaroslav@68	1497	* Determines if the specified character is a lowercase character.
jaroslav@68	1498	* <p>
jaroslav@68	1499	* A character is lowercase if its general category type, provided
jaroslav@68	1500	* by {@code Character.getType(ch)}, is
jaroslav@68	1501	* {@code LOWERCASE_LETTER}, or it has contributory property
jaroslav@68	1502	* Other_Lowercase as defined by the Unicode Standard.
jaroslav@68	1503	* <p>
jaroslav@68	1504	* The following are examples of lowercase characters:
jaroslav@68	1505	* <p><blockquote><pre>
jaroslav@68	1506	* a b c d e f g h i j k l m n o p q r s t u v w x y z
jaroslav@68	1507	* '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
jaroslav@68	1508	* '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
jaroslav@68	1509	* '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
jaroslav@68	1510	* '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
jaroslav@68	1511	* </pre></blockquote>
jaroslav@68	1512	* <p> Many other Unicode characters are lowercase too.
jaroslav@68	1513	*
jaroslav@68	1514	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	1515	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	1516	* all Unicode characters, including supplementary characters, use
jaroslav@68	1517	* the {@link #isLowerCase(int)} method.
jaroslav@68	1518	*
jaroslav@68	1519	* @param ch the character to be tested.
jaroslav@68	1520	* @return {@code true} if the character is lowercase;
jaroslav@68	1521	* {@code false} otherwise.
jaroslav@68	1522	* @see Character#isLowerCase(char)
jaroslav@68	1523	* @see Character#isTitleCase(char)
jaroslav@68	1524	* @see Character#toLowerCase(char)
jaroslav@68	1525	* @see Character#getType(char)
jaroslav@68	1526	*/
jaroslav@68	1527	public static boolean isLowerCase(char ch) {
jaroslav@85	1528	throw new UnsupportedOperationException();
jaroslav@68	1529	}
jaroslav@68	1530
jaroslav@68	1531	/**
jaroslav@68	1532	* Determines if the specified character is an uppercase character.
jaroslav@68	1533	* <p>
jaroslav@68	1534	* A character is uppercase if its general category type, provided by
jaroslav@68	1535	* {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}.
jaroslav@68	1536	* or it has contributory property Other_Uppercase as defined by the Unicode Standard.
jaroslav@68	1537	* <p>
jaroslav@68	1538	* The following are examples of uppercase characters:
jaroslav@68	1539	* <p><blockquote><pre>
jaroslav@68	1540	* A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
jaroslav@68	1541	* '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
jaroslav@68	1542	* '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
jaroslav@68	1543	* '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
jaroslav@68	1544	* '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
jaroslav@68	1545	* </pre></blockquote>
jaroslav@68	1546	* <p> Many other Unicode characters are uppercase too.<p>
jaroslav@68	1547	*
jaroslav@68	1548	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	1549	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	1550	* all Unicode characters, including supplementary characters, use
jaroslav@68	1551	* the {@link #isUpperCase(int)} method.
jaroslav@68	1552	*
jaroslav@68	1553	* @param ch the character to be tested.
jaroslav@68	1554	* @return {@code true} if the character is uppercase;
jaroslav@68	1555	* {@code false} otherwise.
jaroslav@68	1556	* @see Character#isLowerCase(char)
jaroslav@68	1557	* @see Character#isTitleCase(char)
jaroslav@68	1558	* @see Character#toUpperCase(char)
jaroslav@68	1559	* @see Character#getType(char)
jaroslav@68	1560	* @since 1.0
jaroslav@68	1561	*/
jaroslav@68	1562	public static boolean isUpperCase(char ch) {
jaroslav@85	1563	throw new UnsupportedOperationException();
jaroslav@68	1564	}
jaroslav@68	1565
jaroslav@68	1566	/**
jaroslav@68	1567	* Determines if the specified character is a titlecase character.
jaroslav@68	1568	* <p>
jaroslav@68	1569	* A character is a titlecase character if its general
jaroslav@68	1570	* category type, provided by {@code Character.getType(ch)},
jaroslav@68	1571	* is {@code TITLECASE_LETTER}.
jaroslav@68	1572	* <p>
jaroslav@68	1573	* Some characters look like pairs of Latin letters. For example, there
jaroslav@68	1574	* is an uppercase letter that looks like "LJ" and has a corresponding
jaroslav@68	1575	* lowercase letter that looks like "lj". A third form, which looks like "Lj",
jaroslav@68	1576	* is the appropriate form to use when rendering a word in lowercase
jaroslav@68	1577	* with initial capitals, as for a book title.
jaroslav@68	1578	* <p>
jaroslav@68	1579	* These are some of the Unicode characters for which this method returns
jaroslav@68	1580	* {@code true}:
jaroslav@68	1581	* <ul>
jaroslav@68	1582	* <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
jaroslav@68	1583	* <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
jaroslav@68	1584	* <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
jaroslav@68	1585	* <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
jaroslav@68	1586	* </ul>
jaroslav@68	1587	* <p> Many other Unicode characters are titlecase too.<p>
jaroslav@68	1588	*
jaroslav@68	1589	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	1590	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	1591	* all Unicode characters, including supplementary characters, use
jaroslav@68	1592	* the {@link #isTitleCase(int)} method.
jaroslav@68	1593	*
jaroslav@68	1594	* @param ch the character to be tested.
jaroslav@68	1595	* @return {@code true} if the character is titlecase;
jaroslav@68	1596	* {@code false} otherwise.
jaroslav@68	1597	* @see Character#isLowerCase(char)
jaroslav@68	1598	* @see Character#isUpperCase(char)
jaroslav@68	1599	* @see Character#toTitleCase(char)
jaroslav@68	1600	* @see Character#getType(char)
jaroslav@68	1601	* @since 1.0.2
jaroslav@68	1602	*/
jaroslav@68	1603	public static boolean isTitleCase(char ch) {
jaroslav@68	1604	return isTitleCase((int)ch);
jaroslav@68	1605	}
jaroslav@68	1606
jaroslav@68	1607	/**
jaroslav@68	1608	* Determines if the specified character (Unicode code point) is a titlecase character.
jaroslav@68	1609	* <p>
jaroslav@68	1610	* A character is a titlecase character if its general
jaroslav@68	1611	* category type, provided by {@link Character#getType(int) getType(codePoint)},
jaroslav@68	1612	* is {@code TITLECASE_LETTER}.
jaroslav@68	1613	* <p>
jaroslav@68	1614	* Some characters look like pairs of Latin letters. For example, there
jaroslav@68	1615	* is an uppercase letter that looks like "LJ" and has a corresponding
jaroslav@68	1616	* lowercase letter that looks like "lj". A third form, which looks like "Lj",
jaroslav@68	1617	* is the appropriate form to use when rendering a word in lowercase
jaroslav@68	1618	* with initial capitals, as for a book title.
jaroslav@68	1619	* <p>
jaroslav@68	1620	* These are some of the Unicode characters for which this method returns
jaroslav@68	1621	* {@code true}:
jaroslav@68	1622	* <ul>
jaroslav@68	1623	* <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
jaroslav@68	1624	* <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
jaroslav@68	1625	* <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
jaroslav@68	1626	* <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
jaroslav@68	1627	* </ul>
jaroslav@68	1628	* <p> Many other Unicode characters are titlecase too.<p>
jaroslav@68	1629	*
jaroslav@68	1630	* @param codePoint the character (Unicode code point) to be tested.
jaroslav@68	1631	* @return {@code true} if the character is titlecase;
jaroslav@68	1632	* {@code false} otherwise.
jaroslav@68	1633	* @see Character#isLowerCase(int)
jaroslav@68	1634	* @see Character#isUpperCase(int)
jaroslav@68	1635	* @see Character#toTitleCase(int)
jaroslav@68	1636	* @see Character#getType(int)
jaroslav@68	1637	* @since 1.5
jaroslav@68	1638	*/
jaroslav@68	1639	public static boolean isTitleCase(int codePoint) {
jaroslav@68	1640	return getType(codePoint) == Character.TITLECASE_LETTER;
jaroslav@68	1641	}
jaroslav@68	1642
jaroslav@68	1643	/**
jaroslav@68	1644	* Determines if the specified character is a digit.
jaroslav@68	1645	* <p>
jaroslav@68	1646	* A character is a digit if its general category type, provided
jaroslav@68	1647	* by {@code Character.getType(ch)}, is
jaroslav@68	1648	* {@code DECIMAL_DIGIT_NUMBER}.
jaroslav@68	1649	* <p>
jaroslav@68	1650	* Some Unicode character ranges that contain digits:
jaroslav@68	1651	* <ul>
jaroslav@68	1652	* <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
jaroslav@68	1653	* ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
jaroslav@68	1654	* <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
jaroslav@68	1655	* Arabic-Indic digits
jaroslav@68	1656	* <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
jaroslav@68	1657	* Extended Arabic-Indic digits
jaroslav@68	1658	* <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
jaroslav@68	1659	* Devanagari digits
jaroslav@68	1660	* <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
jaroslav@68	1661	* Fullwidth digits
jaroslav@68	1662	* </ul>
jaroslav@68	1663	*
jaroslav@68	1664	* Many other character ranges contain digits as well.
jaroslav@68	1665	*
jaroslav@68	1666	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	1667	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	1668	* all Unicode characters, including supplementary characters, use
jaroslav@68	1669	* the {@link #isDigit(int)} method.
jaroslav@68	1670	*
jaroslav@68	1671	* @param ch the character to be tested.
jaroslav@68	1672	* @return {@code true} if the character is a digit;
jaroslav@68	1673	* {@code false} otherwise.
jaroslav@68	1674	* @see Character#digit(char, int)
jaroslav@68	1675	* @see Character#forDigit(int, int)
jaroslav@68	1676	* @see Character#getType(char)
jaroslav@68	1677	*/
jaroslav@68	1678	public static boolean isDigit(char ch) {
jaroslav@68	1679	return isDigit((int)ch);
jaroslav@68	1680	}
jaroslav@68	1681
jaroslav@68	1682	/**
jaroslav@68	1683	* Determines if the specified character (Unicode code point) is a digit.
jaroslav@68	1684	* <p>
jaroslav@68	1685	* A character is a digit if its general category type, provided
jaroslav@68	1686	* by {@link Character#getType(int) getType(codePoint)}, is
jaroslav@68	1687	* {@code DECIMAL_DIGIT_NUMBER}.
jaroslav@68	1688	* <p>
jaroslav@68	1689	* Some Unicode character ranges that contain digits:
jaroslav@68	1690	* <ul>
jaroslav@68	1691	* <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
jaroslav@68	1692	* ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
jaroslav@68	1693	* <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
jaroslav@68	1694	* Arabic-Indic digits
jaroslav@68	1695	* <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
jaroslav@68	1696	* Extended Arabic-Indic digits
jaroslav@68	1697	* <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
jaroslav@68	1698	* Devanagari digits
jaroslav@68	1699	* <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
jaroslav@68	1700	* Fullwidth digits
jaroslav@68	1701	* </ul>
jaroslav@68	1702	*
jaroslav@68	1703	* Many other character ranges contain digits as well.
jaroslav@68	1704	*
jaroslav@68	1705	* @param codePoint the character (Unicode code point) to be tested.
jaroslav@68	1706	* @return {@code true} if the character is a digit;
jaroslav@68	1707	* {@code false} otherwise.
jaroslav@68	1708	* @see Character#forDigit(int, int)
jaroslav@68	1709	* @see Character#getType(int)
jaroslav@68	1710	* @since 1.5
jaroslav@68	1711	*/
jaroslav@68	1712	public static boolean isDigit(int codePoint) {
jaroslav@68	1713	return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
jaroslav@68	1714	}
jaroslav@68	1715
jaroslav@68	1716	/**
jaroslav@68	1717	* Determines if a character is defined in Unicode.
jaroslav@68	1718	* <p>
jaroslav@68	1719	* A character is defined if at least one of the following is true:
jaroslav@68	1720	* <ul>
jaroslav@68	1721	* <li>It has an entry in the UnicodeData file.
jaroslav@68	1722	* <li>It has a value in a range defined by the UnicodeData file.
jaroslav@68	1723	* </ul>
jaroslav@68	1724	*
jaroslav@68	1725	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	1726	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	1727	* all Unicode characters, including supplementary characters, use
jaroslav@68	1728	* the {@link #isDefined(int)} method.
jaroslav@68	1729	*
jaroslav@68	1730	* @param ch the character to be tested
jaroslav@68	1731	* @return {@code true} if the character has a defined meaning
jaroslav@68	1732	* in Unicode; {@code false} otherwise.
jaroslav@68	1733	* @see Character#isDigit(char)
jaroslav@68	1734	* @see Character#isLetter(char)
jaroslav@68	1735	* @see Character#isLetterOrDigit(char)
jaroslav@68	1736	* @see Character#isLowerCase(char)
jaroslav@68	1737	* @see Character#isTitleCase(char)
jaroslav@68	1738	* @see Character#isUpperCase(char)
jaroslav@68	1739	* @since 1.0.2
jaroslav@68	1740	*/
jaroslav@68	1741	public static boolean isDefined(char ch) {
jaroslav@68	1742	return isDefined((int)ch);
jaroslav@68	1743	}
jaroslav@68	1744
jaroslav@68	1745	/**
jaroslav@68	1746	* Determines if a character (Unicode code point) is defined in Unicode.
jaroslav@68	1747	* <p>
jaroslav@68	1748	* A character is defined if at least one of the following is true:
jaroslav@68	1749	* <ul>
jaroslav@68	1750	* <li>It has an entry in the UnicodeData file.
jaroslav@68	1751	* <li>It has a value in a range defined by the UnicodeData file.
jaroslav@68	1752	* </ul>
jaroslav@68	1753	*
jaroslav@68	1754	* @param codePoint the character (Unicode code point) to be tested.
jaroslav@68	1755	* @return {@code true} if the character has a defined meaning
jaroslav@68	1756	* in Unicode; {@code false} otherwise.
jaroslav@68	1757	* @see Character#isDigit(int)
jaroslav@68	1758	* @see Character#isLetter(int)
jaroslav@68	1759	* @see Character#isLetterOrDigit(int)
jaroslav@68	1760	* @see Character#isLowerCase(int)
jaroslav@68	1761	* @see Character#isTitleCase(int)
jaroslav@68	1762	* @see Character#isUpperCase(int)
jaroslav@68	1763	* @since 1.5
jaroslav@68	1764	*/
jaroslav@68	1765	public static boolean isDefined(int codePoint) {
jaroslav@68	1766	return getType(codePoint) != Character.UNASSIGNED;
jaroslav@68	1767	}
jaroslav@68	1768
jaroslav@68	1769	/**
jaroslav@68	1770	* Determines if the specified character is a letter.
jaroslav@68	1771	* <p>
jaroslav@68	1772	* A character is considered to be a letter if its general
jaroslav@68	1773	* category type, provided by {@code Character.getType(ch)},
jaroslav@68	1774	* is any of the following:
jaroslav@68	1775	* <ul>
jaroslav@68	1776	* <li> {@code UPPERCASE_LETTER}
jaroslav@68	1777	* <li> {@code LOWERCASE_LETTER}
jaroslav@68	1778	* <li> {@code TITLECASE_LETTER}
jaroslav@68	1779	* <li> {@code MODIFIER_LETTER}
jaroslav@68	1780	* <li> {@code OTHER_LETTER}
jaroslav@68	1781	* </ul>
jaroslav@68	1782	*
jaroslav@68	1783	* Not all letters have case. Many characters are
jaroslav@68	1784	* letters but are neither uppercase nor lowercase nor titlecase.
jaroslav@68	1785	*
jaroslav@68	1786	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	1787	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	1788	* all Unicode characters, including supplementary characters, use
jaroslav@68	1789	* the {@link #isLetter(int)} method.
jaroslav@68	1790	*
jaroslav@68	1791	* @param ch the character to be tested.
jaroslav@68	1792	* @return {@code true} if the character is a letter;
jaroslav@68	1793	* {@code false} otherwise.
jaroslav@68	1794	* @see Character#isDigit(char)
jaroslav@68	1795	* @see Character#isJavaIdentifierStart(char)
jaroslav@68	1796	* @see Character#isJavaLetter(char)
jaroslav@68	1797	* @see Character#isJavaLetterOrDigit(char)
jaroslav@68	1798	* @see Character#isLetterOrDigit(char)
jaroslav@68	1799	* @see Character#isLowerCase(char)
jaroslav@68	1800	* @see Character#isTitleCase(char)
jaroslav@68	1801	* @see Character#isUnicodeIdentifierStart(char)
jaroslav@68	1802	* @see Character#isUpperCase(char)
jaroslav@68	1803	*/
jaroslav@68	1804	public static boolean isLetter(char ch) {
jaroslav@68	1805	return isLetter((int)ch);
jaroslav@68	1806	}
jaroslav@68	1807
jaroslav@68	1808	/**
jaroslav@68	1809	* Determines if the specified character (Unicode code point) is a letter.
jaroslav@68	1810	* <p>
jaroslav@68	1811	* A character is considered to be a letter if its general
jaroslav@68	1812	* category type, provided by {@link Character#getType(int) getType(codePoint)},
jaroslav@68	1813	* is any of the following:
jaroslav@68	1814	* <ul>
jaroslav@68	1815	* <li> {@code UPPERCASE_LETTER}
jaroslav@68	1816	* <li> {@code LOWERCASE_LETTER}
jaroslav@68	1817	* <li> {@code TITLECASE_LETTER}
jaroslav@68	1818	* <li> {@code MODIFIER_LETTER}
jaroslav@68	1819	* <li> {@code OTHER_LETTER}
jaroslav@68	1820	* </ul>
jaroslav@68	1821	*
jaroslav@68	1822	* Not all letters have case. Many characters are
jaroslav@68	1823	* letters but are neither uppercase nor lowercase nor titlecase.
jaroslav@68	1824	*
jaroslav@68	1825	* @param codePoint the character (Unicode code point) to be tested.
jaroslav@68	1826	* @return {@code true} if the character is a letter;
jaroslav@68	1827	* {@code false} otherwise.
jaroslav@68	1828	* @see Character#isDigit(int)
jaroslav@68	1829	* @see Character#isJavaIdentifierStart(int)
jaroslav@68	1830	* @see Character#isLetterOrDigit(int)
jaroslav@68	1831	* @see Character#isLowerCase(int)
jaroslav@68	1832	* @see Character#isTitleCase(int)
jaroslav@68	1833	* @see Character#isUnicodeIdentifierStart(int)
jaroslav@68	1834	* @see Character#isUpperCase(int)
jaroslav@68	1835	* @since 1.5
jaroslav@68	1836	*/
jaroslav@68	1837	public static boolean isLetter(int codePoint) {
jaroslav@68	1838	return ((((1 << Character.UPPERCASE_LETTER) \|
jaroslav@68	1839	(1 << Character.LOWERCASE_LETTER) \|
jaroslav@68	1840	(1 << Character.TITLECASE_LETTER) \|
jaroslav@68	1841	(1 << Character.MODIFIER_LETTER) \|
jaroslav@68	1842	(1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
jaroslav@68	1843	!= 0;
jaroslav@68	1844	}
jaroslav@68	1845
jaroslav@68	1846	/**
jaroslav@68	1847	* Determines if the specified character is a letter or digit.
jaroslav@68	1848	* <p>
jaroslav@68	1849	* A character is considered to be a letter or digit if either
jaroslav@68	1850	* {@code Character.isLetter(char ch)} or
jaroslav@68	1851	* {@code Character.isDigit(char ch)} returns
jaroslav@68	1852	* {@code true} for the character.
jaroslav@68	1853	*
jaroslav@68	1854	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	1855	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	1856	* all Unicode characters, including supplementary characters, use
jaroslav@68	1857	* the {@link #isLetterOrDigit(int)} method.
jaroslav@68	1858	*
jaroslav@68	1859	* @param ch the character to be tested.
jaroslav@68	1860	* @return {@code true} if the character is a letter or digit;
jaroslav@68	1861	* {@code false} otherwise.
jaroslav@68	1862	* @see Character#isDigit(char)
jaroslav@68	1863	* @see Character#isJavaIdentifierPart(char)
jaroslav@68	1864	* @see Character#isJavaLetter(char)
jaroslav@68	1865	* @see Character#isJavaLetterOrDigit(char)
jaroslav@68	1866	* @see Character#isLetter(char)
jaroslav@68	1867	* @see Character#isUnicodeIdentifierPart(char)
jaroslav@68	1868	* @since 1.0.2
jaroslav@68	1869	*/
jaroslav@68	1870	public static boolean isLetterOrDigit(char ch) {
jaroslav@68	1871	return isLetterOrDigit((int)ch);
jaroslav@68	1872	}
jaroslav@68	1873
jaroslav@68	1874	/**
jaroslav@68	1875	* Determines if the specified character (Unicode code point) is a letter or digit.
jaroslav@68	1876	* <p>
jaroslav@68	1877	* A character is considered to be a letter or digit if either
jaroslav@68	1878	* {@link #isLetter(int) isLetter(codePoint)} or
jaroslav@68	1879	* {@link #isDigit(int) isDigit(codePoint)} returns
jaroslav@68	1880	* {@code true} for the character.
jaroslav@68	1881	*
jaroslav@68	1882	* @param codePoint the character (Unicode code point) to be tested.
jaroslav@68	1883	* @return {@code true} if the character is a letter or digit;
jaroslav@68	1884	* {@code false} otherwise.
jaroslav@68	1885	* @see Character#isDigit(int)
jaroslav@68	1886	* @see Character#isJavaIdentifierPart(int)
jaroslav@68	1887	* @see Character#isLetter(int)
jaroslav@68	1888	* @see Character#isUnicodeIdentifierPart(int)
jaroslav@68	1889	* @since 1.5
jaroslav@68	1890	*/
jaroslav@68	1891	public static boolean isLetterOrDigit(int codePoint) {
jaroslav@68	1892	return ((((1 << Character.UPPERCASE_LETTER) \|
jaroslav@68	1893	(1 << Character.LOWERCASE_LETTER) \|
jaroslav@68	1894	(1 << Character.TITLECASE_LETTER) \|
jaroslav@68	1895	(1 << Character.MODIFIER_LETTER) \|
jaroslav@68	1896	(1 << Character.OTHER_LETTER) \|
jaroslav@68	1897	(1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
jaroslav@68	1898	!= 0;
jaroslav@68	1899	}
jaroslav@85	1900
jaroslav@85	1901	static int getType(int x) {
jaroslav@85	1902	throw new UnsupportedOperationException();
jaroslav@68	1903	}
jaroslav@85	1904
jaroslav@68	1905	/**
jaroslav@68	1906	* Converts the character argument to lowercase using case
jaroslav@68	1907	* mapping information from the UnicodeData file.
jaroslav@68	1908	* <p>
jaroslav@68	1909	* Note that
jaroslav@68	1910	* {@code Character.isLowerCase(Character.toLowerCase(ch))}
jaroslav@68	1911	* does not always return {@code true} for some ranges of
jaroslav@68	1912	* characters, particularly those that are symbols or ideographs.
jaroslav@68	1913	*
jaroslav@68	1914	* <p>In general, {@link String#toLowerCase()} should be used to map
jaroslav@68	1915	* characters to lowercase. {@code String} case mapping methods
jaroslav@68	1916	* have several benefits over {@code Character} case mapping methods.
jaroslav@68	1917	* {@code String} case mapping methods can perform locale-sensitive
jaroslav@68	1918	* mappings, context-sensitive mappings, and 1:M character mappings, whereas
jaroslav@68	1919	* the {@code Character} case mapping methods cannot.
jaroslav@68	1920	*
jaroslav@68	1921	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	1922	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	1923	* all Unicode characters, including supplementary characters, use
jaroslav@68	1924	* the {@link #toLowerCase(int)} method.
jaroslav@68	1925	*
jaroslav@68	1926	* @param ch the character to be converted.
jaroslav@68	1927	* @return the lowercase equivalent of the character, if any;
jaroslav@68	1928	* otherwise, the character itself.
jaroslav@68	1929	* @see Character#isLowerCase(char)
jaroslav@68	1930	* @see String#toLowerCase()
jaroslav@68	1931	*/
jaroslav@68	1932	public static char toLowerCase(char ch) {
jaroslav@85	1933	throw new UnsupportedOperationException();
jaroslav@68	1934	}
jaroslav@68	1935
jaroslav@68	1936	/**
jaroslav@68	1937	* Converts the character argument to uppercase using case mapping
jaroslav@68	1938	* information from the UnicodeData file.
jaroslav@68	1939	* <p>
jaroslav@68	1940	* Note that
jaroslav@68	1941	* {@code Character.isUpperCase(Character.toUpperCase(ch))}
jaroslav@68	1942	* does not always return {@code true} for some ranges of
jaroslav@68	1943	* characters, particularly those that are symbols or ideographs.
jaroslav@68	1944	*
jaroslav@68	1945	* <p>In general, {@link String#toUpperCase()} should be used to map
jaroslav@68	1946	* characters to uppercase. {@code String} case mapping methods
jaroslav@68	1947	* have several benefits over {@code Character} case mapping methods.
jaroslav@68	1948	* {@code String} case mapping methods can perform locale-sensitive
jaroslav@68	1949	* mappings, context-sensitive mappings, and 1:M character mappings, whereas
jaroslav@68	1950	* the {@code Character} case mapping methods cannot.
jaroslav@68	1951	*
jaroslav@68	1952	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	1953	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	1954	* all Unicode characters, including supplementary characters, use
jaroslav@68	1955	* the {@link #toUpperCase(int)} method.
jaroslav@68	1956	*
jaroslav@68	1957	* @param ch the character to be converted.
jaroslav@68	1958	* @return the uppercase equivalent of the character, if any;
jaroslav@68	1959	* otherwise, the character itself.
jaroslav@68	1960	* @see Character#isUpperCase(char)
jaroslav@68	1961	* @see String#toUpperCase()
jaroslav@68	1962	*/
jaroslav@68	1963	public static char toUpperCase(char ch) {
jaroslav@85	1964	throw new UnsupportedOperationException();
jaroslav@68	1965	}
jaroslav@68	1966
jaroslav@68	1967	/**
jaroslav@68	1968	* Returns the numeric value of the character {@code ch} in the
jaroslav@68	1969	* specified radix.
jaroslav@68	1970	* <p>
jaroslav@68	1971	* If the radix is not in the range {@code MIN_RADIX} ≤
jaroslav@68	1972	* {@code radix} ≤ {@code MAX_RADIX} or if the
jaroslav@68	1973	* value of {@code ch} is not a valid digit in the specified
jaroslav@68	1974	* radix, {@code -1} is returned. A character is a valid digit
jaroslav@68	1975	* if at least one of the following is true:
jaroslav@68	1976	* <ul>
jaroslav@68	1977	* <li>The method {@code isDigit} is {@code true} of the character
jaroslav@68	1978	* and the Unicode decimal digit value of the character (or its
jaroslav@68	1979	* single-character decomposition) is less than the specified radix.
jaroslav@68	1980	* In this case the decimal digit value is returned.
jaroslav@68	1981	* <li>The character is one of the uppercase Latin letters
jaroslav@68	1982	* {@code 'A'} through {@code 'Z'} and its code is less than
jaroslav@68	1983	* {@code radix + 'A' - 10}.
jaroslav@68	1984	* In this case, {@code ch - 'A' + 10}
jaroslav@68	1985	* is returned.
jaroslav@68	1986	* <li>The character is one of the lowercase Latin letters
jaroslav@68	1987	* {@code 'a'} through {@code 'z'} and its code is less than
jaroslav@68	1988	* {@code radix + 'a' - 10}.
jaroslav@68	1989	* In this case, {@code ch - 'a' + 10}
jaroslav@68	1990	* is returned.
jaroslav@68	1991	* <li>The character is one of the fullwidth uppercase Latin letters A
jaroslav@68	1992	* ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
jaroslav@68	1993	* and its code is less than
jaroslav@68	1994	* {@code radix + '\u005CuFF21' - 10}.
jaroslav@68	1995	* In this case, {@code ch - '\u005CuFF21' + 10}
jaroslav@68	1996	* is returned.
jaroslav@68	1997	* <li>The character is one of the fullwidth lowercase Latin letters a
jaroslav@68	1998	* ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
jaroslav@68	1999	* and its code is less than
jaroslav@68	2000	* {@code radix + '\u005CuFF41' - 10}.
jaroslav@68	2001	* In this case, {@code ch - '\u005CuFF41' + 10}
jaroslav@68	2002	* is returned.
jaroslav@68	2003	* </ul>
jaroslav@68	2004	*
jaroslav@68	2005	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	2006	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	2007	* all Unicode characters, including supplementary characters, use
jaroslav@68	2008	* the {@link #digit(int, int)} method.
jaroslav@68	2009	*
jaroslav@68	2010	* @param ch the character to be converted.
jaroslav@68	2011	* @param radix the radix.
jaroslav@68	2012	* @return the numeric value represented by the character in the
jaroslav@68	2013	* specified radix.
jaroslav@68	2014	* @see Character#forDigit(int, int)
jaroslav@68	2015	* @see Character#isDigit(char)
jaroslav@68	2016	*/
jaroslav@68	2017	public static int digit(char ch, int radix) {
jaroslav@68	2018	return digit((int)ch, radix);
jaroslav@68	2019	}
jaroslav@68	2020
jaroslav@68	2021	/**
jaroslav@68	2022	* Returns the numeric value of the specified character (Unicode
jaroslav@68	2023	* code point) in the specified radix.
jaroslav@68	2024	*
jaroslav@68	2025	* <p>If the radix is not in the range {@code MIN_RADIX} ≤
jaroslav@68	2026	* {@code radix} ≤ {@code MAX_RADIX} or if the
jaroslav@68	2027	* character is not a valid digit in the specified
jaroslav@68	2028	* radix, {@code -1} is returned. A character is a valid digit
jaroslav@68	2029	* if at least one of the following is true:
jaroslav@68	2030	* <ul>
jaroslav@68	2031	* <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
jaroslav@68	2032	* and the Unicode decimal digit value of the character (or its
jaroslav@68	2033	* single-character decomposition) is less than the specified radix.
jaroslav@68	2034	* In this case the decimal digit value is returned.
jaroslav@68	2035	* <li>The character is one of the uppercase Latin letters
jaroslav@68	2036	* {@code 'A'} through {@code 'Z'} and its code is less than
jaroslav@68	2037	* {@code radix + 'A' - 10}.
jaroslav@68	2038	* In this case, {@code codePoint - 'A' + 10}
jaroslav@68	2039	* is returned.
jaroslav@68	2040	* <li>The character is one of the lowercase Latin letters
jaroslav@68	2041	* {@code 'a'} through {@code 'z'} and its code is less than
jaroslav@68	2042	* {@code radix + 'a' - 10}.
jaroslav@68	2043	* In this case, {@code codePoint - 'a' + 10}
jaroslav@68	2044	* is returned.
jaroslav@68	2045	* <li>The character is one of the fullwidth uppercase Latin letters A
jaroslav@68	2046	* ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
jaroslav@68	2047	* and its code is less than
jaroslav@68	2048	* {@code radix + '\u005CuFF21' - 10}.
jaroslav@68	2049	* In this case,
jaroslav@68	2050	* {@code codePoint - '\u005CuFF21' + 10}
jaroslav@68	2051	* is returned.
jaroslav@68	2052	* <li>The character is one of the fullwidth lowercase Latin letters a
jaroslav@68	2053	* ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
jaroslav@68	2054	* and its code is less than
jaroslav@68	2055	* {@code radix + '\u005CuFF41'- 10}.
jaroslav@68	2056	* In this case,
jaroslav@68	2057	* {@code codePoint - '\u005CuFF41' + 10}
jaroslav@68	2058	* is returned.
jaroslav@68	2059	* </ul>
jaroslav@68	2060	*
jaroslav@68	2061	* @param codePoint the character (Unicode code point) to be converted.
jaroslav@68	2062	* @param radix the radix.
jaroslav@68	2063	* @return the numeric value represented by the character in the
jaroslav@68	2064	* specified radix.
jaroslav@68	2065	* @see Character#forDigit(int, int)
jaroslav@68	2066	* @see Character#isDigit(int)
jaroslav@68	2067	* @since 1.5
jaroslav@68	2068	*/
jaroslav@68	2069	public static int digit(int codePoint, int radix) {
jaroslav@85	2070	throw new UnsupportedOperationException();
jaroslav@68	2071	}
jaroslav@68	2072
jaroslav@68	2073	/**
jaroslav@68	2074	* Returns the {@code int} value that the specified Unicode
jaroslav@68	2075	* character represents. For example, the character
jaroslav@68	2076	* {@code '\u005Cu216C'} (the roman numeral fifty) will return
jaroslav@68	2077	* an int with a value of 50.
jaroslav@68	2078	* <p>
jaroslav@68	2079	* The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
jaroslav@68	2080	* {@code '\u005Cu005A'}), lowercase
jaroslav@68	2081	* ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
jaroslav@68	2082	* full width variant ({@code '\u005CuFF21'} through
jaroslav@68	2083	* {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
jaroslav@68	2084	* {@code '\u005CuFF5A'}) forms have numeric values from 10
jaroslav@68	2085	* through 35. This is independent of the Unicode specification,
jaroslav@68	2086	* which does not assign numeric values to these {@code char}
jaroslav@68	2087	* values.
jaroslav@68	2088	* <p>
jaroslav@68	2089	* If the character does not have a numeric value, then -1 is returned.
jaroslav@68	2090	* If the character has a numeric value that cannot be represented as a
jaroslav@68	2091	* nonnegative integer (for example, a fractional value), then -2
jaroslav@68	2092	* is returned.
jaroslav@68	2093	*
jaroslav@68	2094	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	2095	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	2096	* all Unicode characters, including supplementary characters, use
jaroslav@68	2097	* the {@link #getNumericValue(int)} method.
jaroslav@68	2098	*
jaroslav@68	2099	* @param ch the character to be converted.
jaroslav@68	2100	* @return the numeric value of the character, as a nonnegative {@code int}
jaroslav@68	2101	* value; -2 if the character has a numeric value that is not a
jaroslav@68	2102	* nonnegative integer; -1 if the character has no numeric value.
jaroslav@68	2103	* @see Character#forDigit(int, int)
jaroslav@68	2104	* @see Character#isDigit(char)
jaroslav@68	2105	* @since 1.1
jaroslav@68	2106	*/
jaroslav@68	2107	public static int getNumericValue(char ch) {
jaroslav@68	2108	return getNumericValue((int)ch);
jaroslav@68	2109	}
jaroslav@68	2110
jaroslav@68	2111	/**
jaroslav@68	2112	* Returns the {@code int} value that the specified
jaroslav@68	2113	* character (Unicode code point) represents. For example, the character
jaroslav@68	2114	* {@code '\u005Cu216C'} (the Roman numeral fifty) will return
jaroslav@68	2115	* an {@code int} with a value of 50.
jaroslav@68	2116	* <p>
jaroslav@68	2117	* The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
jaroslav@68	2118	* {@code '\u005Cu005A'}), lowercase
jaroslav@68	2119	* ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
jaroslav@68	2120	* full width variant ({@code '\u005CuFF21'} through
jaroslav@68	2121	* {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
jaroslav@68	2122	* {@code '\u005CuFF5A'}) forms have numeric values from 10
jaroslav@68	2123	* through 35. This is independent of the Unicode specification,
jaroslav@68	2124	* which does not assign numeric values to these {@code char}
jaroslav@68	2125	* values.
jaroslav@68	2126	* <p>
jaroslav@68	2127	* If the character does not have a numeric value, then -1 is returned.
jaroslav@68	2128	* If the character has a numeric value that cannot be represented as a
jaroslav@68	2129	* nonnegative integer (for example, a fractional value), then -2
jaroslav@68	2130	* is returned.
jaroslav@68	2131	*
jaroslav@68	2132	* @param codePoint the character (Unicode code point) to be converted.
jaroslav@68	2133	* @return the numeric value of the character, as a nonnegative {@code int}
jaroslav@68	2134	* value; -2 if the character has a numeric value that is not a
jaroslav@68	2135	* nonnegative integer; -1 if the character has no numeric value.
jaroslav@68	2136	* @see Character#forDigit(int, int)
jaroslav@68	2137	* @see Character#isDigit(int)
jaroslav@68	2138	* @since 1.5
jaroslav@68	2139	*/
jaroslav@68	2140	public static int getNumericValue(int codePoint) {
jaroslav@85	2141	throw new UnsupportedOperationException();
jaroslav@68	2142	}
jaroslav@68	2143
jaroslav@68	2144	/**
jaroslav@68	2145	* Determines if the specified character is ISO-LATIN-1 white space.
jaroslav@68	2146	* This method returns {@code true} for the following five
jaroslav@68	2147	* characters only:
jaroslav@68	2148	* <table>
jaroslav@68	2149	* <tr><td>{@code '\t'}</td> <td>{@code U+0009}</td>
jaroslav@68	2150	* <td>{@code HORIZONTAL TABULATION}</td></tr>
jaroslav@68	2151	* <tr><td>{@code '\n'}</td> <td>{@code U+000A}</td>
jaroslav@68	2152	* <td>{@code NEW LINE}</td></tr>
jaroslav@68	2153	* <tr><td>{@code '\f'}</td> <td>{@code U+000C}</td>
jaroslav@68	2154	* <td>{@code FORM FEED}</td></tr>
jaroslav@68	2155	* <tr><td>{@code '\r'}</td> <td>{@code U+000D}</td>
jaroslav@68	2156	* <td>{@code CARRIAGE RETURN}</td></tr>
jaroslav@68	2157	* <tr><td>{@code ' '}</td> <td>{@code U+0020}</td>
jaroslav@68	2158	* <td>{@code SPACE}</td></tr>
jaroslav@68	2159	* </table>
jaroslav@68	2160	*
jaroslav@68	2161	* @param ch the character to be tested.
jaroslav@68	2162	* @return {@code true} if the character is ISO-LATIN-1 white
jaroslav@68	2163	* space; {@code false} otherwise.
jaroslav@68	2164	* @see Character#isSpaceChar(char)
jaroslav@68	2165	* @see Character#isWhitespace(char)
jaroslav@68	2166	* @deprecated Replaced by isWhitespace(char).
jaroslav@68	2167	*/
jaroslav@68	2168	@Deprecated
jaroslav@68	2169	public static boolean isSpace(char ch) {
jaroslav@68	2170	return (ch <= 0x0020) &&
jaroslav@68	2171	(((((1L << 0x0009) \|
jaroslav@68	2172	(1L << 0x000A) \|
jaroslav@68	2173	(1L << 0x000C) \|
jaroslav@68	2174	(1L << 0x000D) \|
jaroslav@68	2175	(1L << 0x0020)) >> ch) & 1L) != 0);
jaroslav@68	2176	}
jaroslav@68	2177
jaroslav@68	2178
jaroslav@68	2179
jaroslav@68	2180	/**
jaroslav@68	2181	* Determines if the specified character is white space according to Java.
jaroslav@68	2182	* A character is a Java whitespace character if and only if it satisfies
jaroslav@68	2183	* one of the following criteria:
jaroslav@68	2184	* <ul>
jaroslav@68	2185	* <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
jaroslav@68	2186	* {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
jaroslav@68	2187	* but is not also a non-breaking space ({@code '\u005Cu00A0'},
jaroslav@68	2188	* {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
jaroslav@68	2189	* <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
jaroslav@68	2190	* <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
jaroslav@68	2191	* <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
jaroslav@68	2192	* <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
jaroslav@68	2193	* <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
jaroslav@68	2194	* <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
jaroslav@68	2195	* <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
jaroslav@68	2196	* <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
jaroslav@68	2197	* <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
jaroslav@68	2198	* </ul>
jaroslav@68	2199	*
jaroslav@68	2200	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	2201	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	2202	* all Unicode characters, including supplementary characters, use
jaroslav@68	2203	* the {@link #isWhitespace(int)} method.
jaroslav@68	2204	*
jaroslav@68	2205	* @param ch the character to be tested.
jaroslav@68	2206	* @return {@code true} if the character is a Java whitespace
jaroslav@68	2207	* character; {@code false} otherwise.
jaroslav@68	2208	* @see Character#isSpaceChar(char)
jaroslav@68	2209	* @since 1.1
jaroslav@68	2210	*/
jaroslav@68	2211	public static boolean isWhitespace(char ch) {
jaroslav@68	2212	return isWhitespace((int)ch);
jaroslav@68	2213	}
jaroslav@68	2214
jaroslav@68	2215	/**
jaroslav@68	2216	* Determines if the specified character (Unicode code point) is
jaroslav@68	2217	* white space according to Java. A character is a Java
jaroslav@68	2218	* whitespace character if and only if it satisfies one of the
jaroslav@68	2219	* following criteria:
jaroslav@68	2220	* <ul>
jaroslav@68	2221	* <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
jaroslav@68	2222	* {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
jaroslav@68	2223	* but is not also a non-breaking space ({@code '\u005Cu00A0'},
jaroslav@68	2224	* {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
jaroslav@68	2225	* <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
jaroslav@68	2226	* <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
jaroslav@68	2227	* <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
jaroslav@68	2228	* <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
jaroslav@68	2229	* <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
jaroslav@68	2230	* <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
jaroslav@68	2231	* <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
jaroslav@68	2232	* <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
jaroslav@68	2233	* <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
jaroslav@68	2234	* </ul>
jaroslav@68	2235	* <p>
jaroslav@68	2236	*
jaroslav@68	2237	* @param codePoint the character (Unicode code point) to be tested.
jaroslav@68	2238	* @return {@code true} if the character is a Java whitespace
jaroslav@68	2239	* character; {@code false} otherwise.
jaroslav@68	2240	* @see Character#isSpaceChar(int)
jaroslav@68	2241	* @since 1.5
jaroslav@68	2242	*/
jaroslav@68	2243	public static boolean isWhitespace(int codePoint) {
jaroslav@85	2244	throw new UnsupportedOperationException();
jaroslav@68	2245	}
jaroslav@68	2246
jaroslav@68	2247	/**
jaroslav@68	2248	* Determines if the specified character is an ISO control
jaroslav@68	2249	* character. A character is considered to be an ISO control
jaroslav@68	2250	* character if its code is in the range {@code '\u005Cu0000'}
jaroslav@68	2251	* through {@code '\u005Cu001F'} or in the range
jaroslav@68	2252	* {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
jaroslav@68	2253	*
jaroslav@68	2254	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	2255	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	2256	* all Unicode characters, including supplementary characters, use
jaroslav@68	2257	* the {@link #isISOControl(int)} method.
jaroslav@68	2258	*
jaroslav@68	2259	* @param ch the character to be tested.
jaroslav@68	2260	* @return {@code true} if the character is an ISO control character;
jaroslav@68	2261	* {@code false} otherwise.
jaroslav@68	2262	*
jaroslav@68	2263	* @see Character#isSpaceChar(char)
jaroslav@68	2264	* @see Character#isWhitespace(char)
jaroslav@68	2265	* @since 1.1
jaroslav@68	2266	*/
jaroslav@68	2267	public static boolean isISOControl(char ch) {
jaroslav@68	2268	return isISOControl((int)ch);
jaroslav@68	2269	}
jaroslav@68	2270
jaroslav@68	2271	/**
jaroslav@68	2272	* Determines if the referenced character (Unicode code point) is an ISO control
jaroslav@68	2273	* character. A character is considered to be an ISO control
jaroslav@68	2274	* character if its code is in the range {@code '\u005Cu0000'}
jaroslav@68	2275	* through {@code '\u005Cu001F'} or in the range
jaroslav@68	2276	* {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
jaroslav@68	2277	*
jaroslav@68	2278	* @param codePoint the character (Unicode code point) to be tested.
jaroslav@68	2279	* @return {@code true} if the character is an ISO control character;
jaroslav@68	2280	* {@code false} otherwise.
jaroslav@68	2281	* @see Character#isSpaceChar(int)
jaroslav@68	2282	* @see Character#isWhitespace(int)
jaroslav@68	2283	* @since 1.5
jaroslav@68	2284	*/
jaroslav@68	2285	public static boolean isISOControl(int codePoint) {
jaroslav@68	2286	// Optimized form of:
jaroslav@68	2287	// (codePoint >= 0x00 && codePoint <= 0x1F) \|\|
jaroslav@68	2288	// (codePoint >= 0x7F && codePoint <= 0x9F);
jaroslav@68	2289	return codePoint <= 0x9F &&
jaroslav@68	2290	(codePoint >= 0x7F \|\| (codePoint >>> 5 == 0));
jaroslav@68	2291	}
jaroslav@68	2292
jaroslav@68	2293	/**
jaroslav@68	2294	* Determines the character representation for a specific digit in
jaroslav@68	2295	* the specified radix. If the value of {@code radix} is not a
jaroslav@68	2296	* valid radix, or the value of {@code digit} is not a valid
jaroslav@68	2297	* digit in the specified radix, the null character
jaroslav@68	2298	* ({@code '\u005Cu0000'}) is returned.
jaroslav@68	2299	* <p>
jaroslav@68	2300	* The {@code radix} argument is valid if it is greater than or
jaroslav@68	2301	* equal to {@code MIN_RADIX} and less than or equal to
jaroslav@68	2302	* {@code MAX_RADIX}. The {@code digit} argument is valid if
jaroslav@68	2303	* {@code 0 <= digit < radix}.
jaroslav@68	2304	* <p>
jaroslav@68	2305	* If the digit is less than 10, then
jaroslav@68	2306	* {@code '0' + digit} is returned. Otherwise, the value
jaroslav@68	2307	* {@code 'a' + digit - 10} is returned.
jaroslav@68	2308	*
jaroslav@68	2309	* @param digit the number to convert to a character.
jaroslav@68	2310	* @param radix the radix.
jaroslav@68	2311	* @return the {@code char} representation of the specified digit
jaroslav@68	2312	* in the specified radix.
jaroslav@68	2313	* @see Character#MIN_RADIX
jaroslav@68	2314	* @see Character#MAX_RADIX
jaroslav@68	2315	* @see Character#digit(char, int)
jaroslav@68	2316	*/
jaroslav@68	2317	public static char forDigit(int digit, int radix) {
jaroslav@68	2318	if ((digit >= radix) \|\| (digit < 0)) {
jaroslav@68	2319	return '\0';
jaroslav@68	2320	}
jaroslav@68	2321	if ((radix < Character.MIN_RADIX) \|\| (radix > Character.MAX_RADIX)) {
jaroslav@68	2322	return '\0';
jaroslav@68	2323	}
jaroslav@68	2324	if (digit < 10) {
jaroslav@68	2325	return (char)('0' + digit);
jaroslav@68	2326	}
jaroslav@68	2327	return (char)('a' - 10 + digit);
jaroslav@68	2328	}
jaroslav@68	2329
jaroslav@68	2330	/**
jaroslav@68	2331	* Compares two {@code Character} objects numerically.
jaroslav@68	2332	*
jaroslav@68	2333	* @param anotherCharacter the {@code Character} to be compared.
jaroslav@68	2334
jaroslav@68	2335	* @return the value {@code 0} if the argument {@code Character}
jaroslav@68	2336	* is equal to this {@code Character}; a value less than
jaroslav@68	2337	* {@code 0} if this {@code Character} is numerically less
jaroslav@68	2338	* than the {@code Character} argument; and a value greater than
jaroslav@68	2339	* {@code 0} if this {@code Character} is numerically greater
jaroslav@68	2340	* than the {@code Character} argument (unsigned comparison).
jaroslav@68	2341	* Note that this is strictly a numerical comparison; it is not
jaroslav@68	2342	* locale-dependent.
jaroslav@68	2343	* @since 1.2
jaroslav@68	2344	*/
jaroslav@68	2345	public int compareTo(Character anotherCharacter) {
jaroslav@68	2346	return compare(this.value, anotherCharacter.value);
jaroslav@68	2347	}
jaroslav@68	2348
jaroslav@68	2349	/**
jaroslav@68	2350	* Compares two {@code char} values numerically.
jaroslav@68	2351	* The value returned is identical to what would be returned by:
jaroslav@68	2352	* <pre>
jaroslav@68	2353	* Character.valueOf(x).compareTo(Character.valueOf(y))
jaroslav@68	2354	* </pre>
jaroslav@68	2355	*
jaroslav@68	2356	* @param x the first {@code char} to compare
jaroslav@68	2357	* @param y the second {@code char} to compare
jaroslav@68	2358	* @return the value {@code 0} if {@code x == y};
jaroslav@68	2359	* a value less than {@code 0} if {@code x < y}; and
jaroslav@68	2360	* a value greater than {@code 0} if {@code x > y}
jaroslav@68	2361	* @since 1.7
jaroslav@68	2362	*/
jaroslav@68	2363	public static int compare(char x, char y) {
jaroslav@68	2364	return x - y;
jaroslav@68	2365	}
jaroslav@68	2366
jaroslav@68	2367
jaroslav@68	2368	/**
jaroslav@68	2369	* The number of bits used to represent a {@code char} value in unsigned
jaroslav@68	2370	* binary form, constant {@code 16}.
jaroslav@68	2371	*
jaroslav@68	2372	* @since 1.5
jaroslav@68	2373	*/
public static final int SIZE = 16; // 16 bits, i.e. the two 8-bit halves that reverseBytes(char) swaps
jaroslav@68	2375
jaroslav@68	2376	/**
jaroslav@68	2377	* Returns the value obtained by reversing the order of the bytes in the
jaroslav@68	2378	* specified {@code char} value.
jaroslav@68	2379	* @param ch the {@code char} whose bytes are to be reversed.
jaroslav@68	2380	* @return the value obtained by reversing (or, equivalently, swapping)
jaroslav@68	2381	* the bytes in the specified {@code char} value.
jaroslav@68	2382	* @since 1.5
jaroslav@68	2383	*/
jaroslav@68	2384	public static char reverseBytes(char ch) {
jaroslav@68	2385	return (char) (((ch & 0xFF00) >> 8) \| (ch << 8));
jaroslav@68	2386	}
jaroslav@68	2387
jaroslav@68	2388	}

author	Jaroslav Tulach <jaroslav.tulach@apidesign.org>
	Sun, 30 Sep 2012 18:40:47 -0700
branch	emul
changeset 85	9f3c454e74d4
parent 68	a2924470187b
child 326	23b4a344fe02
permissions	-rw-r--r--