hg/bck2brwsr: rt/emul/mini/src/main/java/java/lang/Character.java@d382dacfd73f (annotated)

jaroslav@68	1	/*
jaroslav@68	2	* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
jaroslav@68	3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
jaroslav@68	4	*
jaroslav@68	5	* This code is free software; you can redistribute it and/or modify it
jaroslav@68	6	* under the terms of the GNU General Public License version 2 only, as
jaroslav@68	7	* published by the Free Software Foundation. Oracle designates this
jaroslav@68	8	* particular file as subject to the "Classpath" exception as provided
jaroslav@68	9	* by Oracle in the LICENSE file that accompanied this code.
jaroslav@68	10	*
jaroslav@68	11	* This code is distributed in the hope that it will be useful, but WITHOUT
jaroslav@68	12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
jaroslav@68	13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
jaroslav@68	14	* version 2 for more details (a copy is included in the LICENSE file that
jaroslav@68	15	* accompanied this code).
jaroslav@68	16	*
jaroslav@68	17	* You should have received a copy of the GNU General Public License version
jaroslav@68	18	* 2 along with this work; if not, write to the Free Software Foundation,
jaroslav@68	19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
jaroslav@68	20	*
jaroslav@68	21	* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
jaroslav@68	22	* or visit www.oracle.com if you need additional information or have any
jaroslav@68	23	* questions.
jaroslav@68	24	*/
jaroslav@68	25
jaroslav@68	26	package java.lang;
jaroslav@68	27
jaroslav@326	28	import org.apidesign.bck2brwsr.core.JavaScriptBody;
jaroslav@326	29
jaroslav@68	30	/**
jaroslav@68	31	* The {@code Character} class wraps a value of the primitive
jaroslav@68	32	* type {@code char} in an object. An object of type
jaroslav@68	33	* {@code Character} contains a single field whose type is
jaroslav@68	34	* {@code char}.
jaroslav@68	35	* <p>
jaroslav@68	36	* In addition, this class provides several methods for determining
jaroslav@68	37	* a character's category (lowercase letter, digit, etc.) and for converting
jaroslav@68	38	* characters from uppercase to lowercase and vice versa.
jaroslav@68	39	* <p>
jaroslav@68	40	* Character information is based on the Unicode Standard, version 6.0.0.
jaroslav@68	41	* <p>
jaroslav@68	42	* The methods and data of class {@code Character} are defined by
jaroslav@68	43	* the information in the <i>UnicodeData</i> file that is part of the
jaroslav@68	44	* Unicode Character Database maintained by the Unicode
jaroslav@68	45	* Consortium. This file specifies various properties including name
jaroslav@68	46	* and general category for every defined Unicode code point or
jaroslav@68	47	* character range.
jaroslav@68	48	* <p>
jaroslav@68	49	* The file and its description are available from the Unicode Consortium at:
jaroslav@68	50	* <ul>
jaroslav@68	51	* <li><a href="http://www.unicode.org">http://www.unicode.org</a>
jaroslav@68	52	* </ul>
jaroslav@68	53	*
jaroslav@68	54	* <h4><a name="unicode">Unicode Character Representations</a></h4>
jaroslav@68	55	*
jaroslav@68	56	* <p>The {@code char} data type (and therefore the value that a
jaroslav@68	57	* {@code Character} object encapsulates) are based on the
jaroslav@68	58	* original Unicode specification, which defined characters as
jaroslav@68	59	* fixed-width 16-bit entities. The Unicode Standard has since been
jaroslav@68	60	* changed to allow for characters whose representation requires more
jaroslav@68	61	* than 16 bits. The range of legal <em>code point</em>s is now
jaroslav@68	62	* U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
jaroslav@68	63	* (Refer to the <a
jaroslav@68	64	* href="http://www.unicode.org/reports/tr27/#notation"><i>
jaroslav@68	65	* definition</i></a> of the U+<i>n</i> notation in the Unicode
jaroslav@68	66	* Standard.)
jaroslav@68	67	*
jaroslav@68	68	* <p><a name="BMP">The set of characters from U+0000 to U+FFFF is
jaroslav@68	69	* sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
jaroslav@68	70	* <a name="supplementary">Characters</a> whose code points are greater
jaroslav@68	71	* than U+FFFF are called <em>supplementary character</em>s. The Java
jaroslav@68	72	* platform uses the UTF-16 representation in {@code char} arrays and
jaroslav@68	73	* in the {@code String} and {@code StringBuffer} classes. In
jaroslav@68	74	* this representation, supplementary characters are represented as a pair
jaroslav@68	75	* of {@code char} values, the first from the <em>high-surrogates</em>
jaroslav@68	76	* range, (\uD800-\uDBFF), the second from the
jaroslav@68	77	* <em>low-surrogates</em> range (\uDC00-\uDFFF).
jaroslav@68	78	*
jaroslav@68	79	* <p>A {@code char} value, therefore, represents Basic
jaroslav@68	80	* Multilingual Plane (BMP) code points, including the surrogate
jaroslav@68	81	* code points, or code units of the UTF-16 encoding. An
jaroslav@68	82	* {@code int} value represents all Unicode code points,
jaroslav@68	83	* including supplementary code points. The lower (least significant)
jaroslav@68	84	* 21 bits of {@code int} are used to represent Unicode code
jaroslav@68	85	* points and the upper (most significant) 11 bits must be zero.
jaroslav@68	86	* Unless otherwise specified, the behavior with respect to
jaroslav@68	87	* supplementary characters and surrogate {@code char} values is
jaroslav@68	88	* as follows:
jaroslav@68	89	*
jaroslav@68	90	* <ul>
jaroslav@68	91	* <li>The methods that only accept a {@code char} value cannot support
jaroslav@68	92	* supplementary characters. They treat {@code char} values from the
jaroslav@68	93	* surrogate ranges as undefined characters. For example,
jaroslav@68	94	* {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
jaroslav@68	95	* this specific value if followed by any low-surrogate value in a string
jaroslav@68	96	* would represent a letter.
jaroslav@68	97	*
jaroslav@68	98	* <li>The methods that accept an {@code int} value support all
jaroslav@68	99	* Unicode characters, including supplementary characters. For
jaroslav@68	100	* example, {@code Character.isLetter(0x2F81A)} returns
jaroslav@68	101	* {@code true} because the code point value represents a letter
jaroslav@68	102	* (a CJK ideograph).
jaroslav@68	103	* </ul>
jaroslav@68	104	*
jaroslav@68	105	* <p>In the Java SE API documentation, <em>Unicode code point</em> is
jaroslav@68	106	* used for character values in the range between U+0000 and U+10FFFF,
jaroslav@68	107	* and <em>Unicode code unit</em> is used for 16-bit
jaroslav@68	108	* {@code char} values that are code units of the <em>UTF-16</em>
jaroslav@68	109	* encoding. For more information on Unicode terminology, refer to the
jaroslav@68	110	* <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
jaroslav@68	111	*
jaroslav@68	112	* @author Lee Boynton
jaroslav@68	113	* @author Guy Steele
jaroslav@68	114	* @author Akira Tanaka
jaroslav@68	115	* @author Martin Buchholz
jaroslav@68	116	* @author Ulf Zibis
jaroslav@68	117	* @since 1.0
jaroslav@68	118	*/
jaroslav@68	119	public final
jaroslav@68	120	class Character implements java.io.Serializable, Comparable<Character> {
jaroslav@68	121	/**
jaroslav@68	122	* The minimum radix available for conversion to and from strings.
jaroslav@68	123	* The constant value of this field is the smallest value permitted
jaroslav@68	124	* for the radix argument in radix-conversion methods such as the
jaroslav@68	125	* {@code digit} method, the {@code forDigit} method, and the
jaroslav@68	126	* {@code toString} method of class {@code Integer}.
jaroslav@68	127	*
jaroslav@68	128	* @see Character#digit(char, int)
jaroslav@68	129	* @see Character#forDigit(int, int)
jaroslav@68	130	* @see Integer#toString(int, int)
jaroslav@68	131	* @see Integer#valueOf(String)
jaroslav@68	132	*/
jaroslav@68	133	public static final int MIN_RADIX = 2;
jaroslav@68	134
jaroslav@68	135	/**
jaroslav@68	136	* The maximum radix available for conversion to and from strings.
jaroslav@68	137	* The constant value of this field is the largest value permitted
jaroslav@68	138	* for the radix argument in radix-conversion methods such as the
jaroslav@68	139	* {@code digit} method, the {@code forDigit} method, and the
jaroslav@68	140	* {@code toString} method of class {@code Integer}.
jaroslav@68	141	*
jaroslav@68	142	* @see Character#digit(char, int)
jaroslav@68	143	* @see Character#forDigit(int, int)
jaroslav@68	144	* @see Integer#toString(int, int)
jaroslav@68	145	* @see Integer#valueOf(String)
jaroslav@68	146	*/
jaroslav@68	147	public static final int MAX_RADIX = 36;
jaroslav@68	148
jaroslav@68	149	/**
jaroslav@68	150	* The constant value of this field is the smallest value of type
jaroslav@68	151	* {@code char}, {@code '\u005Cu0000'}.
jaroslav@68	152	*
jaroslav@68	153	* @since 1.0.2
jaroslav@68	154	*/
jaroslav@68	155	public static final char MIN_VALUE = '\u0000';
jaroslav@68	156
jaroslav@68	157	/**
jaroslav@68	158	* The constant value of this field is the largest value of type
jaroslav@68	159	* {@code char}, {@code '\u005CuFFFF'}.
jaroslav@68	160	*
jaroslav@68	161	* @since 1.0.2
jaroslav@68	162	*/
jaroslav@68	163	public static final char MAX_VALUE = '\uFFFF';
jaroslav@68	164
jaroslav@68	165	/**
jaroslav@68	166	* The {@code Class} instance representing the primitive type
jaroslav@68	167	* {@code char}.
jaroslav@68	168	*
jaroslav@68	169	* @since 1.1
jaroslav@68	170	*/
jaroslav@68	171	public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
jaroslav@68	172
jaroslav@68	173	/*
jaroslav@68	174	* Normative general types
jaroslav@68	175	*/
jaroslav@68	176
jaroslav@68	177	/*
jaroslav@68	178	* General character types
jaroslav@68	179	*/
jaroslav@68	180
jaroslav@68	181	/**
jaroslav@68	182	* General category "Cn" in the Unicode specification.
jaroslav@68	183	* @since 1.1
jaroslav@68	184	*/
jaroslav@68	185	public static final byte UNASSIGNED = 0;
jaroslav@68	186
jaroslav@68	187	/**
jaroslav@68	188	* General category "Lu" in the Unicode specification.
jaroslav@68	189	* @since 1.1
jaroslav@68	190	*/
jaroslav@68	191	public static final byte UPPERCASE_LETTER = 1;
jaroslav@68	192
jaroslav@68	193	/**
jaroslav@68	194	* General category "Ll" in the Unicode specification.
jaroslav@68	195	* @since 1.1
jaroslav@68	196	*/
jaroslav@68	197	public static final byte LOWERCASE_LETTER = 2;
jaroslav@68	198
jaroslav@68	199	/**
jaroslav@68	200	* General category "Lt" in the Unicode specification.
jaroslav@68	201	* @since 1.1
jaroslav@68	202	*/
jaroslav@68	203	public static final byte TITLECASE_LETTER = 3;
jaroslav@68	204
jaroslav@68	205	/**
jaroslav@68	206	* General category "Lm" in the Unicode specification.
jaroslav@68	207	* @since 1.1
jaroslav@68	208	*/
jaroslav@68	209	public static final byte MODIFIER_LETTER = 4;
jaroslav@68	210
jaroslav@68	211	/**
jaroslav@68	212	* General category "Lo" in the Unicode specification.
jaroslav@68	213	* @since 1.1
jaroslav@68	214	*/
jaroslav@68	215	public static final byte OTHER_LETTER = 5;
jaroslav@68	216
jaroslav@68	217	/**
jaroslav@68	218	* General category "Mn" in the Unicode specification.
jaroslav@68	219	* @since 1.1
jaroslav@68	220	*/
jaroslav@68	221	public static final byte NON_SPACING_MARK = 6;
jaroslav@68	222
jaroslav@68	223	/**
jaroslav@68	224	* General category "Me" in the Unicode specification.
jaroslav@68	225	* @since 1.1
jaroslav@68	226	*/
jaroslav@68	227	public static final byte ENCLOSING_MARK = 7;
jaroslav@68	228
jaroslav@68	229	/**
jaroslav@68	230	* General category "Mc" in the Unicode specification.
jaroslav@68	231	* @since 1.1
jaroslav@68	232	*/
jaroslav@68	233	public static final byte COMBINING_SPACING_MARK = 8;
jaroslav@68	234
jaroslav@68	235	/**
jaroslav@68	236	* General category "Nd" in the Unicode specification.
jaroslav@68	237	* @since 1.1
jaroslav@68	238	*/
jaroslav@68	239	public static final byte DECIMAL_DIGIT_NUMBER = 9;
jaroslav@68	240
jaroslav@68	241	/**
jaroslav@68	242	* General category "Nl" in the Unicode specification.
jaroslav@68	243	* @since 1.1
jaroslav@68	244	*/
jaroslav@68	245	public static final byte LETTER_NUMBER = 10;
jaroslav@68	246
jaroslav@68	247	/**
jaroslav@68	248	* General category "No" in the Unicode specification.
jaroslav@68	249	* @since 1.1
jaroslav@68	250	*/
jaroslav@68	251	public static final byte OTHER_NUMBER = 11;
jaroslav@68	252
jaroslav@68	253	/**
jaroslav@68	254	* General category "Zs" in the Unicode specification.
jaroslav@68	255	* @since 1.1
jaroslav@68	256	*/
jaroslav@68	257	public static final byte SPACE_SEPARATOR = 12;
jaroslav@68	258
jaroslav@68	259	/**
jaroslav@68	260	* General category "Zl" in the Unicode specification.
jaroslav@68	261	* @since 1.1
jaroslav@68	262	*/
jaroslav@68	263	public static final byte LINE_SEPARATOR = 13;
jaroslav@68	264
jaroslav@68	265	/**
jaroslav@68	266	* General category "Zp" in the Unicode specification.
jaroslav@68	267	* @since 1.1
jaroslav@68	268	*/
jaroslav@68	269	public static final byte PARAGRAPH_SEPARATOR = 14;
jaroslav@68	270
jaroslav@68	271	/**
jaroslav@68	272	* General category "Cc" in the Unicode specification.
jaroslav@68	273	* @since 1.1
jaroslav@68	274	*/
jaroslav@68	275	public static final byte CONTROL = 15;
jaroslav@68	276
jaroslav@68	277	/**
jaroslav@68	278	* General category "Cf" in the Unicode specification.
jaroslav@68	279	* @since 1.1
jaroslav@68	280	*/
jaroslav@68	281	public static final byte FORMAT = 16;
jaroslav@68	282
jaroslav@68	283	/**
jaroslav@68	284	* General category "Co" in the Unicode specification.
jaroslav@68	285	* @since 1.1
jaroslav@68	286	*/
jaroslav@68	287	public static final byte PRIVATE_USE = 18;
jaroslav@68	288
jaroslav@68	289	/**
jaroslav@68	290	* General category "Cs" in the Unicode specification.
jaroslav@68	291	* @since 1.1
jaroslav@68	292	*/
jaroslav@68	293	public static final byte SURROGATE = 19;
jaroslav@68	294
jaroslav@68	295	/**
jaroslav@68	296	* General category "Pd" in the Unicode specification.
jaroslav@68	297	* @since 1.1
jaroslav@68	298	*/
jaroslav@68	299	public static final byte DASH_PUNCTUATION = 20;
jaroslav@68	300
jaroslav@68	301	/**
jaroslav@68	302	* General category "Ps" in the Unicode specification.
jaroslav@68	303	* @since 1.1
jaroslav@68	304	*/
jaroslav@68	305	public static final byte START_PUNCTUATION = 21;
jaroslav@68	306
jaroslav@68	307	/**
jaroslav@68	308	* General category "Pe" in the Unicode specification.
jaroslav@68	309	* @since 1.1
jaroslav@68	310	*/
jaroslav@68	311	public static final byte END_PUNCTUATION = 22;
jaroslav@68	312
jaroslav@68	313	/**
jaroslav@68	314	* General category "Pc" in the Unicode specification.
jaroslav@68	315	* @since 1.1
jaroslav@68	316	*/
jaroslav@68	317	public static final byte CONNECTOR_PUNCTUATION = 23;
jaroslav@68	318
jaroslav@68	319	/**
jaroslav@68	320	* General category "Po" in the Unicode specification.
jaroslav@68	321	* @since 1.1
jaroslav@68	322	*/
jaroslav@68	323	public static final byte OTHER_PUNCTUATION = 24;
jaroslav@68	324
jaroslav@68	325	/**
jaroslav@68	326	* General category "Sm" in the Unicode specification.
jaroslav@68	327	* @since 1.1
jaroslav@68	328	*/
jaroslav@68	329	public static final byte MATH_SYMBOL = 25;
jaroslav@68	330
jaroslav@68	331	/**
jaroslav@68	332	* General category "Sc" in the Unicode specification.
jaroslav@68	333	* @since 1.1
jaroslav@68	334	*/
jaroslav@68	335	public static final byte CURRENCY_SYMBOL = 26;
jaroslav@68	336
jaroslav@68	337	/**
jaroslav@68	338	* General category "Sk" in the Unicode specification.
jaroslav@68	339	* @since 1.1
jaroslav@68	340	*/
jaroslav@68	341	public static final byte MODIFIER_SYMBOL = 27;
jaroslav@68	342
jaroslav@68	343	/**
jaroslav@68	344	* General category "So" in the Unicode specification.
jaroslav@68	345	* @since 1.1
jaroslav@68	346	*/
jaroslav@68	347	public static final byte OTHER_SYMBOL = 28;
jaroslav@68	348
jaroslav@68	349	/**
jaroslav@68	350	* General category "Pi" in the Unicode specification.
jaroslav@68	351	* @since 1.4
jaroslav@68	352	*/
jaroslav@68	353	public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
jaroslav@68	354
jaroslav@68	355	/**
jaroslav@68	356	* General category "Pf" in the Unicode specification.
jaroslav@68	357	* @since 1.4
jaroslav@68	358	*/
jaroslav@68	359	public static final byte FINAL_QUOTE_PUNCTUATION = 30;
jaroslav@68	360
jaroslav@68	361	/**
jaroslav@68	362	* Error flag. Use int (code point) to avoid confusion with U+FFFF.
jaroslav@68	363	*/
jaroslav@68	364	static final int ERROR = 0xFFFFFFFF;
jaroslav@68	365
jaroslav@68	366
jaroslav@68	367	/**
jaroslav@68	368	* Undefined bidirectional character type. Undefined {@code char}
jaroslav@68	369	* values have undefined directionality in the Unicode specification.
jaroslav@68	370	* @since 1.4
jaroslav@68	371	*/
jaroslav@68	372	public static final byte DIRECTIONALITY_UNDEFINED = -1;
jaroslav@68	373
jaroslav@68	374	/**
jaroslav@68	375	* Strong bidirectional character type "L" in the Unicode specification.
jaroslav@68	376	* @since 1.4
jaroslav@68	377	*/
jaroslav@68	378	public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
jaroslav@68	379
jaroslav@68	380	/**
jaroslav@68	381	* Strong bidirectional character type "R" in the Unicode specification.
jaroslav@68	382	* @since 1.4
jaroslav@68	383	*/
jaroslav@68	384	public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
jaroslav@68	385
jaroslav@68	386	/**
jaroslav@68	387	* Strong bidirectional character type "AL" in the Unicode specification.
jaroslav@68	388	* @since 1.4
jaroslav@68	389	*/
jaroslav@68	390	public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
jaroslav@68	391
jaroslav@68	392	/**
jaroslav@68	393	* Weak bidirectional character type "EN" in the Unicode specification.
jaroslav@68	394	* @since 1.4
jaroslav@68	395	*/
jaroslav@68	396	public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
jaroslav@68	397
jaroslav@68	398	/**
jaroslav@68	399	* Weak bidirectional character type "ES" in the Unicode specification.
jaroslav@68	400	* @since 1.4
jaroslav@68	401	*/
jaroslav@68	402	public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
jaroslav@68	403
jaroslav@68	404	/**
jaroslav@68	405	* Weak bidirectional character type "ET" in the Unicode specification.
jaroslav@68	406	* @since 1.4
jaroslav@68	407	*/
jaroslav@68	408	public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
jaroslav@68	409
jaroslav@68	410	/**
jaroslav@68	411	* Weak bidirectional character type "AN" in the Unicode specification.
jaroslav@68	412	* @since 1.4
jaroslav@68	413	*/
jaroslav@68	414	public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
jaroslav@68	415
jaroslav@68	416	/**
jaroslav@68	417	* Weak bidirectional character type "CS" in the Unicode specification.
jaroslav@68	418	* @since 1.4
jaroslav@68	419	*/
jaroslav@68	420	public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
jaroslav@68	421
jaroslav@68	422	/**
jaroslav@68	423	* Weak bidirectional character type "NSM" in the Unicode specification.
jaroslav@68	424	* @since 1.4
jaroslav@68	425	*/
jaroslav@68	426	public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
jaroslav@68	427
jaroslav@68	428	/**
jaroslav@68	429	* Weak bidirectional character type "BN" in the Unicode specification.
jaroslav@68	430	* @since 1.4
jaroslav@68	431	*/
jaroslav@68	432	public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
jaroslav@68	433
jaroslav@68	434	/**
jaroslav@68	435	* Neutral bidirectional character type "B" in the Unicode specification.
jaroslav@68	436	* @since 1.4
jaroslav@68	437	*/
jaroslav@68	438	public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
jaroslav@68	439
jaroslav@68	440	/**
jaroslav@68	441	* Neutral bidirectional character type "S" in the Unicode specification.
jaroslav@68	442	* @since 1.4
jaroslav@68	443	*/
jaroslav@68	444	public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
jaroslav@68	445
jaroslav@68	446	/**
jaroslav@68	447	* Neutral bidirectional character type "WS" in the Unicode specification.
jaroslav@68	448	* @since 1.4
jaroslav@68	449	*/
jaroslav@68	450	public static final byte DIRECTIONALITY_WHITESPACE = 12;
jaroslav@68	451
jaroslav@68	452	/**
jaroslav@68	453	* Neutral bidirectional character type "ON" in the Unicode specification.
jaroslav@68	454	* @since 1.4
jaroslav@68	455	*/
jaroslav@68	456	public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
jaroslav@68	457
jaroslav@68	458	/**
jaroslav@68	459	* Strong bidirectional character type "LRE" in the Unicode specification.
jaroslav@68	460	* @since 1.4
jaroslav@68	461	*/
jaroslav@68	462	public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
jaroslav@68	463
jaroslav@68	464	/**
jaroslav@68	465	* Strong bidirectional character type "LRO" in the Unicode specification.
jaroslav@68	466	* @since 1.4
jaroslav@68	467	*/
jaroslav@68	468	public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
jaroslav@68	469
jaroslav@68	470	/**
jaroslav@68	471	* Strong bidirectional character type "RLE" in the Unicode specification.
jaroslav@68	472	* @since 1.4
jaroslav@68	473	*/
jaroslav@68	474	public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
jaroslav@68	475
jaroslav@68	476	/**
jaroslav@68	477	* Strong bidirectional character type "RLO" in the Unicode specification.
jaroslav@68	478	* @since 1.4
jaroslav@68	479	*/
jaroslav@68	480	public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
jaroslav@68	481
jaroslav@68	482	/**
jaroslav@68	483	* Weak bidirectional character type "PDF" in the Unicode specification.
jaroslav@68	484	* @since 1.4
jaroslav@68	485	*/
jaroslav@68	486	public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
jaroslav@68	487
jaroslav@68	488	/**
jaroslav@68	489	* The minimum value of a
jaroslav@68	490	* <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
jaroslav@68	491	* Unicode high-surrogate code unit</a>
jaroslav@68	492	* in the UTF-16 encoding, constant {@code '\u005CuD800'}.
jaroslav@68	493	* A high-surrogate is also known as a <i>leading-surrogate</i>.
jaroslav@68	494	*
jaroslav@68	495	* @since 1.5
jaroslav@68	496	*/
jaroslav@68	497	public static final char MIN_HIGH_SURROGATE = '\uD800';
jaroslav@68	498
jaroslav@68	499	/**
jaroslav@68	500	* The maximum value of a
jaroslav@68	501	* <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
jaroslav@68	502	* Unicode high-surrogate code unit</a>
jaroslav@68	503	* in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
jaroslav@68	504	* A high-surrogate is also known as a <i>leading-surrogate</i>.
jaroslav@68	505	*
jaroslav@68	506	* @since 1.5
jaroslav@68	507	*/
jaroslav@68	508	public static final char MAX_HIGH_SURROGATE = '\uDBFF';
jaroslav@68	509
jaroslav@68	510	/**
jaroslav@68	511	* The minimum value of a
jaroslav@68	512	* <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
jaroslav@68	513	* Unicode low-surrogate code unit</a>
jaroslav@68	514	* in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
jaroslav@68	515	* A low-surrogate is also known as a <i>trailing-surrogate</i>.
jaroslav@68	516	*
jaroslav@68	517	* @since 1.5
jaroslav@68	518	*/
jaroslav@68	519	public static final char MIN_LOW_SURROGATE = '\uDC00';
jaroslav@68	520
jaroslav@68	521	/**
jaroslav@68	522	* The maximum value of a
jaroslav@68	523	* <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
jaroslav@68	524	* Unicode low-surrogate code unit</a>
jaroslav@68	525	* in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
jaroslav@68	526	* A low-surrogate is also known as a <i>trailing-surrogate</i>.
jaroslav@68	527	*
jaroslav@68	528	* @since 1.5
jaroslav@68	529	*/
jaroslav@68	530	public static final char MAX_LOW_SURROGATE = '\uDFFF';
jaroslav@68	531
jaroslav@68	532	/**
jaroslav@68	533	* The minimum value of a Unicode surrogate code unit in the
jaroslav@68	534	* UTF-16 encoding, constant {@code '\u005CuD800'}.
jaroslav@68	535	*
jaroslav@68	536	* @since 1.5
jaroslav@68	537	*/
jaroslav@68	538	public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
jaroslav@68	539
jaroslav@68	540	/**
jaroslav@68	541	* The maximum value of a Unicode surrogate code unit in the
jaroslav@68	542	* UTF-16 encoding, constant {@code '\u005CuDFFF'}.
jaroslav@68	543	*
jaroslav@68	544	* @since 1.5
jaroslav@68	545	*/
jaroslav@68	546	public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
jaroslav@68	547
jaroslav@68	548	/**
jaroslav@68	549	* The minimum value of a
jaroslav@68	550	* <a href="http://www.unicode.org/glossary/#supplementary_code_point">
jaroslav@68	551	* Unicode supplementary code point</a>, constant {@code U+10000}.
jaroslav@68	552	*
jaroslav@68	553	* @since 1.5
jaroslav@68	554	*/
jaroslav@68	555	public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
jaroslav@68	556
jaroslav@68	557	/**
jaroslav@68	558	* The minimum value of a
jaroslav@68	559	* <a href="http://www.unicode.org/glossary/#code_point">
jaroslav@68	560	* Unicode code point</a>, constant {@code U+0000}.
jaroslav@68	561	*
jaroslav@68	562	* @since 1.5
jaroslav@68	563	*/
jaroslav@68	564	public static final int MIN_CODE_POINT = 0x000000;
jaroslav@68	565
jaroslav@68	566	/**
jaroslav@68	567	* The maximum value of a
jaroslav@68	568	* <a href="http://www.unicode.org/glossary/#code_point">
jaroslav@68	569	* Unicode code point</a>, constant {@code U+10FFFF}.
jaroslav@68	570	*
jaroslav@68	571	* @since 1.5
jaroslav@68	572	*/
jaroslav@68	573	public static final int MAX_CODE_POINT = 0X10FFFF;
jaroslav@68	574
jaroslav@68	575
jaroslav@68	576	/**
jaroslav@68	577	* Instances of this class represent particular subsets of the Unicode
jaroslav@68	578	* character set. The only family of subsets defined in the
jaroslav@68	579	* {@code Character} class is {@link Character.UnicodeBlock}.
jaroslav@68	580	* Other portions of the Java API may define other subsets for their
jaroslav@68	581	* own purposes.
jaroslav@68	582	*
jaroslav@68	583	* @since 1.2
jaroslav@68	584	*/
jaroslav@68	585	public static class Subset {
jaroslav@68	586
jaroslav@68	587	private String name;
jaroslav@68	588
jaroslav@68	589	/**
jaroslav@68	590	* Constructs a new {@code Subset} instance.
jaroslav@68	591	*
jaroslav@68	592	* @param name The name of this subset
jaroslav@68	593	* @exception NullPointerException if name is {@code null}
jaroslav@68	594	*/
jaroslav@68	595	protected Subset(String name) {
jaroslav@68	596	if (name == null) {
jaroslav@68	597	throw new NullPointerException("name");
jaroslav@68	598	}
jaroslav@68	599	this.name = name;
jaroslav@68	600	}
jaroslav@68	601
jaroslav@68	602	/**
jaroslav@68	603	* Compares two {@code Subset} objects for equality.
jaroslav@68	604	* This method returns {@code true} if and only if
jaroslav@68	605	* {@code this} and the argument refer to the same
jaroslav@68	606	* object; since this method is {@code final}, this
jaroslav@68	607	* guarantee holds for all subclasses.
jaroslav@68	608	*/
jaroslav@68	609	public final boolean equals(Object obj) {
jaroslav@68	610	return (this == obj);
jaroslav@68	611	}
jaroslav@68	612
jaroslav@68	613	/**
jaroslav@68	614	* Returns the standard hash code as defined by the
jaroslav@68	615	* {@link Object#hashCode} method. This method
jaroslav@68	616	* is {@code final} in order to ensure that the
jaroslav@68	617	* {@code equals} and {@code hashCode} methods will
jaroslav@68	618	* be consistent in all subclasses.
jaroslav@68	619	*/
jaroslav@68	620	public final int hashCode() {
jaroslav@68	621	return super.hashCode();
jaroslav@68	622	}
jaroslav@68	623
jaroslav@68	624	/**
jaroslav@68	625	* Returns the name of this subset.
jaroslav@68	626	*/
jaroslav@68	627	public final String toString() {
jaroslav@68	628	return name;
jaroslav@68	629	}
jaroslav@68	630	}
jaroslav@68	631
jaroslav@68	632	// See http://www.unicode.org/Public/UNIDATA/Blocks.txt
jaroslav@68	633	// for the latest specification of Unicode Blocks.
jaroslav@68	634
jaroslav@68	635
jaroslav@68	636	/**
jaroslav@68	637	* The value of the {@code Character}.
jaroslav@68	638	*
jaroslav@68	639	* @serial
jaroslav@68	640	*/
jaroslav@68	641	private final char value;
jaroslav@68	642
jaroslav@68	643	/** use serialVersionUID from JDK 1.0.2 for interoperability */
jaroslav@68	644	private static final long serialVersionUID = 3786198910865385080L;
jaroslav@68	645
/**
 * Creates a new {@code Character} wrapping the given primitive
 * {@code char}. The wrapped value is stored as-is and cannot change
 * afterwards.
 *
 * @param value the {@code char} this object will represent
 */
public Character(char value) {
    this.value = value;
}
jaroslav@68	656
jaroslav@68	657	private static class CharacterCache {
jaroslav@68	658	private CharacterCache(){}
jaroslav@68	659
jaroslav@68	660	static final Character cache[] = new Character[127 + 1];
jaroslav@68	661
jaroslav@68	662	static {
jaroslav@68	663	for (int i = 0; i < cache.length; i++)
jaroslav@68	664	cache[i] = new Character((char)i);
jaroslav@68	665	}
jaroslav@68	666	}
jaroslav@68	667
jaroslav@68	668	/**
jaroslav@68	669	* Returns a <tt>Character</tt> instance representing the specified
jaroslav@68	670	* <tt>char</tt> value.
jaroslav@68	671	* If a new <tt>Character</tt> instance is not required, this method
jaroslav@68	672	* should generally be used in preference to the constructor
jaroslav@68	673	* {@link #Character(char)}, as this method is likely to yield
jaroslav@68	674	* significantly better space and time performance by caching
jaroslav@68	675	* frequently requested values.
jaroslav@68	676	*
jaroslav@68	677	* This method will always cache values in the range {@code
jaroslav@68	678	* '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
jaroslav@68	679	* cache other values outside of this range.
jaroslav@68	680	*
jaroslav@68	681	* @param c a char value.
jaroslav@68	682	* @return a <tt>Character</tt> instance representing <tt>c</tt>.
jaroslav@68	683	* @since 1.5
jaroslav@68	684	*/
jaroslav@68	685	public static Character valueOf(char c) {
jaroslav@68	686	if (c <= 127) { // must cache
jaroslav@68	687	return CharacterCache.cache[(int)c];
jaroslav@68	688	}
jaroslav@68	689	return new Character(c);
jaroslav@68	690	}
jaroslav@68	691
jaroslav@68	692	/**
jaroslav@68	693	* Returns the value of this {@code Character} object.
jaroslav@68	694	* @return the primitive {@code char} value represented by
jaroslav@68	695	* this object.
jaroslav@68	696	*/
jaroslav@68	697	public char charValue() {
jaroslav@68	698	return value;
jaroslav@68	699	}
jaroslav@68	700
jaroslav@68	701	/**
jaroslav@68	702	* Returns a hash code for this {@code Character}; equal to the result
jaroslav@68	703	* of invoking {@code charValue()}.
jaroslav@68	704	*
jaroslav@68	705	* @return a hash code value for this {@code Character}
jaroslav@68	706	*/
jaroslav@68	707	public int hashCode() {
jaroslav@68	708	return (int)value;
jaroslav@68	709	}
jaroslav@68	710
jaroslav@68	711	/**
jaroslav@68	712	* Compares this object against the specified object.
jaroslav@68	713	* The result is {@code true} if and only if the argument is not
jaroslav@68	714	* {@code null} and is a {@code Character} object that
jaroslav@68	715	* represents the same {@code char} value as this object.
jaroslav@68	716	*
jaroslav@68	717	* @param obj the object to compare with.
jaroslav@68	718	* @return {@code true} if the objects are the same;
jaroslav@68	719	* {@code false} otherwise.
jaroslav@68	720	*/
jaroslav@68	721	public boolean equals(Object obj) {
jaroslav@68	722	if (obj instanceof Character) {
jaroslav@68	723	return value == ((Character)obj).charValue();
jaroslav@68	724	}
jaroslav@68	725	return false;
jaroslav@68	726	}
jaroslav@68	727
jaroslav@68	728	/**
jaroslav@68	729	* Returns a {@code String} object representing this
jaroslav@68	730	* {@code Character}'s value. The result is a string of
jaroslav@68	731	* length 1 whose sole component is the primitive
jaroslav@68	732	* {@code char} value represented by this
jaroslav@68	733	* {@code Character} object.
jaroslav@68	734	*
jaroslav@68	735	* @return a string representation of this object.
jaroslav@68	736	*/
jaroslav@68	737	public String toString() {
jaroslav@68	738	char buf[] = {value};
jaroslav@68	739	return String.valueOf(buf);
jaroslav@68	740	}
jaroslav@68	741
jaroslav@68	742	/**
jaroslav@68	743	* Returns a {@code String} object representing the
jaroslav@68	744	* specified {@code char}. The result is a string of length
jaroslav@68	745	* 1 consisting solely of the specified {@code char}.
jaroslav@68	746	*
jaroslav@68	747	* @param c the {@code char} to be converted
jaroslav@68	748	* @return the string representation of the specified {@code char}
jaroslav@68	749	* @since 1.4
jaroslav@68	750	*/
jaroslav@68	751	public static String toString(char c) {
jaroslav@68	752	return String.valueOf(c);
jaroslav@68	753	}
jaroslav@68	754
jaroslav@68	755	/**
jaroslav@68	756	* Determines whether the specified code point is a valid
jaroslav@68	757	* <a href="http://www.unicode.org/glossary/#code_point">
jaroslav@68	758	* Unicode code point value</a>.
jaroslav@68	759	*
jaroslav@68	760	* @param codePoint the Unicode code point to be tested
jaroslav@68	761	* @return {@code true} if the specified code point value is between
jaroslav@68	762	* {@link #MIN_CODE_POINT} and
jaroslav@68	763	* {@link #MAX_CODE_POINT} inclusive;
jaroslav@68	764	* {@code false} otherwise.
jaroslav@68	765	* @since 1.5
jaroslav@68	766	*/
jaroslav@68	767	public static boolean isValidCodePoint(int codePoint) {
jaroslav@68	768	// Optimized form of:
jaroslav@68	769	// codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
jaroslav@68	770	int plane = codePoint >>> 16;
jaroslav@68	771	return plane < ((MAX_CODE_POINT + 1) >>> 16);
jaroslav@68	772	}
jaroslav@68	773
jaroslav@68	774	/**
jaroslav@68	775	* Determines whether the specified character (Unicode code point)
jaroslav@68	776	* is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
jaroslav@68	777	* Such code points can be represented using a single {@code char}.
jaroslav@68	778	*
jaroslav@68	779	* @param codePoint the character (Unicode code point) to be tested
jaroslav@68	780	* @return {@code true} if the specified code point is between
jaroslav@68	781	* {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
jaroslav@68	782	* {@code false} otherwise.
jaroslav@68	783	* @since 1.7
jaroslav@68	784	*/
jaroslav@68	785	public static boolean isBmpCodePoint(int codePoint) {
jaroslav@68	786	return codePoint >>> 16 == 0;
jaroslav@68	787	// Optimized form of:
jaroslav@68	788	// codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
jaroslav@68	789	// We consistently use logical shift (>>>) to facilitate
jaroslav@68	790	// additional runtime optimizations.
jaroslav@68	791	}
jaroslav@68	792
jaroslav@68	793	/**
jaroslav@68	794	* Determines whether the specified character (Unicode code point)
jaroslav@68	795	* is in the <a href="#supplementary">supplementary character</a> range.
jaroslav@68	796	*
jaroslav@68	797	* @param codePoint the character (Unicode code point) to be tested
jaroslav@68	798	* @return {@code true} if the specified code point is between
jaroslav@68	799	* {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
jaroslav@68	800	* {@link #MAX_CODE_POINT} inclusive;
jaroslav@68	801	* {@code false} otherwise.
jaroslav@68	802	* @since 1.5
jaroslav@68	803	*/
jaroslav@68	804	public static boolean isSupplementaryCodePoint(int codePoint) {
jaroslav@68	805	return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
jaroslav@68	806	&& codePoint < MAX_CODE_POINT + 1;
jaroslav@68	807	}
jaroslav@68	808
jaroslav@68	809	/**
jaroslav@68	810	* Determines if the given {@code char} value is a
jaroslav@68	811	* <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
jaroslav@68	812	* Unicode high-surrogate code unit</a>
jaroslav@68	813	* (also known as <i>leading-surrogate code unit</i>).
jaroslav@68	814	*
jaroslav@68	815	* <p>Such values do not represent characters by themselves,
jaroslav@68	816	* but are used in the representation of
jaroslav@68	817	* <a href="#supplementary">supplementary characters</a>
jaroslav@68	818	* in the UTF-16 encoding.
jaroslav@68	819	*
jaroslav@68	820	* @param ch the {@code char} value to be tested.
jaroslav@68	821	* @return {@code true} if the {@code char} value is between
jaroslav@68	822	* {@link #MIN_HIGH_SURROGATE} and
jaroslav@68	823	* {@link #MAX_HIGH_SURROGATE} inclusive;
jaroslav@68	824	* {@code false} otherwise.
jaroslav@68	825	* @see Character#isLowSurrogate(char)
jaroslav@68	826	* @see Character.UnicodeBlock#of(int)
jaroslav@68	827	* @since 1.5
jaroslav@68	828	*/
jaroslav@68	829	public static boolean isHighSurrogate(char ch) {
jaroslav@68	830	// Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
jaroslav@68	831	return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
jaroslav@68	832	}
jaroslav@68	833
jaroslav@68	834	/**
jaroslav@68	835	* Determines if the given {@code char} value is a
jaroslav@68	836	* <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
jaroslav@68	837	* Unicode low-surrogate code unit</a>
jaroslav@68	838	* (also known as <i>trailing-surrogate code unit</i>).
jaroslav@68	839	*
jaroslav@68	840	* <p>Such values do not represent characters by themselves,
jaroslav@68	841	* but are used in the representation of
jaroslav@68	842	* <a href="#supplementary">supplementary characters</a>
jaroslav@68	843	* in the UTF-16 encoding.
jaroslav@68	844	*
jaroslav@68	845	* @param ch the {@code char} value to be tested.
jaroslav@68	846	* @return {@code true} if the {@code char} value is between
jaroslav@68	847	* {@link #MIN_LOW_SURROGATE} and
jaroslav@68	848	* {@link #MAX_LOW_SURROGATE} inclusive;
jaroslav@68	849	* {@code false} otherwise.
jaroslav@68	850	* @see Character#isHighSurrogate(char)
jaroslav@68	851	* @since 1.5
jaroslav@68	852	*/
jaroslav@68	853	public static boolean isLowSurrogate(char ch) {
jaroslav@68	854	return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
jaroslav@68	855	}
jaroslav@68	856
jaroslav@68	857	/**
jaroslav@68	858	* Determines if the given {@code char} value is a Unicode
jaroslav@68	859	* <i>surrogate code unit</i>.
jaroslav@68	860	*
jaroslav@68	861	* <p>Such values do not represent characters by themselves,
jaroslav@68	862	* but are used in the representation of
jaroslav@68	863	* <a href="#supplementary">supplementary characters</a>
jaroslav@68	864	* in the UTF-16 encoding.
jaroslav@68	865	*
jaroslav@68	866	* <p>A char value is a surrogate code unit if and only if it is either
jaroslav@68	867	* a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
jaroslav@68	868	* a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
jaroslav@68	869	*
jaroslav@68	870	* @param ch the {@code char} value to be tested.
jaroslav@68	871	* @return {@code true} if the {@code char} value is between
jaroslav@68	872	* {@link #MIN_SURROGATE} and
jaroslav@68	873	* {@link #MAX_SURROGATE} inclusive;
jaroslav@68	874	* {@code false} otherwise.
jaroslav@68	875	* @since 1.7
jaroslav@68	876	*/
jaroslav@68	877	public static boolean isSurrogate(char ch) {
jaroslav@68	878	return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
jaroslav@68	879	}
jaroslav@68	880
jaroslav@68	881	/**
jaroslav@68	882	* Determines whether the specified pair of {@code char}
jaroslav@68	883	* values is a valid
jaroslav@68	884	* <a href="http://www.unicode.org/glossary/#surrogate_pair">
jaroslav@68	885	* Unicode surrogate pair</a>.
jaroslav@68	886
jaroslav@68	887	* <p>This method is equivalent to the expression:
jaroslav@68	888	* <blockquote><pre>
jaroslav@68	889	* isHighSurrogate(high) && isLowSurrogate(low)
jaroslav@68	890	* </pre></blockquote>
jaroslav@68	891	*
jaroslav@68	892	* @param high the high-surrogate code value to be tested
jaroslav@68	893	* @param low the low-surrogate code value to be tested
jaroslav@68	894	* @return {@code true} if the specified high and
jaroslav@68	895	* low-surrogate code values represent a valid surrogate pair;
jaroslav@68	896	* {@code false} otherwise.
jaroslav@68	897	* @since 1.5
jaroslav@68	898	*/
jaroslav@68	899	public static boolean isSurrogatePair(char high, char low) {
jaroslav@68	900	return isHighSurrogate(high) && isLowSurrogate(low);
jaroslav@68	901	}
jaroslav@68	902
jaroslav@68	903	/**
jaroslav@68	904	* Determines the number of {@code char} values needed to
jaroslav@68	905	* represent the specified character (Unicode code point). If the
jaroslav@68	906	* specified character is equal to or greater than 0x10000, then
jaroslav@68	907	* the method returns 2. Otherwise, the method returns 1.
jaroslav@68	908	*
jaroslav@68	909	* <p>This method doesn't validate the specified character to be a
jaroslav@68	910	* valid Unicode code point. The caller must validate the
jaroslav@68	911	* character value using {@link #isValidCodePoint(int) isValidCodePoint}
jaroslav@68	912	* if necessary.
jaroslav@68	913	*
jaroslav@68	914	* @param codePoint the character (Unicode code point) to be tested.
jaroslav@68	915	* @return 2 if the character is a valid supplementary character; 1 otherwise.
jaroslav@68	916	* @see Character#isSupplementaryCodePoint(int)
jaroslav@68	917	* @since 1.5
jaroslav@68	918	*/
jaroslav@68	919	public static int charCount(int codePoint) {
jaroslav@68	920	return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
jaroslav@68	921	}
jaroslav@68	922
jaroslav@68	923	/**
jaroslav@68	924	* Converts the specified surrogate pair to its supplementary code
jaroslav@68	925	* point value. This method does not validate the specified
jaroslav@68	926	* surrogate pair. The caller must validate it using {@link
jaroslav@68	927	* #isSurrogatePair(char, char) isSurrogatePair} if necessary.
jaroslav@68	928	*
jaroslav@68	929	* @param high the high-surrogate code unit
jaroslav@68	930	* @param low the low-surrogate code unit
jaroslav@68	931	* @return the supplementary code point composed from the
jaroslav@68	932	* specified surrogate pair.
jaroslav@68	933	* @since 1.5
jaroslav@68	934	*/
jaroslav@68	935	public static int toCodePoint(char high, char low) {
jaroslav@68	936	// Optimized form of:
jaroslav@68	937	// return ((high - MIN_HIGH_SURROGATE) << 10)
jaroslav@68	938	// + (low - MIN_LOW_SURROGATE)
jaroslav@68	939	// + MIN_SUPPLEMENTARY_CODE_POINT;
jaroslav@68	940	return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
jaroslav@68	941	- (MIN_HIGH_SURROGATE << 10)
jaroslav@68	942	- MIN_LOW_SURROGATE);
jaroslav@68	943	}
jaroslav@68	944
jaroslav@68	945	/**
jaroslav@68	946	* Returns the code point at the given index of the
jaroslav@68	947	* {@code CharSequence}. If the {@code char} value at
jaroslav@68	948	* the given index in the {@code CharSequence} is in the
jaroslav@68	949	* high-surrogate range, the following index is less than the
jaroslav@68	950	* length of the {@code CharSequence}, and the
jaroslav@68	951	* {@code char} value at the following index is in the
jaroslav@68	952	* low-surrogate range, then the supplementary code point
jaroslav@68	953	* corresponding to this surrogate pair is returned. Otherwise,
jaroslav@68	954	* the {@code char} value at the given index is returned.
jaroslav@68	955	*
jaroslav@68	956	* @param seq a sequence of {@code char} values (Unicode code
jaroslav@68	957	* units)
jaroslav@68	958	* @param index the index to the {@code char} values (Unicode
jaroslav@68	959	* code units) in {@code seq} to be converted
jaroslav@68	960	* @return the Unicode code point at the given index
jaroslav@68	961	* @exception NullPointerException if {@code seq} is null.
jaroslav@68	962	* @exception IndexOutOfBoundsException if the value
jaroslav@68	963	* {@code index} is negative or not less than
jaroslav@68	964	* {@link CharSequence#length() seq.length()}.
jaroslav@68	965	* @since 1.5
jaroslav@68	966	*/
jaroslav@68	967	public static int codePointAt(CharSequence seq, int index) {
jaroslav@68	968	char c1 = seq.charAt(index++);
jaroslav@68	969	if (isHighSurrogate(c1)) {
jaroslav@68	970	if (index < seq.length()) {
jaroslav@68	971	char c2 = seq.charAt(index);
jaroslav@68	972	if (isLowSurrogate(c2)) {
jaroslav@68	973	return toCodePoint(c1, c2);
jaroslav@68	974	}
jaroslav@68	975	}
jaroslav@68	976	}
jaroslav@68	977	return c1;
jaroslav@68	978	}
jaroslav@68	979
jaroslav@68	980	/**
jaroslav@68	981	* Returns the code point at the given index of the
jaroslav@68	982	* {@code char} array. If the {@code char} value at
jaroslav@68	983	* the given index in the {@code char} array is in the
jaroslav@68	984	* high-surrogate range, the following index is less than the
jaroslav@68	985	* length of the {@code char} array, and the
jaroslav@68	986	* {@code char} value at the following index is in the
jaroslav@68	987	* low-surrogate range, then the supplementary code point
jaroslav@68	988	* corresponding to this surrogate pair is returned. Otherwise,
jaroslav@68	989	* the {@code char} value at the given index is returned.
jaroslav@68	990	*
jaroslav@68	991	* @param a the {@code char} array
jaroslav@68	992	* @param index the index to the {@code char} values (Unicode
jaroslav@68	993	* code units) in the {@code char} array to be converted
jaroslav@68	994	* @return the Unicode code point at the given index
jaroslav@68	995	* @exception NullPointerException if {@code a} is null.
jaroslav@68	996	* @exception IndexOutOfBoundsException if the value
jaroslav@68	997	* {@code index} is negative or not less than
jaroslav@68	998	* the length of the {@code char} array.
jaroslav@68	999	* @since 1.5
jaroslav@68	1000	*/
jaroslav@68	1001	public static int codePointAt(char[] a, int index) {
jaroslav@68	1002	return codePointAtImpl(a, index, a.length);
jaroslav@68	1003	}
jaroslav@68	1004
jaroslav@68	1005	/**
jaroslav@68	1006	* Returns the code point at the given index of the
jaroslav@68	1007	* {@code char} array, where only array elements with
jaroslav@68	1008	* {@code index} less than {@code limit} can be used. If
jaroslav@68	1009	* the {@code char} value at the given index in the
jaroslav@68	1010	* {@code char} array is in the high-surrogate range, the
jaroslav@68	1011	* following index is less than the {@code limit}, and the
jaroslav@68	1012	* {@code char} value at the following index is in the
jaroslav@68	1013	* low-surrogate range, then the supplementary code point
jaroslav@68	1014	* corresponding to this surrogate pair is returned. Otherwise,
jaroslav@68	1015	* the {@code char} value at the given index is returned.
jaroslav@68	1016	*
jaroslav@68	1017	* @param a the {@code char} array
jaroslav@68	1018	* @param index the index to the {@code char} values (Unicode
jaroslav@68	1019	* code units) in the {@code char} array to be converted
jaroslav@68	1020	* @param limit the index after the last array element that
jaroslav@68	1021	* can be used in the {@code char} array
jaroslav@68	1022	* @return the Unicode code point at the given index
jaroslav@68	1023	* @exception NullPointerException if {@code a} is null.
jaroslav@68	1024	* @exception IndexOutOfBoundsException if the {@code index}
jaroslav@68	1025	* argument is negative or not less than the {@code limit}
jaroslav@68	1026	* argument, or if the {@code limit} argument is negative or
jaroslav@68	1027	* greater than the length of the {@code char} array.
jaroslav@68	1028	* @since 1.5
jaroslav@68	1029	*/
jaroslav@68	1030	public static int codePointAt(char[] a, int index, int limit) {
jaroslav@68	1031	if (index >= limit \|\| limit < 0 \|\| limit > a.length) {
jaroslav@68	1032	throw new IndexOutOfBoundsException();
jaroslav@68	1033	}
jaroslav@68	1034	return codePointAtImpl(a, index, limit);
jaroslav@68	1035	}
jaroslav@68	1036
jaroslav@68	1037	// throws ArrayIndexOutofBoundsException if index out of bounds
jaroslav@68	1038	static int codePointAtImpl(char[] a, int index, int limit) {
jaroslav@68	1039	char c1 = a[index++];
jaroslav@68	1040	if (isHighSurrogate(c1)) {
jaroslav@68	1041	if (index < limit) {
jaroslav@68	1042	char c2 = a[index];
jaroslav@68	1043	if (isLowSurrogate(c2)) {
jaroslav@68	1044	return toCodePoint(c1, c2);
jaroslav@68	1045	}
jaroslav@68	1046	}
jaroslav@68	1047	}
jaroslav@68	1048	return c1;
jaroslav@68	1049	}
jaroslav@68	1050
jaroslav@68	1051	/**
jaroslav@68	1052	* Returns the code point preceding the given index of the
jaroslav@68	1053	* {@code CharSequence}. If the {@code char} value at
jaroslav@68	1054	* {@code (index - 1)} in the {@code CharSequence} is in
jaroslav@68	1055	* the low-surrogate range, {@code (index - 2)} is not
jaroslav@68	1056	* negative, and the {@code char} value at {@code (index - 2)}
jaroslav@68	1057	* in the {@code CharSequence} is in the
jaroslav@68	1058	* high-surrogate range, then the supplementary code point
jaroslav@68	1059	* corresponding to this surrogate pair is returned. Otherwise,
jaroslav@68	1060	* the {@code char} value at {@code (index - 1)} is
jaroslav@68	1061	* returned.
jaroslav@68	1062	*
jaroslav@68	1063	* @param seq the {@code CharSequence} instance
jaroslav@68	1064	* @param index the index following the code point that should be returned
jaroslav@68	1065	* @return the Unicode code point value before the given index.
jaroslav@68	1066	* @exception NullPointerException if {@code seq} is null.
jaroslav@68	1067	* @exception IndexOutOfBoundsException if the {@code index}
jaroslav@68	1068	* argument is less than 1 or greater than {@link
jaroslav@68	1069	* CharSequence#length() seq.length()}.
jaroslav@68	1070	* @since 1.5
jaroslav@68	1071	*/
jaroslav@68	1072	public static int codePointBefore(CharSequence seq, int index) {
jaroslav@68	1073	char c2 = seq.charAt(--index);
jaroslav@68	1074	if (isLowSurrogate(c2)) {
jaroslav@68	1075	if (index > 0) {
jaroslav@68	1076	char c1 = seq.charAt(--index);
jaroslav@68	1077	if (isHighSurrogate(c1)) {
jaroslav@68	1078	return toCodePoint(c1, c2);
jaroslav@68	1079	}
jaroslav@68	1080	}
jaroslav@68	1081	}
jaroslav@68	1082	return c2;
jaroslav@68	1083	}
jaroslav@68	1084
jaroslav@68	1085	/**
jaroslav@68	1086	* Returns the code point preceding the given index of the
jaroslav@68	1087	* {@code char} array. If the {@code char} value at
jaroslav@68	1088	* {@code (index - 1)} in the {@code char} array is in
jaroslav@68	1089	* the low-surrogate range, {@code (index - 2)} is not
jaroslav@68	1090	* negative, and the {@code char} value at {@code (index - 2)}
jaroslav@68	1091	* in the {@code char} array is in the
jaroslav@68	1092	* high-surrogate range, then the supplementary code point
jaroslav@68	1093	* corresponding to this surrogate pair is returned. Otherwise,
jaroslav@68	1094	* the {@code char} value at {@code (index - 1)} is
jaroslav@68	1095	* returned.
jaroslav@68	1096	*
jaroslav@68	1097	* @param a the {@code char} array
jaroslav@68	1098	* @param index the index following the code point that should be returned
jaroslav@68	1099	* @return the Unicode code point value before the given index.
jaroslav@68	1100	* @exception NullPointerException if {@code a} is null.
jaroslav@68	1101	* @exception IndexOutOfBoundsException if the {@code index}
jaroslav@68	1102	* argument is less than 1 or greater than the length of the
jaroslav@68	1103	* {@code char} array
jaroslav@68	1104	* @since 1.5
jaroslav@68	1105	*/
jaroslav@68	1106	public static int codePointBefore(char[] a, int index) {
jaroslav@68	1107	return codePointBeforeImpl(a, index, 0);
jaroslav@68	1108	}
jaroslav@68	1109
jaroslav@68	1110	/**
jaroslav@68	1111	* Returns the code point preceding the given index of the
jaroslav@68	1112	* {@code char} array, where only array elements with
jaroslav@68	1113	* {@code index} greater than or equal to {@code start}
jaroslav@68	1114	* can be used. If the {@code char} value at {@code (index - 1)}
jaroslav@68	1115	* in the {@code char} array is in the
jaroslav@68	1116	* low-surrogate range, {@code (index - 2)} is not less than
jaroslav@68	1117	* {@code start}, and the {@code char} value at
jaroslav@68	1118	* {@code (index - 2)} in the {@code char} array is in
jaroslav@68	1119	* the high-surrogate range, then the supplementary code point
jaroslav@68	1120	* corresponding to this surrogate pair is returned. Otherwise,
jaroslav@68	1121	* the {@code char} value at {@code (index - 1)} is
jaroslav@68	1122	* returned.
jaroslav@68	1123	*
jaroslav@68	1124	* @param a the {@code char} array
jaroslav@68	1125	* @param index the index following the code point that should be returned
jaroslav@68	1126	* @param start the index of the first array element in the
jaroslav@68	1127	* {@code char} array
jaroslav@68	1128	* @return the Unicode code point value before the given index.
jaroslav@68	1129	* @exception NullPointerException if {@code a} is null.
jaroslav@68	1130	* @exception IndexOutOfBoundsException if the {@code index}
jaroslav@68	1131	* argument is not greater than the {@code start} argument or
jaroslav@68	1132	* is greater than the length of the {@code char} array, or
jaroslav@68	1133	* if the {@code start} argument is negative or not less than
jaroslav@68	1134	* the length of the {@code char} array.
jaroslav@68	1135	* @since 1.5
jaroslav@68	1136	*/
jaroslav@68	1137	public static int codePointBefore(char[] a, int index, int start) {
jaroslav@68	1138	if (index <= start \|\| start < 0 \|\| start >= a.length) {
jaroslav@68	1139	throw new IndexOutOfBoundsException();
jaroslav@68	1140	}
jaroslav@68	1141	return codePointBeforeImpl(a, index, start);
jaroslav@68	1142	}
jaroslav@68	1143
jaroslav@68	1144	// throws ArrayIndexOutofBoundsException if index-1 out of bounds
jaroslav@68	1145	static int codePointBeforeImpl(char[] a, int index, int start) {
jaroslav@68	1146	char c2 = a[--index];
jaroslav@68	1147	if (isLowSurrogate(c2)) {
jaroslav@68	1148	if (index > start) {
jaroslav@68	1149	char c1 = a[--index];
jaroslav@68	1150	if (isHighSurrogate(c1)) {
jaroslav@68	1151	return toCodePoint(c1, c2);
jaroslav@68	1152	}
jaroslav@68	1153	}
jaroslav@68	1154	}
jaroslav@68	1155	return c2;
jaroslav@68	1156	}
jaroslav@68	1157
jaroslav@68	1158	/**
jaroslav@68	1159	* Returns the leading surrogate (a
jaroslav@68	1160	* <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
jaroslav@68	1161	* high surrogate code unit</a>) of the
jaroslav@68	1162	* <a href="http://www.unicode.org/glossary/#surrogate_pair">
jaroslav@68	1163	* surrogate pair</a>
jaroslav@68	1164	* representing the specified supplementary character (Unicode
jaroslav@68	1165	* code point) in the UTF-16 encoding. If the specified character
jaroslav@68	1166	* is not a
jaroslav@68	1167	* <a href="Character.html#supplementary">supplementary character</a>,
jaroslav@68	1168	* an unspecified {@code char} is returned.
jaroslav@68	1169	*
jaroslav@68	1170	* <p>If
jaroslav@68	1171	* {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
jaroslav@68	1172	* is {@code true}, then
jaroslav@68	1173	* {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
jaroslav@68	1174	* {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
jaroslav@68	1175	* are also always {@code true}.
jaroslav@68	1176	*
jaroslav@68	1177	* @param codePoint a supplementary character (Unicode code point)
jaroslav@68	1178	* @return the leading surrogate code unit used to represent the
jaroslav@68	1179	* character in the UTF-16 encoding
jaroslav@68	1180	* @since 1.7
jaroslav@68	1181	*/
jaroslav@68	1182	public static char highSurrogate(int codePoint) {
jaroslav@68	1183	return (char) ((codePoint >>> 10)
jaroslav@68	1184	+ (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
jaroslav@68	1185	}
jaroslav@68	1186
jaroslav@68	1187	/**
jaroslav@68	1188	* Returns the trailing surrogate (a
jaroslav@68	1189	* <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
jaroslav@68	1190	* low surrogate code unit</a>) of the
jaroslav@68	1191	* <a href="http://www.unicode.org/glossary/#surrogate_pair">
jaroslav@68	1192	* surrogate pair</a>
jaroslav@68	1193	* representing the specified supplementary character (Unicode
jaroslav@68	1194	* code point) in the UTF-16 encoding. If the specified character
jaroslav@68	1195	* is not a
jaroslav@68	1196	* <a href="Character.html#supplementary">supplementary character</a>,
jaroslav@68	1197	* an unspecified {@code char} is returned.
jaroslav@68	1198	*
jaroslav@68	1199	* <p>If
jaroslav@68	1200	* {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
jaroslav@68	1201	* is {@code true}, then
jaroslav@68	1202	* {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
jaroslav@68	1203	* {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
jaroslav@68	1204	* are also always {@code true}.
jaroslav@68	1205	*
jaroslav@68	1206	* @param codePoint a supplementary character (Unicode code point)
jaroslav@68	1207	* @return the trailing surrogate code unit used to represent the
jaroslav@68	1208	* character in the UTF-16 encoding
jaroslav@68	1209	* @since 1.7
jaroslav@68	1210	*/
jaroslav@68	1211	public static char lowSurrogate(int codePoint) {
jaroslav@68	1212	return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
jaroslav@68	1213	}
jaroslav@68	1214
jaroslav@68	1215	/**
jaroslav@68	1216	* Converts the specified character (Unicode code point) to its
jaroslav@68	1217	* UTF-16 representation. If the specified code point is a BMP
jaroslav@68	1218	* (Basic Multilingual Plane or Plane 0) value, the same value is
jaroslav@68	1219	* stored in {@code dst[dstIndex]}, and 1 is returned. If the
jaroslav@68	1220	* specified code point is a supplementary character, its
jaroslav@68	1221	* surrogate values are stored in {@code dst[dstIndex]}
jaroslav@68	1222	* (high-surrogate) and {@code dst[dstIndex+1]}
jaroslav@68	1223	* (low-surrogate), and 2 is returned.
jaroslav@68	1224	*
jaroslav@68	1225	* @param codePoint the character (Unicode code point) to be converted.
jaroslav@68	1226	* @param dst an array of {@code char} in which the
jaroslav@68	1227	* {@code codePoint}'s UTF-16 value is stored.
jaroslav@68	1228	* @param dstIndex the start index into the {@code dst}
jaroslav@68	1229	* array where the converted value is stored.
jaroslav@68	1230	* @return 1 if the code point is a BMP code point, 2 if the
jaroslav@68	1231	* code point is a supplementary code point.
jaroslav@68	1232	* @exception IllegalArgumentException if the specified
jaroslav@68	1233	* {@code codePoint} is not a valid Unicode code point.
jaroslav@68	1234	* @exception NullPointerException if the specified {@code dst} is null.
jaroslav@68	1235	* @exception IndexOutOfBoundsException if {@code dstIndex}
jaroslav@68	1236	* is negative or not less than {@code dst.length}, or if
jaroslav@68	1237	* {@code dst} at {@code dstIndex} doesn't have enough
jaroslav@68	1238	* array element(s) to store the resulting {@code char}
jaroslav@68	1239	* value(s). (If {@code dstIndex} is equal to
jaroslav@68	1240	* {@code dst.length-1} and the specified
jaroslav@68	1241	* {@code codePoint} is a supplementary character, the
jaroslav@68	1242	* high-surrogate value is not stored in
jaroslav@68	1243	* {@code dst[dstIndex]}.)
jaroslav@68	1244	* @since 1.5
jaroslav@68	1245	*/
jaroslav@68	1246	public static int toChars(int codePoint, char[] dst, int dstIndex) {
jaroslav@68	1247	if (isBmpCodePoint(codePoint)) {
jaroslav@68	1248	dst[dstIndex] = (char) codePoint;
jaroslav@68	1249	return 1;
jaroslav@68	1250	} else if (isValidCodePoint(codePoint)) {
jaroslav@68	1251	toSurrogates(codePoint, dst, dstIndex);
jaroslav@68	1252	return 2;
jaroslav@68	1253	} else {
jaroslav@68	1254	throw new IllegalArgumentException();
jaroslav@68	1255	}
jaroslav@68	1256	}
jaroslav@68	1257
jaroslav@68	1258	/**
jaroslav@68	1259	* Converts the specified character (Unicode code point) to its
jaroslav@68	1260	* UTF-16 representation stored in a {@code char} array. If
jaroslav@68	1261	* the specified code point is a BMP (Basic Multilingual Plane or
jaroslav@68	1262	* Plane 0) value, the resulting {@code char} array has
jaroslav@68	1263	* the same value as {@code codePoint}. If the specified code
jaroslav@68	1264	* point is a supplementary code point, the resulting
jaroslav@68	1265	* {@code char} array has the corresponding surrogate pair.
jaroslav@68	1266	*
jaroslav@68	1267	* @param codePoint a Unicode code point
jaroslav@68	1268	* @return a {@code char} array having
jaroslav@68	1269	* {@code codePoint}'s UTF-16 representation.
jaroslav@68	1270	* @exception IllegalArgumentException if the specified
jaroslav@68	1271	* {@code codePoint} is not a valid Unicode code point.
jaroslav@68	1272	* @since 1.5
jaroslav@68	1273	*/
jaroslav@68	1274	public static char[] toChars(int codePoint) {
jaroslav@68	1275	if (isBmpCodePoint(codePoint)) {
jaroslav@68	1276	return new char[] { (char) codePoint };
jaroslav@68	1277	} else if (isValidCodePoint(codePoint)) {
jaroslav@68	1278	char[] result = new char[2];
jaroslav@68	1279	toSurrogates(codePoint, result, 0);
jaroslav@68	1280	return result;
jaroslav@68	1281	} else {
jaroslav@68	1282	throw new IllegalArgumentException();
jaroslav@68	1283	}
jaroslav@68	1284	}
jaroslav@68	1285
jaroslav@68	1286	static void toSurrogates(int codePoint, char[] dst, int index) {
jaroslav@68	1287	// We write elements "backwards" to guarantee all-or-nothing
jaroslav@68	1288	dst[index+1] = lowSurrogate(codePoint);
jaroslav@68	1289	dst[index] = highSurrogate(codePoint);
jaroslav@68	1290	}
jaroslav@68	1291
jaroslav@68	1292	/**
jaroslav@68	1293	* Returns the number of Unicode code points in the text range of
jaroslav@68	1294	* the specified char sequence. The text range begins at the
jaroslav@68	1295	* specified {@code beginIndex} and extends to the
jaroslav@68	1296	* {@code char} at index {@code endIndex - 1}. Thus the
jaroslav@68	1297	* length (in {@code char}s) of the text range is
jaroslav@68	1298	* {@code endIndex-beginIndex}. Unpaired surrogates within
jaroslav@68	1299	* the text range count as one code point each.
jaroslav@68	1300	*
jaroslav@68	1301	* @param seq the char sequence
jaroslav@68	1302	* @param beginIndex the index to the first {@code char} of
jaroslav@68	1303	* the text range.
jaroslav@68	1304	* @param endIndex the index after the last {@code char} of
jaroslav@68	1305	* the text range.
jaroslav@68	1306	* @return the number of Unicode code points in the specified text
jaroslav@68	1307	* range
jaroslav@68	1308	* @exception NullPointerException if {@code seq} is null.
jaroslav@68	1309	* @exception IndexOutOfBoundsException if the
jaroslav@68	1310	* {@code beginIndex} is negative, or {@code endIndex}
jaroslav@68	1311	* is larger than the length of the given sequence, or
jaroslav@68	1312	* {@code beginIndex} is larger than {@code endIndex}.
jaroslav@68	1313	* @since 1.5
jaroslav@68	1314	*/
jaroslav@68	1315	public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
jaroslav@68	1316	int length = seq.length();
jaroslav@68	1317	if (beginIndex < 0 \|\| endIndex > length \|\| beginIndex > endIndex) {
jaroslav@68	1318	throw new IndexOutOfBoundsException();
jaroslav@68	1319	}
jaroslav@68	1320	int n = endIndex - beginIndex;
jaroslav@68	1321	for (int i = beginIndex; i < endIndex; ) {
jaroslav@68	1322	if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
jaroslav@68	1323	isLowSurrogate(seq.charAt(i))) {
jaroslav@68	1324	n--;
jaroslav@68	1325	i++;
jaroslav@68	1326	}
jaroslav@68	1327	}
jaroslav@68	1328	return n;
jaroslav@68	1329	}
jaroslav@68	1330
jaroslav@68	1331	/**
jaroslav@68	1332	* Returns the number of Unicode code points in a subarray of the
jaroslav@68	1333	* {@code char} array argument. The {@code offset}
jaroslav@68	1334	* argument is the index of the first {@code char} of the
jaroslav@68	1335	* subarray and the {@code count} argument specifies the
jaroslav@68	1336	* length of the subarray in {@code char}s. Unpaired
jaroslav@68	1337	* surrogates within the subarray count as one code point each.
jaroslav@68	1338	*
jaroslav@68	1339	* @param a the {@code char} array
jaroslav@68	1340	* @param offset the index of the first {@code char} in the
jaroslav@68	1341	* given {@code char} array
jaroslav@68	1342	* @param count the length of the subarray in {@code char}s
jaroslav@68	1343	* @return the number of Unicode code points in the specified subarray
jaroslav@68	1344	* @exception NullPointerException if {@code a} is null.
jaroslav@68	1345	* @exception IndexOutOfBoundsException if {@code offset} or
jaroslav@68	1346	* {@code count} is negative, or if {@code offset +
jaroslav@68	1347	* count} is larger than the length of the given array.
jaroslav@68	1348	* @since 1.5
jaroslav@68	1349	*/
jaroslav@68	1350	public static int codePointCount(char[] a, int offset, int count) {
jaroslav@68	1351	if (count > a.length - offset \|\| offset < 0 \|\| count < 0) {
jaroslav@68	1352	throw new IndexOutOfBoundsException();
jaroslav@68	1353	}
jaroslav@68	1354	return codePointCountImpl(a, offset, count);
jaroslav@68	1355	}
jaroslav@68	1356
jaroslav@68	1357	static int codePointCountImpl(char[] a, int offset, int count) {
jaroslav@68	1358	int endIndex = offset + count;
jaroslav@68	1359	int n = count;
jaroslav@68	1360	for (int i = offset; i < endIndex; ) {
jaroslav@68	1361	if (isHighSurrogate(a[i++]) && i < endIndex &&
jaroslav@68	1362	isLowSurrogate(a[i])) {
jaroslav@68	1363	n--;
jaroslav@68	1364	i++;
jaroslav@68	1365	}
jaroslav@68	1366	}
jaroslav@68	1367	return n;
jaroslav@68	1368	}
jaroslav@68	1369
jaroslav@68	1370	/**
jaroslav@68	1371	* Returns the index within the given char sequence that is offset
jaroslav@68	1372	* from the given {@code index} by {@code codePointOffset}
jaroslav@68	1373	* code points. Unpaired surrogates within the text range given by
jaroslav@68	1374	* {@code index} and {@code codePointOffset} count as
jaroslav@68	1375	* one code point each.
jaroslav@68	1376	*
jaroslav@68	1377	* @param seq the char sequence
jaroslav@68	1378	* @param index the index to be offset
jaroslav@68	1379	* @param codePointOffset the offset in code points
jaroslav@68	1380	* @return the index within the char sequence
jaroslav@68	1381	* @exception NullPointerException if {@code seq} is null.
jaroslav@68	1382	* @exception IndexOutOfBoundsException if {@code index}
jaroslav@68	1383	* is negative or larger then the length of the char sequence,
jaroslav@68	1384	* or if {@code codePointOffset} is positive and the
jaroslav@68	1385	* subsequence starting with {@code index} has fewer than
jaroslav@68	1386	* {@code codePointOffset} code points, or if
jaroslav@68	1387	* {@code codePointOffset} is negative and the subsequence
jaroslav@68	1388	* before {@code index} has fewer than the absolute value
jaroslav@68	1389	* of {@code codePointOffset} code points.
jaroslav@68	1390	* @since 1.5
jaroslav@68	1391	*/
jaroslav@68	1392	public static int offsetByCodePoints(CharSequence seq, int index,
jaroslav@68	1393	int codePointOffset) {
jaroslav@68	1394	int length = seq.length();
jaroslav@68	1395	if (index < 0 \|\| index > length) {
jaroslav@68	1396	throw new IndexOutOfBoundsException();
jaroslav@68	1397	}
jaroslav@68	1398
jaroslav@68	1399	int x = index;
jaroslav@68	1400	if (codePointOffset >= 0) {
jaroslav@68	1401	int i;
jaroslav@68	1402	for (i = 0; x < length && i < codePointOffset; i++) {
jaroslav@68	1403	if (isHighSurrogate(seq.charAt(x++)) && x < length &&
jaroslav@68	1404	isLowSurrogate(seq.charAt(x))) {
jaroslav@68	1405	x++;
jaroslav@68	1406	}
jaroslav@68	1407	}
jaroslav@68	1408	if (i < codePointOffset) {
jaroslav@68	1409	throw new IndexOutOfBoundsException();
jaroslav@68	1410	}
jaroslav@68	1411	} else {
jaroslav@68	1412	int i;
jaroslav@68	1413	for (i = codePointOffset; x > 0 && i < 0; i++) {
jaroslav@68	1414	if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
jaroslav@68	1415	isHighSurrogate(seq.charAt(x-1))) {
jaroslav@68	1416	x--;
jaroslav@68	1417	}
jaroslav@68	1418	}
jaroslav@68	1419	if (i < 0) {
jaroslav@68	1420	throw new IndexOutOfBoundsException();
jaroslav@68	1421	}
jaroslav@68	1422	}
jaroslav@68	1423	return x;
jaroslav@68	1424	}
jaroslav@68	1425
jaroslav@68	1426	/**
jaroslav@68	1427	* Returns the index within the given {@code char} subarray
jaroslav@68	1428	* that is offset from the given {@code index} by
jaroslav@68	1429	* {@code codePointOffset} code points. The
jaroslav@68	1430	* {@code start} and {@code count} arguments specify a
jaroslav@68	1431	* subarray of the {@code char} array. Unpaired surrogates
jaroslav@68	1432	* within the text range given by {@code index} and
jaroslav@68	1433	* {@code codePointOffset} count as one code point each.
jaroslav@68	1434	*
jaroslav@68	1435	* @param a the {@code char} array
jaroslav@68	1436	* @param start the index of the first {@code char} of the
jaroslav@68	1437	* subarray
jaroslav@68	1438	* @param count the length of the subarray in {@code char}s
jaroslav@68	1439	* @param index the index to be offset
jaroslav@68	1440	* @param codePointOffset the offset in code points
jaroslav@68	1441	* @return the index within the subarray
jaroslav@68	1442	* @exception NullPointerException if {@code a} is null.
jaroslav@68	1443	* @exception IndexOutOfBoundsException
jaroslav@68	1444	* if {@code start} or {@code count} is negative,
jaroslav@68	1445	* or if {@code start + count} is larger than the length of
jaroslav@68	1446	* the given array,
jaroslav@68	1447	* or if {@code index} is less than {@code start} or
jaroslav@68	1448	* larger then {@code start + count},
jaroslav@68	1449	* or if {@code codePointOffset} is positive and the text range
jaroslav@68	1450	* starting with {@code index} and ending with {@code start + count - 1}
jaroslav@68	1451	* has fewer than {@code codePointOffset} code
jaroslav@68	1452	* points,
jaroslav@68	1453	* or if {@code codePointOffset} is negative and the text range
jaroslav@68	1454	* starting with {@code start} and ending with {@code index - 1}
jaroslav@68	1455	* has fewer than the absolute value of
jaroslav@68	1456	* {@code codePointOffset} code points.
jaroslav@68	1457	* @since 1.5
jaroslav@68	1458	*/
jaroslav@68	1459	public static int offsetByCodePoints(char[] a, int start, int count,
jaroslav@68	1460	int index, int codePointOffset) {
jaroslav@68	1461	if (count > a.length-start \|\| start < 0 \|\| count < 0
jaroslav@68	1462	\|\| index < start \|\| index > start+count) {
jaroslav@68	1463	throw new IndexOutOfBoundsException();
jaroslav@68	1464	}
jaroslav@68	1465	return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
jaroslav@68	1466	}
jaroslav@68	1467
jaroslav@68	1468	static int offsetByCodePointsImpl(char[]a, int start, int count,
jaroslav@68	1469	int index, int codePointOffset) {
jaroslav@68	1470	int x = index;
jaroslav@68	1471	if (codePointOffset >= 0) {
jaroslav@68	1472	int limit = start + count;
jaroslav@68	1473	int i;
jaroslav@68	1474	for (i = 0; x < limit && i < codePointOffset; i++) {
jaroslav@68	1475	if (isHighSurrogate(a[x++]) && x < limit &&
jaroslav@68	1476	isLowSurrogate(a[x])) {
jaroslav@68	1477	x++;
jaroslav@68	1478	}
jaroslav@68	1479	}
jaroslav@68	1480	if (i < codePointOffset) {
jaroslav@68	1481	throw new IndexOutOfBoundsException();
jaroslav@68	1482	}
jaroslav@68	1483	} else {
jaroslav@68	1484	int i;
jaroslav@68	1485	for (i = codePointOffset; x > start && i < 0; i++) {
jaroslav@68	1486	if (isLowSurrogate(a[--x]) && x > start &&
jaroslav@68	1487	isHighSurrogate(a[x-1])) {
jaroslav@68	1488	x--;
jaroslav@68	1489	}
jaroslav@68	1490	}
jaroslav@68	1491	if (i < 0) {
jaroslav@68	1492	throw new IndexOutOfBoundsException();
jaroslav@68	1493	}
jaroslav@68	1494	}
jaroslav@68	1495	return x;
jaroslav@68	1496	}
jaroslav@68	1497
jaroslav@68	1498	/**
jaroslav@68	1499	* Determines if the specified character is a lowercase character.
jaroslav@68	1500	* <p>
jaroslav@68	1501	* A character is lowercase if its general category type, provided
jaroslav@68	1502	* by {@code Character.getType(ch)}, is
jaroslav@68	1503	* {@code LOWERCASE_LETTER}, or it has contributory property
jaroslav@68	1504	* Other_Lowercase as defined by the Unicode Standard.
jaroslav@68	1505	* <p>
jaroslav@68	1506	* The following are examples of lowercase characters:
jaroslav@68	1507	* <p><blockquote><pre>
jaroslav@68	1508	* a b c d e f g h i j k l m n o p q r s t u v w x y z
jaroslav@68	1509	* '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
jaroslav@68	1510	* '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
jaroslav@68	1511	* '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
jaroslav@68	1512	* '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
jaroslav@68	1513	* </pre></blockquote>
jaroslav@68	1514	* <p> Many other Unicode characters are lowercase too.
jaroslav@68	1515	*
jaroslav@68	1516	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	1517	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	1518	* all Unicode characters, including supplementary characters, use
jaroslav@68	1519	* the {@link #isLowerCase(int)} method.
jaroslav@68	1520	*
jaroslav@68	1521	* @param ch the character to be tested.
jaroslav@68	1522	* @return {@code true} if the character is lowercase;
jaroslav@68	1523	* {@code false} otherwise.
jaroslav@68	1524	* @see Character#isLowerCase(char)
jaroslav@68	1525	* @see Character#isTitleCase(char)
jaroslav@68	1526	* @see Character#toLowerCase(char)
jaroslav@68	1527	* @see Character#getType(char)
jaroslav@68	1528	*/
jaroslav@68	1529	public static boolean isLowerCase(char ch) {
jaroslav@326	1530	return ch == toLowerCase(ch);
jaroslav@68	1531	}
jaroslav@68	1532
jaroslav@68	1533	/**
jaroslav@68	1534	* Determines if the specified character is an uppercase character.
jaroslav@68	1535	* <p>
jaroslav@68	1536	* A character is uppercase if its general category type, provided by
jaroslav@68	1537	* {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}.
jaroslav@68	1538	* or it has contributory property Other_Uppercase as defined by the Unicode Standard.
jaroslav@68	1539	* <p>
jaroslav@68	1540	* The following are examples of uppercase characters:
jaroslav@68	1541	* <p><blockquote><pre>
jaroslav@68	1542	* A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
jaroslav@68	1543	* '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
jaroslav@68	1544	* '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
jaroslav@68	1545	* '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
jaroslav@68	1546	* '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
jaroslav@68	1547	* </pre></blockquote>
jaroslav@68	1548	* <p> Many other Unicode characters are uppercase too.<p>
jaroslav@68	1549	*
jaroslav@68	1550	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	1551	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	1552	* all Unicode characters, including supplementary characters, use
jaroslav@68	1553	* the {@link #isUpperCase(int)} method.
jaroslav@68	1554	*
jaroslav@68	1555	* @param ch the character to be tested.
jaroslav@68	1556	* @return {@code true} if the character is uppercase;
jaroslav@68	1557	* {@code false} otherwise.
jaroslav@68	1558	* @see Character#isLowerCase(char)
jaroslav@68	1559	* @see Character#isTitleCase(char)
jaroslav@68	1560	* @see Character#toUpperCase(char)
jaroslav@68	1561	* @see Character#getType(char)
jaroslav@68	1562	* @since 1.0
jaroslav@68	1563	*/
jaroslav@68	1564	public static boolean isUpperCase(char ch) {
jaroslav@326	1565	return ch == toUpperCase(ch);
jaroslav@68	1566	}
jaroslav@68	1567
jaroslav@68	1568	/**
jaroslav@68	1569	* Determines if the specified character is a titlecase character.
jaroslav@68	1570	* <p>
jaroslav@68	1571	* A character is a titlecase character if its general
jaroslav@68	1572	* category type, provided by {@code Character.getType(ch)},
jaroslav@68	1573	* is {@code TITLECASE_LETTER}.
jaroslav@68	1574	* <p>
jaroslav@68	1575	* Some characters look like pairs of Latin letters. For example, there
jaroslav@68	1576	* is an uppercase letter that looks like "LJ" and has a corresponding
jaroslav@68	1577	* lowercase letter that looks like "lj". A third form, which looks like "Lj",
jaroslav@68	1578	* is the appropriate form to use when rendering a word in lowercase
jaroslav@68	1579	* with initial capitals, as for a book title.
jaroslav@68	1580	* <p>
jaroslav@68	1581	* These are some of the Unicode characters for which this method returns
jaroslav@68	1582	* {@code true}:
jaroslav@68	1583	* <ul>
jaroslav@68	1584	* <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
jaroslav@68	1585	* <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
jaroslav@68	1586	* <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
jaroslav@68	1587	* <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
jaroslav@68	1588	* </ul>
jaroslav@68	1589	* <p> Many other Unicode characters are titlecase too.<p>
jaroslav@68	1590	*
jaroslav@68	1591	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	1592	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	1593	* all Unicode characters, including supplementary characters, use
jaroslav@68	1594	* the {@link #isTitleCase(int)} method.
jaroslav@68	1595	*
jaroslav@68	1596	* @param ch the character to be tested.
jaroslav@68	1597	* @return {@code true} if the character is titlecase;
jaroslav@68	1598	* {@code false} otherwise.
jaroslav@68	1599	* @see Character#isLowerCase(char)
jaroslav@68	1600	* @see Character#isUpperCase(char)
jaroslav@68	1601	* @see Character#toTitleCase(char)
jaroslav@68	1602	* @see Character#getType(char)
jaroslav@68	1603	* @since 1.0.2
jaroslav@68	1604	*/
jaroslav@68	1605	public static boolean isTitleCase(char ch) {
jaroslav@68	1606	return isTitleCase((int)ch);
jaroslav@68	1607	}
jaroslav@68	1608
jaroslav@68	1609	/**
jaroslav@68	1610	* Determines if the specified character (Unicode code point) is a titlecase character.
jaroslav@68	1611	* <p>
jaroslav@68	1612	* A character is a titlecase character if its general
jaroslav@68	1613	* category type, provided by {@link Character#getType(int) getType(codePoint)},
jaroslav@68	1614	* is {@code TITLECASE_LETTER}.
jaroslav@68	1615	* <p>
jaroslav@68	1616	* Some characters look like pairs of Latin letters. For example, there
jaroslav@68	1617	* is an uppercase letter that looks like "LJ" and has a corresponding
jaroslav@68	1618	* lowercase letter that looks like "lj". A third form, which looks like "Lj",
jaroslav@68	1619	* is the appropriate form to use when rendering a word in lowercase
jaroslav@68	1620	* with initial capitals, as for a book title.
jaroslav@68	1621	* <p>
jaroslav@68	1622	* These are some of the Unicode characters for which this method returns
jaroslav@68	1623	* {@code true}:
jaroslav@68	1624	* <ul>
jaroslav@68	1625	* <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
jaroslav@68	1626	* <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
jaroslav@68	1627	* <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
jaroslav@68	1628	* <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
jaroslav@68	1629	* </ul>
jaroslav@68	1630	* <p> Many other Unicode characters are titlecase too.<p>
jaroslav@68	1631	*
jaroslav@68	1632	* @param codePoint the character (Unicode code point) to be tested.
jaroslav@68	1633	* @return {@code true} if the character is titlecase;
jaroslav@68	1634	* {@code false} otherwise.
jaroslav@68	1635	* @see Character#isLowerCase(int)
jaroslav@68	1636	* @see Character#isUpperCase(int)
jaroslav@68	1637	* @see Character#toTitleCase(int)
jaroslav@68	1638	* @see Character#getType(int)
jaroslav@68	1639	* @since 1.5
jaroslav@68	1640	*/
jaroslav@68	1641	public static boolean isTitleCase(int codePoint) {
jaroslav@68	1642	return getType(codePoint) == Character.TITLECASE_LETTER;
jaroslav@68	1643	}
jaroslav@68	1644
jaroslav@68	1645	/**
jaroslav@68	1646	* Determines if the specified character is a digit.
jaroslav@68	1647	* <p>
jaroslav@68	1648	* A character is a digit if its general category type, provided
jaroslav@68	1649	* by {@code Character.getType(ch)}, is
jaroslav@68	1650	* {@code DECIMAL_DIGIT_NUMBER}.
jaroslav@68	1651	* <p>
jaroslav@68	1652	* Some Unicode character ranges that contain digits:
jaroslav@68	1653	* <ul>
jaroslav@68	1654	* <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
jaroslav@68	1655	* ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
jaroslav@68	1656	* <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
jaroslav@68	1657	* Arabic-Indic digits
jaroslav@68	1658	* <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
jaroslav@68	1659	* Extended Arabic-Indic digits
jaroslav@68	1660	* <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
jaroslav@68	1661	* Devanagari digits
jaroslav@68	1662	* <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
jaroslav@68	1663	* Fullwidth digits
jaroslav@68	1664	* </ul>
jaroslav@68	1665	*
jaroslav@68	1666	* Many other character ranges contain digits as well.
jaroslav@68	1667	*
jaroslav@68	1668	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	1669	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	1670	* all Unicode characters, including supplementary characters, use
jaroslav@68	1671	* the {@link #isDigit(int)} method.
jaroslav@68	1672	*
jaroslav@68	1673	* @param ch the character to be tested.
jaroslav@68	1674	* @return {@code true} if the character is a digit;
jaroslav@68	1675	* {@code false} otherwise.
jaroslav@68	1676	* @see Character#digit(char, int)
jaroslav@68	1677	* @see Character#forDigit(int, int)
jaroslav@68	1678	* @see Character#getType(char)
jaroslav@68	1679	*/
jaroslav@68	1680	public static boolean isDigit(char ch) {
jaroslav@326	1681	return String.valueOf(ch).matches("\\d");
jaroslav@68	1682	}
jaroslav@68	1683
jaroslav@68	1684	/**
jaroslav@68	1685	* Determines if the specified character (Unicode code point) is a digit.
jaroslav@68	1686	* <p>
jaroslav@68	1687	* A character is a digit if its general category type, provided
jaroslav@68	1688	* by {@link Character#getType(int) getType(codePoint)}, is
jaroslav@68	1689	* {@code DECIMAL_DIGIT_NUMBER}.
jaroslav@68	1690	* <p>
jaroslav@68	1691	* Some Unicode character ranges that contain digits:
jaroslav@68	1692	* <ul>
jaroslav@68	1693	* <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
jaroslav@68	1694	* ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
jaroslav@68	1695	* <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
jaroslav@68	1696	* Arabic-Indic digits
jaroslav@68	1697	* <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
jaroslav@68	1698	* Extended Arabic-Indic digits
jaroslav@68	1699	* <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
jaroslav@68	1700	* Devanagari digits
jaroslav@68	1701	* <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
jaroslav@68	1702	* Fullwidth digits
jaroslav@68	1703	* </ul>
jaroslav@68	1704	*
jaroslav@68	1705	* Many other character ranges contain digits as well.
jaroslav@68	1706	*
jaroslav@68	1707	* @param codePoint the character (Unicode code point) to be tested.
jaroslav@68	1708	* @return {@code true} if the character is a digit;
jaroslav@68	1709	* {@code false} otherwise.
jaroslav@68	1710	* @see Character#forDigit(int, int)
jaroslav@68	1711	* @see Character#getType(int)
jaroslav@68	1712	* @since 1.5
jaroslav@68	1713	*/
jaroslav@68	1714	public static boolean isDigit(int codePoint) {
jaroslav@326	1715	return fromCodeChars(codePoint).matches("\\d");
jaroslav@68	1716	}
jaroslav@326	1717
jaroslav@326	1718	@JavaScriptBody(args = "c", body = "return String.fromCharCode(c);")
jaroslav@326	1719	private native static String fromCodeChars(int codePoint);
jaroslav@68	1720
jaroslav@68	1721	/**
jaroslav@68	1722	* Determines if a character is defined in Unicode.
jaroslav@68	1723	* <p>
jaroslav@68	1724	* A character is defined if at least one of the following is true:
jaroslav@68	1725	* <ul>
jaroslav@68	1726	* <li>It has an entry in the UnicodeData file.
jaroslav@68	1727	* <li>It has a value in a range defined by the UnicodeData file.
jaroslav@68	1728	* </ul>
jaroslav@68	1729	*
jaroslav@68	1730	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	1731	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	1732	* all Unicode characters, including supplementary characters, use
jaroslav@68	1733	* the {@link #isDefined(int)} method.
jaroslav@68	1734	*
jaroslav@68	1735	* @param ch the character to be tested
jaroslav@68	1736	* @return {@code true} if the character has a defined meaning
jaroslav@68	1737	* in Unicode; {@code false} otherwise.
jaroslav@68	1738	* @see Character#isDigit(char)
jaroslav@68	1739	* @see Character#isLetter(char)
jaroslav@68	1740	* @see Character#isLetterOrDigit(char)
jaroslav@68	1741	* @see Character#isLowerCase(char)
jaroslav@68	1742	* @see Character#isTitleCase(char)
jaroslav@68	1743	* @see Character#isUpperCase(char)
jaroslav@68	1744	* @since 1.0.2
jaroslav@68	1745	*/
jaroslav@68	1746	public static boolean isDefined(char ch) {
jaroslav@68	1747	return isDefined((int)ch);
jaroslav@68	1748	}
jaroslav@68	1749
jaroslav@68	1750	/**
jaroslav@68	1751	* Determines if a character (Unicode code point) is defined in Unicode.
jaroslav@68	1752	* <p>
jaroslav@68	1753	* A character is defined if at least one of the following is true:
jaroslav@68	1754	* <ul>
jaroslav@68	1755	* <li>It has an entry in the UnicodeData file.
jaroslav@68	1756	* <li>It has a value in a range defined by the UnicodeData file.
jaroslav@68	1757	* </ul>
jaroslav@68	1758	*
jaroslav@68	1759	* @param codePoint the character (Unicode code point) to be tested.
jaroslav@68	1760	* @return {@code true} if the character has a defined meaning
jaroslav@68	1761	* in Unicode; {@code false} otherwise.
jaroslav@68	1762	* @see Character#isDigit(int)
jaroslav@68	1763	* @see Character#isLetter(int)
jaroslav@68	1764	* @see Character#isLetterOrDigit(int)
jaroslav@68	1765	* @see Character#isLowerCase(int)
jaroslav@68	1766	* @see Character#isTitleCase(int)
jaroslav@68	1767	* @see Character#isUpperCase(int)
jaroslav@68	1768	* @since 1.5
jaroslav@68	1769	*/
jaroslav@68	1770	public static boolean isDefined(int codePoint) {
jaroslav@68	1771	return getType(codePoint) != Character.UNASSIGNED;
jaroslav@68	1772	}
jaroslav@68	1773
jaroslav@68	1774	/**
jaroslav@68	1775	* Determines if the specified character is a letter.
jaroslav@68	1776	* <p>
jaroslav@68	1777	* A character is considered to be a letter if its general
jaroslav@68	1778	* category type, provided by {@code Character.getType(ch)},
jaroslav@68	1779	* is any of the following:
jaroslav@68	1780	* <ul>
jaroslav@68	1781	* <li> {@code UPPERCASE_LETTER}
jaroslav@68	1782	* <li> {@code LOWERCASE_LETTER}
jaroslav@68	1783	* <li> {@code TITLECASE_LETTER}
jaroslav@68	1784	* <li> {@code MODIFIER_LETTER}
jaroslav@68	1785	* <li> {@code OTHER_LETTER}
jaroslav@68	1786	* </ul>
jaroslav@68	1787	*
jaroslav@68	1788	* Not all letters have case. Many characters are
jaroslav@68	1789	* letters but are neither uppercase nor lowercase nor titlecase.
jaroslav@68	1790	*
jaroslav@68	1791	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	1792	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	1793	* all Unicode characters, including supplementary characters, use
jaroslav@68	1794	* the {@link #isLetter(int)} method.
jaroslav@68	1795	*
jaroslav@68	1796	* @param ch the character to be tested.
jaroslav@68	1797	* @return {@code true} if the character is a letter;
jaroslav@68	1798	* {@code false} otherwise.
jaroslav@68	1799	* @see Character#isDigit(char)
jaroslav@68	1800	* @see Character#isJavaIdentifierStart(char)
jaroslav@68	1801	* @see Character#isJavaLetter(char)
jaroslav@68	1802	* @see Character#isJavaLetterOrDigit(char)
jaroslav@68	1803	* @see Character#isLetterOrDigit(char)
jaroslav@68	1804	* @see Character#isLowerCase(char)
jaroslav@68	1805	* @see Character#isTitleCase(char)
jaroslav@68	1806	* @see Character#isUnicodeIdentifierStart(char)
jaroslav@68	1807	* @see Character#isUpperCase(char)
jaroslav@68	1808	*/
jaroslav@68	1809	public static boolean isLetter(char ch) {
jaroslav@326	1810	return String.valueOf(ch).matches("\\w") && !isDigit(ch);
jaroslav@68	1811	}
jaroslav@68	1812
jaroslav@68	1813	/**
jaroslav@68	1814	* Determines if the specified character (Unicode code point) is a letter.
jaroslav@68	1815	* <p>
jaroslav@68	1816	* A character is considered to be a letter if its general
jaroslav@68	1817	* category type, provided by {@link Character#getType(int) getType(codePoint)},
jaroslav@68	1818	* is any of the following:
jaroslav@68	1819	* <ul>
jaroslav@68	1820	* <li> {@code UPPERCASE_LETTER}
jaroslav@68	1821	* <li> {@code LOWERCASE_LETTER}
jaroslav@68	1822	* <li> {@code TITLECASE_LETTER}
jaroslav@68	1823	* <li> {@code MODIFIER_LETTER}
jaroslav@68	1824	* <li> {@code OTHER_LETTER}
jaroslav@68	1825	* </ul>
jaroslav@68	1826	*
jaroslav@68	1827	* Not all letters have case. Many characters are
jaroslav@68	1828	* letters but are neither uppercase nor lowercase nor titlecase.
jaroslav@68	1829	*
jaroslav@68	1830	* @param codePoint the character (Unicode code point) to be tested.
jaroslav@68	1831	* @return {@code true} if the character is a letter;
jaroslav@68	1832	* {@code false} otherwise.
jaroslav@68	1833	* @see Character#isDigit(int)
jaroslav@68	1834	* @see Character#isJavaIdentifierStart(int)
jaroslav@68	1835	* @see Character#isLetterOrDigit(int)
jaroslav@68	1836	* @see Character#isLowerCase(int)
jaroslav@68	1837	* @see Character#isTitleCase(int)
jaroslav@68	1838	* @see Character#isUnicodeIdentifierStart(int)
jaroslav@68	1839	* @see Character#isUpperCase(int)
jaroslav@68	1840	* @since 1.5
jaroslav@68	1841	*/
jaroslav@68	1842	public static boolean isLetter(int codePoint) {
jaroslav@326	1843	return fromCodeChars(codePoint).matches("\\w") && !isDigit(codePoint);
jaroslav@68	1844	}
jaroslav@68	1845
jaroslav@68	1846	/**
jaroslav@68	1847	* Determines if the specified character is a letter or digit.
jaroslav@68	1848	* <p>
jaroslav@68	1849	* A character is considered to be a letter or digit if either
jaroslav@68	1850	* {@code Character.isLetter(char ch)} or
jaroslav@68	1851	* {@code Character.isDigit(char ch)} returns
jaroslav@68	1852	* {@code true} for the character.
jaroslav@68	1853	*
jaroslav@68	1854	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	1855	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	1856	* all Unicode characters, including supplementary characters, use
jaroslav@68	1857	* the {@link #isLetterOrDigit(int)} method.
jaroslav@68	1858	*
jaroslav@68	1859	* @param ch the character to be tested.
jaroslav@68	1860	* @return {@code true} if the character is a letter or digit;
jaroslav@68	1861	* {@code false} otherwise.
jaroslav@68	1862	* @see Character#isDigit(char)
jaroslav@68	1863	* @see Character#isJavaIdentifierPart(char)
jaroslav@68	1864	* @see Character#isJavaLetter(char)
jaroslav@68	1865	* @see Character#isJavaLetterOrDigit(char)
jaroslav@68	1866	* @see Character#isLetter(char)
jaroslav@68	1867	* @see Character#isUnicodeIdentifierPart(char)
jaroslav@68	1868	* @since 1.0.2
jaroslav@68	1869	*/
jaroslav@68	1870	public static boolean isLetterOrDigit(char ch) {
jaroslav@326	1871	return String.valueOf(ch).matches("\\w");
jaroslav@68	1872	}
jaroslav@68	1873
jaroslav@68	1874	/**
jaroslav@68	1875	* Determines if the specified character (Unicode code point) is a letter or digit.
jaroslav@68	1876	* <p>
jaroslav@68	1877	* A character is considered to be a letter or digit if either
jaroslav@68	1878	* {@link #isLetter(int) isLetter(codePoint)} or
jaroslav@68	1879	* {@link #isDigit(int) isDigit(codePoint)} returns
jaroslav@68	1880	* {@code true} for the character.
jaroslav@68	1881	*
jaroslav@68	1882	* @param codePoint the character (Unicode code point) to be tested.
jaroslav@68	1883	* @return {@code true} if the character is a letter or digit;
jaroslav@68	1884	* {@code false} otherwise.
jaroslav@68	1885	* @see Character#isDigit(int)
jaroslav@68	1886	* @see Character#isJavaIdentifierPart(int)
jaroslav@68	1887	* @see Character#isLetter(int)
jaroslav@68	1888	* @see Character#isUnicodeIdentifierPart(int)
jaroslav@68	1889	* @since 1.5
jaroslav@68	1890	*/
jaroslav@68	1891	public static boolean isLetterOrDigit(int codePoint) {
jaroslav@326	1892	return fromCodeChars(codePoint).matches("\\w");
jaroslav@68	1893	}
jaroslav@85	1894
jaroslav@85	1895	static int getType(int x) {
jaroslav@85	1896	throw new UnsupportedOperationException();
jaroslav@68	1897	}
jaroslav@563	1898
jaroslav@563	1899	/**
jaroslav@563	1900	* Determines if the specified character is
jaroslav@563	1901	* permissible as the first character in a Java identifier.
jaroslav@563	1902	* <p>
jaroslav@563	1903	* A character may start a Java identifier if and only if
jaroslav@563	1904	* one of the following conditions is true:
jaroslav@563	1905	* <ul>
jaroslav@563	1906	* <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
jaroslav@563	1907	* <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
jaroslav@563	1908	* <li> {@code ch} is a currency symbol (such as {@code '$'})
jaroslav@563	1909	* <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
jaroslav@563	1910	* </ul>
jaroslav@563	1911	*
jaroslav@563	1912	* <p><b>Note:</b> This method cannot handle <a
jaroslav@563	1913	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@563	1914	* all Unicode characters, including supplementary characters, use
jaroslav@563	1915	* the {@link #isJavaIdentifierStart(int)} method.
jaroslav@563	1916	*
jaroslav@563	1917	* @param ch the character to be tested.
jaroslav@563	1918	* @return {@code true} if the character may start a Java identifier;
jaroslav@563	1919	* {@code false} otherwise.
jaroslav@563	1920	* @see Character#isJavaIdentifierPart(char)
jaroslav@563	1921	* @see Character#isLetter(char)
jaroslav@563	1922	* @see Character#isUnicodeIdentifierStart(char)
jaroslav@563	1923	* @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
jaroslav@563	1924	* @since 1.1
jaroslav@563	1925	*/
jaroslav@563	1926	public static boolean isJavaIdentifierStart(char ch) {
jaroslav@563	1927	return isJavaIdentifierStart((int)ch);
jaroslav@563	1928	}
jaroslav@563	1929
jaroslav@563	1930	/**
jaroslav@563	1931	* Determines if the character (Unicode code point) is
jaroslav@563	1932	* permissible as the first character in a Java identifier.
jaroslav@563	1933	* <p>
jaroslav@563	1934	* A character may start a Java identifier if and only if
jaroslav@563	1935	* one of the following conditions is true:
jaroslav@563	1936	* <ul>
jaroslav@563	1937	* <li> {@link #isLetter(int) isLetter(codePoint)}
jaroslav@563	1938	* returns {@code true}
jaroslav@563	1939	* <li> {@link #getType(int) getType(codePoint)}
jaroslav@563	1940	* returns {@code LETTER_NUMBER}
jaroslav@563	1941	* <li> the referenced character is a currency symbol (such as {@code '$'})
jaroslav@563	1942	* <li> the referenced character is a connecting punctuation character
jaroslav@563	1943	* (such as {@code '_'}).
jaroslav@563	1944	* </ul>
jaroslav@563	1945	*
jaroslav@563	1946	* @param codePoint the character (Unicode code point) to be tested.
jaroslav@563	1947	* @return {@code true} if the character may start a Java identifier;
jaroslav@563	1948	* {@code false} otherwise.
jaroslav@563	1949	* @see Character#isJavaIdentifierPart(int)
jaroslav@563	1950	* @see Character#isLetter(int)
jaroslav@563	1951	* @see Character#isUnicodeIdentifierStart(int)
jaroslav@563	1952	* @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
jaroslav@563	1953	* @since 1.5
jaroslav@563	1954	*/
jaroslav@563	1955	public static boolean isJavaIdentifierStart(int codePoint) {
jaroslav@563	1956	return
jaroslav@563	1957	('A' <= codePoint && codePoint <= 'Z') \|\|
jaroslav@563	1958	('a' <= codePoint && codePoint <= 'z');
jaroslav@563	1959	}
jaroslav@563	1960
jaroslav@563	1961	/**
jaroslav@563	1962	* Determines if the specified character may be part of a Java
jaroslav@563	1963	* identifier as other than the first character.
jaroslav@563	1964	* <p>
jaroslav@563	1965	* A character may be part of a Java identifier if any of the following
jaroslav@563	1966	* are true:
jaroslav@563	1967	* <ul>
jaroslav@563	1968	* <li> it is a letter
jaroslav@563	1969	* <li> it is a currency symbol (such as {@code '$'})
jaroslav@563	1970	* <li> it is a connecting punctuation character (such as {@code '_'})
jaroslav@563	1971	* <li> it is a digit
jaroslav@563	1972	* <li> it is a numeric letter (such as a Roman numeral character)
jaroslav@563	1973	* <li> it is a combining mark
jaroslav@563	1974	* <li> it is a non-spacing mark
jaroslav@563	1975	* <li> {@code isIdentifierIgnorable} returns
jaroslav@563	1976	* {@code true} for the character
jaroslav@563	1977	* </ul>
jaroslav@563	1978	*
jaroslav@563	1979	* <p><b>Note:</b> This method cannot handle <a
jaroslav@563	1980	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@563	1981	* all Unicode characters, including supplementary characters, use
jaroslav@563	1982	* the {@link #isJavaIdentifierPart(int)} method.
jaroslav@563	1983	*
jaroslav@563	1984	* @param ch the character to be tested.
jaroslav@563	1985	* @return {@code true} if the character may be part of a
jaroslav@563	1986	* Java identifier; {@code false} otherwise.
jaroslav@563	1987	* @see Character#isIdentifierIgnorable(char)
jaroslav@563	1988	* @see Character#isJavaIdentifierStart(char)
jaroslav@563	1989	* @see Character#isLetterOrDigit(char)
jaroslav@563	1990	* @see Character#isUnicodeIdentifierPart(char)
jaroslav@563	1991	* @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
jaroslav@563	1992	* @since 1.1
jaroslav@563	1993	*/
jaroslav@563	1994	public static boolean isJavaIdentifierPart(char ch) {
jaroslav@563	1995	return isJavaIdentifierPart((int)ch);
jaroslav@563	1996	}
jaroslav@563	1997
jaroslav@563	1998	/**
jaroslav@563	1999	* Determines if the character (Unicode code point) may be part of a Java
jaroslav@563	2000	* identifier as other than the first character.
jaroslav@563	2001	* <p>
jaroslav@563	2002	* A character may be part of a Java identifier if any of the following
jaroslav@563	2003	* are true:
jaroslav@563	2004	* <ul>
jaroslav@563	2005	* <li> it is a letter
jaroslav@563	2006	* <li> it is a currency symbol (such as {@code '$'})
jaroslav@563	2007	* <li> it is a connecting punctuation character (such as {@code '_'})
jaroslav@563	2008	* <li> it is a digit
jaroslav@563	2009	* <li> it is a numeric letter (such as a Roman numeral character)
jaroslav@563	2010	* <li> it is a combining mark
jaroslav@563	2011	* <li> it is a non-spacing mark
jaroslav@563	2012	* <li> {@link #isIdentifierIgnorable(int)
jaroslav@563	2013	* isIdentifierIgnorable(codePoint)} returns {@code true} for
jaroslav@563	2014	* the character
jaroslav@563	2015	* </ul>
jaroslav@563	2016	*
jaroslav@563	2017	* @param codePoint the character (Unicode code point) to be tested.
jaroslav@563	2018	* @return {@code true} if the character may be part of a
jaroslav@563	2019	* Java identifier; {@code false} otherwise.
jaroslav@563	2020	* @see Character#isIdentifierIgnorable(int)
jaroslav@563	2021	* @see Character#isJavaIdentifierStart(int)
jaroslav@563	2022	* @see Character#isLetterOrDigit(int)
jaroslav@563	2023	* @see Character#isUnicodeIdentifierPart(int)
jaroslav@563	2024	* @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
jaroslav@563	2025	* @since 1.5
jaroslav@563	2026	*/
jaroslav@563	2027	public static boolean isJavaIdentifierPart(int codePoint) {
jaroslav@563	2028	return isJavaIdentifierStart(codePoint) \|\|
jaroslav@590	2029	('0' <= codePoint && codePoint <= '9') \|\| codePoint == '$';
jaroslav@563	2030	}
jaroslav@563	2031
jaroslav@68	2032	/**
jaroslav@68	2033	* Converts the character argument to lowercase using case
jaroslav@68	2034	* mapping information from the UnicodeData file.
jaroslav@68	2035	* <p>
jaroslav@68	2036	* Note that
jaroslav@68	2037	* {@code Character.isLowerCase(Character.toLowerCase(ch))}
jaroslav@68	2038	* does not always return {@code true} for some ranges of
jaroslav@68	2039	* characters, particularly those that are symbols or ideographs.
jaroslav@68	2040	*
jaroslav@68	2041	* <p>In general, {@link String#toLowerCase()} should be used to map
jaroslav@68	2042	* characters to lowercase. {@code String} case mapping methods
jaroslav@68	2043	* have several benefits over {@code Character} case mapping methods.
jaroslav@68	2044	* {@code String} case mapping methods can perform locale-sensitive
jaroslav@68	2045	* mappings, context-sensitive mappings, and 1:M character mappings, whereas
jaroslav@68	2046	* the {@code Character} case mapping methods cannot.
jaroslav@68	2047	*
jaroslav@68	2048	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	2049	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	2050	* all Unicode characters, including supplementary characters, use
jaroslav@68	2051	* the {@link #toLowerCase(int)} method.
jaroslav@68	2052	*
jaroslav@68	2053	* @param ch the character to be converted.
jaroslav@68	2054	* @return the lowercase equivalent of the character, if any;
jaroslav@68	2055	* otherwise, the character itself.
jaroslav@68	2056	* @see Character#isLowerCase(char)
jaroslav@68	2057	* @see String#toLowerCase()
jaroslav@68	2058	*/
jaroslav@68	2059	public static char toLowerCase(char ch) {
jaroslav@326	2060	return String.valueOf(ch).toLowerCase().charAt(0);
jaroslav@68	2061	}
jaroslav@68	2062
jaroslav@68	2063	/**
jaroslav@68	2064	* Converts the character argument to uppercase using case mapping
jaroslav@68	2065	* information from the UnicodeData file.
jaroslav@68	2066	* <p>
jaroslav@68	2067	* Note that
jaroslav@68	2068	* {@code Character.isUpperCase(Character.toUpperCase(ch))}
jaroslav@68	2069	* does not always return {@code true} for some ranges of
jaroslav@68	2070	* characters, particularly those that are symbols or ideographs.
jaroslav@68	2071	*
jaroslav@68	2072	* <p>In general, {@link String#toUpperCase()} should be used to map
jaroslav@68	2073	* characters to uppercase. {@code String} case mapping methods
jaroslav@68	2074	* have several benefits over {@code Character} case mapping methods.
jaroslav@68	2075	* {@code String} case mapping methods can perform locale-sensitive
jaroslav@68	2076	* mappings, context-sensitive mappings, and 1:M character mappings, whereas
jaroslav@68	2077	* the {@code Character} case mapping methods cannot.
jaroslav@68	2078	*
jaroslav@68	2079	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	2080	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	2081	* all Unicode characters, including supplementary characters, use
jaroslav@68	2082	* the {@link #toUpperCase(int)} method.
jaroslav@68	2083	*
jaroslav@68	2084	* @param ch the character to be converted.
jaroslav@68	2085	* @return the uppercase equivalent of the character, if any;
jaroslav@68	2086	* otherwise, the character itself.
jaroslav@68	2087	* @see Character#isUpperCase(char)
jaroslav@68	2088	* @see String#toUpperCase()
jaroslav@68	2089	*/
jaroslav@68	2090	public static char toUpperCase(char ch) {
jaroslav@326	2091	return String.valueOf(ch).toUpperCase().charAt(0);
jaroslav@68	2092	}
jaroslav@68	2093
jaroslav@68	2094	/**
jaroslav@68	2095	* Returns the numeric value of the character {@code ch} in the
jaroslav@68	2096	* specified radix.
jaroslav@68	2097	* <p>
jaroslav@68	2098	* If the radix is not in the range {@code MIN_RADIX} ≤
jaroslav@68	2099	* {@code radix} ≤ {@code MAX_RADIX} or if the
jaroslav@68	2100	* value of {@code ch} is not a valid digit in the specified
jaroslav@68	2101	* radix, {@code -1} is returned. A character is a valid digit
jaroslav@68	2102	* if at least one of the following is true:
jaroslav@68	2103	* <ul>
jaroslav@68	2104	* <li>The method {@code isDigit} is {@code true} of the character
jaroslav@68	2105	* and the Unicode decimal digit value of the character (or its
jaroslav@68	2106	* single-character decomposition) is less than the specified radix.
jaroslav@68	2107	* In this case the decimal digit value is returned.
jaroslav@68	2108	* <li>The character is one of the uppercase Latin letters
jaroslav@68	2109	* {@code 'A'} through {@code 'Z'} and its code is less than
jaroslav@68	2110	* {@code radix + 'A' - 10}.
jaroslav@68	2111	* In this case, {@code ch - 'A' + 10}
jaroslav@68	2112	* is returned.
jaroslav@68	2113	* <li>The character is one of the lowercase Latin letters
jaroslav@68	2114	* {@code 'a'} through {@code 'z'} and its code is less than
jaroslav@68	2115	* {@code radix + 'a' - 10}.
jaroslav@68	2116	* In this case, {@code ch - 'a' + 10}
jaroslav@68	2117	* is returned.
jaroslav@68	2118	* <li>The character is one of the fullwidth uppercase Latin letters A
jaroslav@68	2119	* ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
jaroslav@68	2120	* and its code is less than
jaroslav@68	2121	* {@code radix + '\u005CuFF21' - 10}.
jaroslav@68	2122	* In this case, {@code ch - '\u005CuFF21' + 10}
jaroslav@68	2123	* is returned.
jaroslav@68	2124	* <li>The character is one of the fullwidth lowercase Latin letters a
jaroslav@68	2125	* ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
jaroslav@68	2126	* and its code is less than
jaroslav@68	2127	* {@code radix + '\u005CuFF41' - 10}.
jaroslav@68	2128	* In this case, {@code ch - '\u005CuFF41' + 10}
jaroslav@68	2129	* is returned.
jaroslav@68	2130	* </ul>
jaroslav@68	2131	*
jaroslav@68	2132	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	2133	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	2134	* all Unicode characters, including supplementary characters, use
jaroslav@68	2135	* the {@link #digit(int, int)} method.
jaroslav@68	2136	*
jaroslav@68	2137	* @param ch the character to be converted.
jaroslav@68	2138	* @param radix the radix.
jaroslav@68	2139	* @return the numeric value represented by the character in the
jaroslav@68	2140	* specified radix.
jaroslav@68	2141	* @see Character#forDigit(int, int)
jaroslav@68	2142	* @see Character#isDigit(char)
jaroslav@68	2143	*/
jaroslav@68	2144	public static int digit(char ch, int radix) {
jaroslav@68	2145	return digit((int)ch, radix);
jaroslav@68	2146	}
jaroslav@68	2147
jaroslav@68	2148	/**
jaroslav@68	2149	* Returns the numeric value of the specified character (Unicode
jaroslav@68	2150	* code point) in the specified radix.
jaroslav@68	2151	*
jaroslav@68	2152	* <p>If the radix is not in the range {@code MIN_RADIX} ≤
jaroslav@68	2153	* {@code radix} ≤ {@code MAX_RADIX} or if the
jaroslav@68	2154	* character is not a valid digit in the specified
jaroslav@68	2155	* radix, {@code -1} is returned. A character is a valid digit
jaroslav@68	2156	* if at least one of the following is true:
jaroslav@68	2157	* <ul>
jaroslav@68	2158	* <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
jaroslav@68	2159	* and the Unicode decimal digit value of the character (or its
jaroslav@68	2160	* single-character decomposition) is less than the specified radix.
jaroslav@68	2161	* In this case the decimal digit value is returned.
jaroslav@68	2162	* <li>The character is one of the uppercase Latin letters
jaroslav@68	2163	* {@code 'A'} through {@code 'Z'} and its code is less than
jaroslav@68	2164	* {@code radix + 'A' - 10}.
jaroslav@68	2165	* In this case, {@code codePoint - 'A' + 10}
jaroslav@68	2166	* is returned.
jaroslav@68	2167	* <li>The character is one of the lowercase Latin letters
jaroslav@68	2168	* {@code 'a'} through {@code 'z'} and its code is less than
jaroslav@68	2169	* {@code radix + 'a' - 10}.
jaroslav@68	2170	* In this case, {@code codePoint - 'a' + 10}
jaroslav@68	2171	* is returned.
jaroslav@68	2172	* <li>The character is one of the fullwidth uppercase Latin letters A
jaroslav@68	2173	* ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
jaroslav@68	2174	* and its code is less than
jaroslav@68	2175	* {@code radix + '\u005CuFF21' - 10}.
jaroslav@68	2176	* In this case,
jaroslav@68	2177	* {@code codePoint - '\u005CuFF21' + 10}
jaroslav@68	2178	* is returned.
jaroslav@68	2179	* <li>The character is one of the fullwidth lowercase Latin letters a
jaroslav@68	2180	* ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
jaroslav@68	2181	* and its code is less than
jaroslav@68	2182	* {@code radix + '\u005CuFF41'- 10}.
jaroslav@68	2183	* In this case,
jaroslav@68	2184	* {@code codePoint - '\u005CuFF41' + 10}
jaroslav@68	2185	* is returned.
jaroslav@68	2186	* </ul>
jaroslav@68	2187	*
jaroslav@68	2188	* @param codePoint the character (Unicode code point) to be converted.
jaroslav@68	2189	* @param radix the radix.
jaroslav@68	2190	* @return the numeric value represented by the character in the
jaroslav@68	2191	* specified radix.
jaroslav@68	2192	* @see Character#forDigit(int, int)
jaroslav@68	2193	* @see Character#isDigit(int)
jaroslav@68	2194	* @since 1.5
jaroslav@68	2195	*/
Martin@594	2196	@JavaScriptBody(args = { "codePoint", "radix" }, body=
Martin@594	2197	"var x = parseInt(String.fromCharCode(codePoint), radix);\n"
Martin@594	2198	+ "return isNaN(x) ? -1 : x;"
Martin@594	2199	)
jaroslav@68	2200	public static int digit(int codePoint, int radix) {
jaroslav@85	2201	throw new UnsupportedOperationException();
jaroslav@68	2202	}
jaroslav@68	2203
jaroslav@68	2204	/**
jaroslav@68	2205	* Returns the {@code int} value that the specified Unicode
jaroslav@68	2206	* character represents. For example, the character
jaroslav@68	2207	* {@code '\u005Cu216C'} (the roman numeral fifty) will return
jaroslav@68	2208	* an int with a value of 50.
jaroslav@68	2209	* <p>
jaroslav@68	2210	* The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
jaroslav@68	2211	* {@code '\u005Cu005A'}), lowercase
jaroslav@68	2212	* ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
jaroslav@68	2213	* full width variant ({@code '\u005CuFF21'} through
jaroslav@68	2214	* {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
jaroslav@68	2215	* {@code '\u005CuFF5A'}) forms have numeric values from 10
jaroslav@68	2216	* through 35. This is independent of the Unicode specification,
jaroslav@68	2217	* which does not assign numeric values to these {@code char}
jaroslav@68	2218	* values.
jaroslav@68	2219	* <p>
jaroslav@68	2220	* If the character does not have a numeric value, then -1 is returned.
jaroslav@68	2221	* If the character has a numeric value that cannot be represented as a
jaroslav@68	2222	* nonnegative integer (for example, a fractional value), then -2
jaroslav@68	2223	* is returned.
jaroslav@68	2224	*
jaroslav@68	2225	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	2226	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	2227	* all Unicode characters, including supplementary characters, use
jaroslav@68	2228	* the {@link #getNumericValue(int)} method.
jaroslav@68	2229	*
jaroslav@68	2230	* @param ch the character to be converted.
jaroslav@68	2231	* @return the numeric value of the character, as a nonnegative {@code int}
jaroslav@68	2232	* value; -2 if the character has a numeric value that is not a
jaroslav@68	2233	* nonnegative integer; -1 if the character has no numeric value.
jaroslav@68	2234	* @see Character#forDigit(int, int)
jaroslav@68	2235	* @see Character#isDigit(char)
jaroslav@68	2236	* @since 1.1
jaroslav@68	2237	*/
jaroslav@68	2238	public static int getNumericValue(char ch) {
jaroslav@68	2239	return getNumericValue((int)ch);
jaroslav@68	2240	}
jaroslav@68	2241
jaroslav@68	2242	/**
jaroslav@68	2243	* Returns the {@code int} value that the specified
jaroslav@68	2244	* character (Unicode code point) represents. For example, the character
jaroslav@68	2245	* {@code '\u005Cu216C'} (the Roman numeral fifty) will return
jaroslav@68	2246	* an {@code int} with a value of 50.
jaroslav@68	2247	* <p>
jaroslav@68	2248	* The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
jaroslav@68	2249	* {@code '\u005Cu005A'}), lowercase
jaroslav@68	2250	* ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
jaroslav@68	2251	* full width variant ({@code '\u005CuFF21'} through
jaroslav@68	2252	* {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
jaroslav@68	2253	* {@code '\u005CuFF5A'}) forms have numeric values from 10
jaroslav@68	2254	* through 35. This is independent of the Unicode specification,
jaroslav@68	2255	* which does not assign numeric values to these {@code char}
jaroslav@68	2256	* values.
jaroslav@68	2257	* <p>
jaroslav@68	2258	* If the character does not have a numeric value, then -1 is returned.
jaroslav@68	2259	* If the character has a numeric value that cannot be represented as a
jaroslav@68	2260	* nonnegative integer (for example, a fractional value), then -2
jaroslav@68	2261	* is returned.
jaroslav@68	2262	*
jaroslav@68	2263	* @param codePoint the character (Unicode code point) to be converted.
jaroslav@68	2264	* @return the numeric value of the character, as a nonnegative {@code int}
jaroslav@68	2265	* value; -2 if the character has a numeric value that is not a
jaroslav@68	2266	* nonnegative integer; -1 if the character has no numeric value.
jaroslav@68	2267	* @see Character#forDigit(int, int)
jaroslav@68	2268	* @see Character#isDigit(int)
jaroslav@68	2269	* @since 1.5
jaroslav@68	2270	*/
jaroslav@68	2271	public static int getNumericValue(int codePoint) {
jaroslav@85	2272	throw new UnsupportedOperationException();
jaroslav@68	2273	}
jaroslav@68	2274
jaroslav@68	2275	/**
jaroslav@68	2276	* Determines if the specified character is ISO-LATIN-1 white space.
jaroslav@68	2277	* This method returns {@code true} for the following five
jaroslav@68	2278	* characters only:
jaroslav@68	2279	* <table>
jaroslav@68	2280	* <tr><td>{@code '\t'}</td> <td>{@code U+0009}</td>
jaroslav@68	2281	* <td>{@code HORIZONTAL TABULATION}</td></tr>
jaroslav@68	2282	* <tr><td>{@code '\n'}</td> <td>{@code U+000A}</td>
jaroslav@68	2283	* <td>{@code NEW LINE}</td></tr>
jaroslav@68	2284	* <tr><td>{@code '\f'}</td> <td>{@code U+000C}</td>
jaroslav@68	2285	* <td>{@code FORM FEED}</td></tr>
jaroslav@68	2286	* <tr><td>{@code '\r'}</td> <td>{@code U+000D}</td>
jaroslav@68	2287	* <td>{@code CARRIAGE RETURN}</td></tr>
jaroslav@68	2288	* <tr><td>{@code ' '}</td> <td>{@code U+0020}</td>
jaroslav@68	2289	* <td>{@code SPACE}</td></tr>
jaroslav@68	2290	* </table>
jaroslav@68	2291	*
jaroslav@68	2292	* @param ch the character to be tested.
jaroslav@68	2293	* @return {@code true} if the character is ISO-LATIN-1 white
jaroslav@68	2294	* space; {@code false} otherwise.
jaroslav@68	2295	* @see Character#isSpaceChar(char)
jaroslav@68	2296	* @see Character#isWhitespace(char)
jaroslav@68	2297	* @deprecated Replaced by isWhitespace(char).
jaroslav@68	2298	*/
jaroslav@68	2299	@Deprecated
jaroslav@68	2300	public static boolean isSpace(char ch) {
jaroslav@68	2301	return (ch <= 0x0020) &&
jaroslav@68	2302	(((((1L << 0x0009) \|
jaroslav@68	2303	(1L << 0x000A) \|
jaroslav@68	2304	(1L << 0x000C) \|
jaroslav@68	2305	(1L << 0x000D) \|
jaroslav@68	2306	(1L << 0x0020)) >> ch) & 1L) != 0);
jaroslav@68	2307	}
jaroslav@68	2308
jaroslav@68	2309
jaroslav@68	2310
jaroslav@68	2311	/**
jaroslav@68	2312	* Determines if the specified character is white space according to Java.
jaroslav@68	2313	* A character is a Java whitespace character if and only if it satisfies
jaroslav@68	2314	* one of the following criteria:
jaroslav@68	2315	* <ul>
jaroslav@68	2316	* <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
jaroslav@68	2317	* {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
jaroslav@68	2318	* but is not also a non-breaking space ({@code '\u005Cu00A0'},
jaroslav@68	2319	* {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
jaroslav@68	2320	* <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
jaroslav@68	2321	* <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
jaroslav@68	2322	* <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
jaroslav@68	2323	* <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
jaroslav@68	2324	* <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
jaroslav@68	2325	* <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
jaroslav@68	2326	* <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
jaroslav@68	2327	* <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
jaroslav@68	2328	* <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
jaroslav@68	2329	* </ul>
jaroslav@68	2330	*
jaroslav@68	2331	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	2332	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	2333	* all Unicode characters, including supplementary characters, use
jaroslav@68	2334	* the {@link #isWhitespace(int)} method.
jaroslav@68	2335	*
jaroslav@68	2336	* @param ch the character to be tested.
jaroslav@68	2337	* @return {@code true} if the character is a Java whitespace
jaroslav@68	2338	* character; {@code false} otherwise.
jaroslav@68	2339	* @see Character#isSpaceChar(char)
jaroslav@68	2340	* @since 1.1
jaroslav@68	2341	*/
jaroslav@68	2342	public static boolean isWhitespace(char ch) {
jaroslav@68	2343	return isWhitespace((int)ch);
jaroslav@68	2344	}
jaroslav@68	2345
jaroslav@68	2346	/**
jaroslav@68	2347	* Determines if the specified character (Unicode code point) is
jaroslav@68	2348	* white space according to Java. A character is a Java
jaroslav@68	2349	* whitespace character if and only if it satisfies one of the
jaroslav@68	2350	* following criteria:
jaroslav@68	2351	* <ul>
jaroslav@68	2352	* <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
jaroslav@68	2353	* {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
jaroslav@68	2354	* but is not also a non-breaking space ({@code '\u005Cu00A0'},
jaroslav@68	2355	* {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
jaroslav@68	2356	* <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
jaroslav@68	2357	* <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
jaroslav@68	2358	* <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
jaroslav@68	2359	* <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
jaroslav@68	2360	* <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
jaroslav@68	2361	* <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
jaroslav@68	2362	* <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
jaroslav@68	2363	* <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
jaroslav@68	2364	* <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
jaroslav@68	2365	* </ul>
jaroslav@68	2366	* <p>
jaroslav@68	2367	*
jaroslav@68	2368	* @param codePoint the character (Unicode code point) to be tested.
jaroslav@68	2369	* @return {@code true} if the character is a Java whitespace
jaroslav@68	2370	* character; {@code false} otherwise.
jaroslav@68	2371	* @see Character#isSpaceChar(int)
jaroslav@68	2372	* @since 1.5
jaroslav@68	2373	*/
jaroslav@68	2374	public static boolean isWhitespace(int codePoint) {
jaroslav@85	2375	throw new UnsupportedOperationException();
jaroslav@68	2376	}
jaroslav@68	2377
jaroslav@68	2378	/**
jaroslav@68	2379	* Determines if the specified character is an ISO control
jaroslav@68	2380	* character. A character is considered to be an ISO control
jaroslav@68	2381	* character if its code is in the range {@code '\u005Cu0000'}
jaroslav@68	2382	* through {@code '\u005Cu001F'} or in the range
jaroslav@68	2383	* {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
jaroslav@68	2384	*
jaroslav@68	2385	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	2386	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	2387	* all Unicode characters, including supplementary characters, use
jaroslav@68	2388	* the {@link #isISOControl(int)} method.
jaroslav@68	2389	*
jaroslav@68	2390	* @param ch the character to be tested.
jaroslav@68	2391	* @return {@code true} if the character is an ISO control character;
jaroslav@68	2392	* {@code false} otherwise.
jaroslav@68	2393	*
jaroslav@68	2394	* @see Character#isSpaceChar(char)
jaroslav@68	2395	* @see Character#isWhitespace(char)
jaroslav@68	2396	* @since 1.1
jaroslav@68	2397	*/
jaroslav@68	2398	public static boolean isISOControl(char ch) {
jaroslav@68	2399	return isISOControl((int)ch);
jaroslav@68	2400	}
jaroslav@68	2401
jaroslav@68	2402	/**
jaroslav@68	2403	* Determines if the referenced character (Unicode code point) is an ISO control
jaroslav@68	2404	* character. A character is considered to be an ISO control
jaroslav@68	2405	* character if its code is in the range {@code '\u005Cu0000'}
jaroslav@68	2406	* through {@code '\u005Cu001F'} or in the range
jaroslav@68	2407	* {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
jaroslav@68	2408	*
jaroslav@68	2409	* @param codePoint the character (Unicode code point) to be tested.
jaroslav@68	2410	* @return {@code true} if the character is an ISO control character;
jaroslav@68	2411	* {@code false} otherwise.
jaroslav@68	2412	* @see Character#isSpaceChar(int)
jaroslav@68	2413	* @see Character#isWhitespace(int)
jaroslav@68	2414	* @since 1.5
jaroslav@68	2415	*/
jaroslav@68	2416	public static boolean isISOControl(int codePoint) {
jaroslav@68	2417	// Optimized form of:
jaroslav@68	2418	// (codePoint >= 0x00 && codePoint <= 0x1F) \|\|
jaroslav@68	2419	// (codePoint >= 0x7F && codePoint <= 0x9F);
jaroslav@68	2420	return codePoint <= 0x9F &&
jaroslav@68	2421	(codePoint >= 0x7F \|\| (codePoint >>> 5 == 0));
jaroslav@68	2422	}
jaroslav@68	2423
jaroslav@68	2424	/**
jaroslav@68	2425	* Determines the character representation for a specific digit in
jaroslav@68	2426	* the specified radix. If the value of {@code radix} is not a
jaroslav@68	2427	* valid radix, or the value of {@code digit} is not a valid
jaroslav@68	2428	* digit in the specified radix, the null character
jaroslav@68	2429	* ({@code '\u005Cu0000'}) is returned.
jaroslav@68	2430	* <p>
jaroslav@68	2431	* The {@code radix} argument is valid if it is greater than or
jaroslav@68	2432	* equal to {@code MIN_RADIX} and less than or equal to
jaroslav@68	2433	* {@code MAX_RADIX}. The {@code digit} argument is valid if
jaroslav@68	2434	* {@code 0 <= digit < radix}.
jaroslav@68	2435	* <p>
jaroslav@68	2436	* If the digit is less than 10, then
jaroslav@68	2437	* {@code '0' + digit} is returned. Otherwise, the value
jaroslav@68	2438	* {@code 'a' + digit - 10} is returned.
jaroslav@68	2439	*
jaroslav@68	2440	* @param digit the number to convert to a character.
jaroslav@68	2441	* @param radix the radix.
jaroslav@68	2442	* @return the {@code char} representation of the specified digit
jaroslav@68	2443	* in the specified radix.
jaroslav@68	2444	* @see Character#MIN_RADIX
jaroslav@68	2445	* @see Character#MAX_RADIX
jaroslav@68	2446	* @see Character#digit(char, int)
jaroslav@68	2447	*/
jaroslav@68	2448	public static char forDigit(int digit, int radix) {
jaroslav@68	2449	if ((digit >= radix) \|\| (digit < 0)) {
jaroslav@68	2450	return '\0';
jaroslav@68	2451	}
jaroslav@68	2452	if ((radix < Character.MIN_RADIX) \|\| (radix > Character.MAX_RADIX)) {
jaroslav@68	2453	return '\0';
jaroslav@68	2454	}
jaroslav@68	2455	if (digit < 10) {
jaroslav@68	2456	return (char)('0' + digit);
jaroslav@68	2457	}
jaroslav@68	2458	return (char)('a' - 10 + digit);
jaroslav@68	2459	}
jaroslav@68	2460
jaroslav@68	2461	/**
jaroslav@68	2462	* Compares two {@code Character} objects numerically.
jaroslav@68	2463	*
jaroslav@68	2464	* @param anotherCharacter the {@code Character} to be compared.
jaroslav@68	2465
jaroslav@68	2466	* @return the value {@code 0} if the argument {@code Character}
jaroslav@68	2467	* is equal to this {@code Character}; a value less than
jaroslav@68	2468	* {@code 0} if this {@code Character} is numerically less
jaroslav@68	2469	* than the {@code Character} argument; and a value greater than
jaroslav@68	2470	* {@code 0} if this {@code Character} is numerically greater
jaroslav@68	2471	* than the {@code Character} argument (unsigned comparison).
jaroslav@68	2472	* Note that this is strictly a numerical comparison; it is not
jaroslav@68	2473	* locale-dependent.
jaroslav@68	2474	* @since 1.2
jaroslav@68	2475	*/
jaroslav@68	2476	public int compareTo(Character anotherCharacter) {
jaroslav@68	2477	return compare(this.value, anotherCharacter.value);
jaroslav@68	2478	}
jaroslav@68	2479
jaroslav@68	2480	/**
jaroslav@68	2481	* Compares two {@code char} values numerically.
jaroslav@68	2482	* The value returned is identical to what would be returned by:
jaroslav@68	2483	* <pre>
jaroslav@68	2484	* Character.valueOf(x).compareTo(Character.valueOf(y))
jaroslav@68	2485	* </pre>
jaroslav@68	2486	*
jaroslav@68	2487	* @param x the first {@code char} to compare
jaroslav@68	2488	* @param y the second {@code char} to compare
jaroslav@68	2489	* @return the value {@code 0} if {@code x == y};
jaroslav@68	2490	* a value less than {@code 0} if {@code x < y}; and
jaroslav@68	2491	* a value greater than {@code 0} if {@code x > y}
jaroslav@68	2492	* @since 1.7
jaroslav@68	2493	*/
jaroslav@68	2494	public static int compare(char x, char y) {
jaroslav@68	2495	return x - y;
jaroslav@68	2496	}
jaroslav@68	2497
jaroslav@68	2498
/**
 * The number of bits used to represent a {@code char} value in unsigned
 * binary form, constant {@code 16}.
 *
 * @since 1.5
 */
public static final int SIZE = 16;
jaroslav@68	2506
jaroslav@68	2507	/**
jaroslav@68	2508	* Returns the value obtained by reversing the order of the bytes in the
jaroslav@68	2509	* specified <tt>char</tt> value.
jaroslav@68	2510	*
jaroslav@68	2511	* @return the value obtained by reversing (or, equivalently, swapping)
jaroslav@68	2512	* the bytes in the specified <tt>char</tt> value.
jaroslav@68	2513	* @since 1.5
jaroslav@68	2514	*/
jaroslav@68	2515	public static char reverseBytes(char ch) {
jaroslav@68	2516	return (char) (((ch & 0xFF00) >> 8) \| (ch << 8));
jaroslav@68	2517	}
jaroslav@68	2518
jaroslav@68	2519	}

author	Jaroslav Tulach <jaroslav.tulach@apidesign.org>
	Tue, 26 Feb 2013 16:54:16 +0100
changeset 772	d382dacfd73f
parent 594	emul/mini/src/main/java/java/lang/Character.java@035fcbd7a33c
child 791	af4001c85438
permissions	-rw-r--r--