hg/bck2brwsr: rt/emul/mini/src/main/java/java/lang/Character.java@f14e9730d4e9 (annotated)

jaroslav@68	1	/*
jaroslav@68	2	* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
jaroslav@68	3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
jaroslav@68	4	*
jaroslav@68	5	* This code is free software; you can redistribute it and/or modify it
jaroslav@68	6	* under the terms of the GNU General Public License version 2 only, as
jaroslav@68	7	* published by the Free Software Foundation. Oracle designates this
jaroslav@68	8	* particular file as subject to the "Classpath" exception as provided
jaroslav@68	9	* by Oracle in the LICENSE file that accompanied this code.
jaroslav@68	10	*
jaroslav@68	11	* This code is distributed in the hope that it will be useful, but WITHOUT
jaroslav@68	12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
jaroslav@68	13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
jaroslav@68	14	* version 2 for more details (a copy is included in the LICENSE file that
jaroslav@68	15	* accompanied this code).
jaroslav@68	16	*
jaroslav@68	17	* You should have received a copy of the GNU General Public License version
jaroslav@68	18	* 2 along with this work; if not, write to the Free Software Foundation,
jaroslav@68	19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
jaroslav@68	20	*
jaroslav@68	21	* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
jaroslav@68	22	* or visit www.oracle.com if you need additional information or have any
jaroslav@68	23	* questions.
jaroslav@68	24	*/
jaroslav@68	25
jaroslav@68	26	package java.lang;
jaroslav@68	27
jaroslav@326	28	import org.apidesign.bck2brwsr.core.JavaScriptBody;
jaroslav@326	29
jaroslav@68	30	/**
jaroslav@68	31	* The {@code Character} class wraps a value of the primitive
jaroslav@68	32	* type {@code char} in an object. An object of type
jaroslav@68	33	* {@code Character} contains a single field whose type is
jaroslav@68	34	* {@code char}.
jaroslav@68	35	* <p>
jaroslav@68	36	* In addition, this class provides several methods for determining
jaroslav@68	37	* a character's category (lowercase letter, digit, etc.) and for converting
jaroslav@68	38	* characters from uppercase to lowercase and vice versa.
jaroslav@68	39	* <p>
jaroslav@68	40	* Character information is based on the Unicode Standard, version 6.0.0.
jaroslav@68	41	* <p>
jaroslav@68	42	* The methods and data of class {@code Character} are defined by
jaroslav@68	43	* the information in the <i>UnicodeData</i> file that is part of the
jaroslav@68	44	* Unicode Character Database maintained by the Unicode
jaroslav@68	45	* Consortium. This file specifies various properties including name
jaroslav@68	46	* and general category for every defined Unicode code point or
jaroslav@68	47	* character range.
jaroslav@68	48	* <p>
jaroslav@68	49	* The file and its description are available from the Unicode Consortium at:
jaroslav@68	50	* <ul>
jaroslav@68	51	* <li><a href="http://www.unicode.org">http://www.unicode.org</a>
jaroslav@68	52	* </ul>
jaroslav@68	53	*
jaroslav@68	54	* <h4><a name="unicode">Unicode Character Representations</a></h4>
jaroslav@68	55	*
jaroslav@68	56	* <p>The {@code char} data type (and therefore the value that a
jaroslav@68	57	* {@code Character} object encapsulates) are based on the
jaroslav@68	58	* original Unicode specification, which defined characters as
jaroslav@68	59	* fixed-width 16-bit entities. The Unicode Standard has since been
jaroslav@68	60	* changed to allow for characters whose representation requires more
jaroslav@68	61	* than 16 bits. The range of legal <em>code point</em>s is now
jaroslav@68	62	* U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
jaroslav@68	63	* (Refer to the <a
jaroslav@68	64	* href="http://www.unicode.org/reports/tr27/#notation"><i>
jaroslav@68	65	* definition</i></a> of the U+<i>n</i> notation in the Unicode
jaroslav@68	66	* Standard.)
jaroslav@68	67	*
jaroslav@68	68	* <p><a name="BMP">The set of characters from U+0000 to U+FFFF is
jaroslav@68	69	* sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
jaroslav@68	70	* <a name="supplementary">Characters</a> whose code points are greater
jaroslav@68	71	* than U+FFFF are called <em>supplementary character</em>s. The Java
jaroslav@68	72	* platform uses the UTF-16 representation in {@code char} arrays and
jaroslav@68	73	* in the {@code String} and {@code StringBuffer} classes. In
jaroslav@68	74	* this representation, supplementary characters are represented as a pair
jaroslav@68	75	* of {@code char} values, the first from the <em>high-surrogates</em>
jaroslav@68	76	* range, (\uD800-\uDBFF), the second from the
jaroslav@68	77	* <em>low-surrogates</em> range (\uDC00-\uDFFF).
jaroslav@68	78	*
jaroslav@68	79	* <p>A {@code char} value, therefore, represents Basic
jaroslav@68	80	* Multilingual Plane (BMP) code points, including the surrogate
jaroslav@68	81	* code points, or code units of the UTF-16 encoding. An
jaroslav@68	82	* {@code int} value represents all Unicode code points,
jaroslav@68	83	* including supplementary code points. The lower (least significant)
jaroslav@68	84	* 21 bits of {@code int} are used to represent Unicode code
jaroslav@68	85	* points and the upper (most significant) 11 bits must be zero.
jaroslav@68	86	* Unless otherwise specified, the behavior with respect to
jaroslav@68	87	* supplementary characters and surrogate {@code char} values is
jaroslav@68	88	* as follows:
jaroslav@68	89	*
jaroslav@68	90	* <ul>
jaroslav@68	91	* <li>The methods that only accept a {@code char} value cannot support
jaroslav@68	92	* supplementary characters. They treat {@code char} values from the
jaroslav@68	93	* surrogate ranges as undefined characters. For example,
jaroslav@68	94	* {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
jaroslav@68	95	* this specific value if followed by any low-surrogate value in a string
jaroslav@68	96	* would represent a letter.
jaroslav@68	97	*
jaroslav@68	98	* <li>The methods that accept an {@code int} value support all
jaroslav@68	99	* Unicode characters, including supplementary characters. For
jaroslav@68	100	* example, {@code Character.isLetter(0x2F81A)} returns
jaroslav@68	101	* {@code true} because the code point value represents a letter
jaroslav@68	102	* (a CJK ideograph).
jaroslav@68	103	* </ul>
jaroslav@68	104	*
jaroslav@68	105	* <p>In the Java SE API documentation, <em>Unicode code point</em> is
jaroslav@68	106	* used for character values in the range between U+0000 and U+10FFFF,
jaroslav@68	107	* and <em>Unicode code unit</em> is used for 16-bit
jaroslav@68	108	* {@code char} values that are code units of the <em>UTF-16</em>
jaroslav@68	109	* encoding. For more information on Unicode terminology, refer to the
jaroslav@68	110	* <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
jaroslav@68	111	*
jaroslav@68	112	* @author Lee Boynton
jaroslav@68	113	* @author Guy Steele
jaroslav@68	114	* @author Akira Tanaka
jaroslav@68	115	* @author Martin Buchholz
jaroslav@68	116	* @author Ulf Zibis
jaroslav@68	117	* @since 1.0
jaroslav@68	118	*/
jaroslav@68	119	public final
jaroslav@68	120	class Character implements java.io.Serializable, Comparable<Character> {
jaroslav@68	121	/**
jaroslav@68	122	* The minimum radix available for conversion to and from strings.
jaroslav@68	123	* The constant value of this field is the smallest value permitted
jaroslav@68	124	* for the radix argument in radix-conversion methods such as the
jaroslav@68	125	* {@code digit} method, the {@code forDigit} method, and the
jaroslav@68	126	* {@code toString} method of class {@code Integer}.
jaroslav@68	127	*
jaroslav@68	128	* @see Character#digit(char, int)
jaroslav@68	129	* @see Character#forDigit(int, int)
jaroslav@68	130	* @see Integer#toString(int, int)
jaroslav@68	131	* @see Integer#valueOf(String)
jaroslav@68	132	*/
jaroslav@68	133	public static final int MIN_RADIX = 2;
jaroslav@68	134
jaroslav@68	135	/**
jaroslav@68	136	* The maximum radix available for conversion to and from strings.
jaroslav@68	137	* The constant value of this field is the largest value permitted
jaroslav@68	138	* for the radix argument in radix-conversion methods such as the
jaroslav@68	139	* {@code digit} method, the {@code forDigit} method, and the
jaroslav@68	140	* {@code toString} method of class {@code Integer}.
jaroslav@68	141	*
jaroslav@68	142	* @see Character#digit(char, int)
jaroslav@68	143	* @see Character#forDigit(int, int)
jaroslav@68	144	* @see Integer#toString(int, int)
jaroslav@68	145	* @see Integer#valueOf(String)
jaroslav@68	146	*/
jaroslav@68	147	public static final int MAX_RADIX = 36;
jaroslav@68	148
jaroslav@68	149	/**
jaroslav@68	150	* The constant value of this field is the smallest value of type
jaroslav@68	151	* {@code char}, {@code '\u005Cu0000'}.
jaroslav@68	152	*
jaroslav@68	153	* @since 1.0.2
jaroslav@68	154	*/
jaroslav@68	155	public static final char MIN_VALUE = '\u0000';
jaroslav@68	156
jaroslav@68	157	/**
jaroslav@68	158	* The constant value of this field is the largest value of type
jaroslav@68	159	* {@code char}, {@code '\u005CuFFFF'}.
jaroslav@68	160	*
jaroslav@68	161	* @since 1.0.2
jaroslav@68	162	*/
jaroslav@68	163	public static final char MAX_VALUE = '\uFFFF';
jaroslav@68	164
jaroslav@68	165	/**
jaroslav@68	166	* The {@code Class} instance representing the primitive type
jaroslav@68	167	* {@code char}.
jaroslav@68	168	*
jaroslav@68	169	* @since 1.1
jaroslav@68	170	*/
jaroslav@68	171	public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
jaroslav@68	172
jaroslav@68	173	/*
jaroslav@68	174	* Normative general types
jaroslav@68	175	*/
jaroslav@68	176
jaroslav@68	177	/*
jaroslav@68	178	* General character types
jaroslav@68	179	*/
jaroslav@68	180
jaroslav@68	181	/**
jaroslav@68	182	* General category "Cn" in the Unicode specification.
jaroslav@68	183	* @since 1.1
jaroslav@68	184	*/
jaroslav@68	185	public static final byte UNASSIGNED = 0;
jaroslav@68	186
jaroslav@68	187	/**
jaroslav@68	188	* General category "Lu" in the Unicode specification.
jaroslav@68	189	* @since 1.1
jaroslav@68	190	*/
jaroslav@68	191	public static final byte UPPERCASE_LETTER = 1;
jaroslav@68	192
jaroslav@68	193	/**
jaroslav@68	194	* General category "Ll" in the Unicode specification.
jaroslav@68	195	* @since 1.1
jaroslav@68	196	*/
jaroslav@68	197	public static final byte LOWERCASE_LETTER = 2;
jaroslav@68	198
jaroslav@68	199	/**
jaroslav@68	200	* General category "Lt" in the Unicode specification.
jaroslav@68	201	* @since 1.1
jaroslav@68	202	*/
jaroslav@68	203	public static final byte TITLECASE_LETTER = 3;
jaroslav@68	204
jaroslav@68	205	/**
jaroslav@68	206	* General category "Lm" in the Unicode specification.
jaroslav@68	207	* @since 1.1
jaroslav@68	208	*/
jaroslav@68	209	public static final byte MODIFIER_LETTER = 4;
jaroslav@68	210
jaroslav@68	211	/**
jaroslav@68	212	* General category "Lo" in the Unicode specification.
jaroslav@68	213	* @since 1.1
jaroslav@68	214	*/
jaroslav@68	215	public static final byte OTHER_LETTER = 5;
jaroslav@68	216
jaroslav@68	217	/**
jaroslav@68	218	* General category "Mn" in the Unicode specification.
jaroslav@68	219	* @since 1.1
jaroslav@68	220	*/
jaroslav@68	221	public static final byte NON_SPACING_MARK = 6;
jaroslav@68	222
jaroslav@68	223	/**
jaroslav@68	224	* General category "Me" in the Unicode specification.
jaroslav@68	225	* @since 1.1
jaroslav@68	226	*/
jaroslav@68	227	public static final byte ENCLOSING_MARK = 7;
jaroslav@68	228
jaroslav@68	229	/**
jaroslav@68	230	* General category "Mc" in the Unicode specification.
jaroslav@68	231	* @since 1.1
jaroslav@68	232	*/
jaroslav@68	233	public static final byte COMBINING_SPACING_MARK = 8;
jaroslav@68	234
jaroslav@68	235	/**
jaroslav@68	236	* General category "Nd" in the Unicode specification.
jaroslav@68	237	* @since 1.1
jaroslav@68	238	*/
jaroslav@68	239	public static final byte DECIMAL_DIGIT_NUMBER = 9;
jaroslav@68	240
jaroslav@68	241	/**
jaroslav@68	242	* General category "Nl" in the Unicode specification.
jaroslav@68	243	* @since 1.1
jaroslav@68	244	*/
jaroslav@68	245	public static final byte LETTER_NUMBER = 10;
jaroslav@68	246
jaroslav@68	247	/**
jaroslav@68	248	* General category "No" in the Unicode specification.
jaroslav@68	249	* @since 1.1
jaroslav@68	250	*/
jaroslav@68	251	public static final byte OTHER_NUMBER = 11;
jaroslav@68	252
jaroslav@68	253	/**
jaroslav@68	254	* General category "Zs" in the Unicode specification.
jaroslav@68	255	* @since 1.1
jaroslav@68	256	*/
jaroslav@68	257	public static final byte SPACE_SEPARATOR = 12;
jaroslav@68	258
jaroslav@68	259	/**
jaroslav@68	260	* General category "Zl" in the Unicode specification.
jaroslav@68	261	* @since 1.1
jaroslav@68	262	*/
jaroslav@68	263	public static final byte LINE_SEPARATOR = 13;
jaroslav@68	264
jaroslav@68	265	/**
jaroslav@68	266	* General category "Zp" in the Unicode specification.
jaroslav@68	267	* @since 1.1
jaroslav@68	268	*/
jaroslav@68	269	public static final byte PARAGRAPH_SEPARATOR = 14;
jaroslav@68	270
jaroslav@68	271	/**
jaroslav@68	272	* General category "Cc" in the Unicode specification.
jaroslav@68	273	* @since 1.1
jaroslav@68	274	*/
jaroslav@68	275	public static final byte CONTROL = 15;
jaroslav@68	276
jaroslav@68	277	/**
jaroslav@68	278	* General category "Cf" in the Unicode specification.
jaroslav@68	279	* @since 1.1
jaroslav@68	280	*/
jaroslav@68	281	public static final byte FORMAT = 16;
jaroslav@68	282
jaroslav@68	283	/**
jaroslav@68	284	* General category "Co" in the Unicode specification.
jaroslav@68	285	* @since 1.1
jaroslav@68	286	*/
jaroslav@68	287	public static final byte PRIVATE_USE = 18;
jaroslav@68	288
jaroslav@68	289	/**
jaroslav@68	290	* General category "Cs" in the Unicode specification.
jaroslav@68	291	* @since 1.1
jaroslav@68	292	*/
jaroslav@68	293	public static final byte SURROGATE = 19;
jaroslav@68	294
jaroslav@68	295	/**
jaroslav@68	296	* General category "Pd" in the Unicode specification.
jaroslav@68	297	* @since 1.1
jaroslav@68	298	*/
jaroslav@68	299	public static final byte DASH_PUNCTUATION = 20;
jaroslav@68	300
jaroslav@68	301	/**
jaroslav@68	302	* General category "Ps" in the Unicode specification.
jaroslav@68	303	* @since 1.1
jaroslav@68	304	*/
jaroslav@68	305	public static final byte START_PUNCTUATION = 21;
jaroslav@68	306
jaroslav@68	307	/**
jaroslav@68	308	* General category "Pe" in the Unicode specification.
jaroslav@68	309	* @since 1.1
jaroslav@68	310	*/
jaroslav@68	311	public static final byte END_PUNCTUATION = 22;
jaroslav@68	312
jaroslav@68	313	/**
jaroslav@68	314	* General category "Pc" in the Unicode specification.
jaroslav@68	315	* @since 1.1
jaroslav@68	316	*/
jaroslav@68	317	public static final byte CONNECTOR_PUNCTUATION = 23;
jaroslav@68	318
jaroslav@68	319	/**
jaroslav@68	320	* General category "Po" in the Unicode specification.
jaroslav@68	321	* @since 1.1
jaroslav@68	322	*/
jaroslav@68	323	public static final byte OTHER_PUNCTUATION = 24;
jaroslav@68	324
jaroslav@68	325	/**
jaroslav@68	326	* General category "Sm" in the Unicode specification.
jaroslav@68	327	* @since 1.1
jaroslav@68	328	*/
jaroslav@68	329	public static final byte MATH_SYMBOL = 25;
jaroslav@68	330
jaroslav@68	331	/**
jaroslav@68	332	* General category "Sc" in the Unicode specification.
jaroslav@68	333	* @since 1.1
jaroslav@68	334	*/
jaroslav@68	335	public static final byte CURRENCY_SYMBOL = 26;
jaroslav@68	336
jaroslav@68	337	/**
jaroslav@68	338	* General category "Sk" in the Unicode specification.
jaroslav@68	339	* @since 1.1
jaroslav@68	340	*/
jaroslav@68	341	public static final byte MODIFIER_SYMBOL = 27;
jaroslav@68	342
jaroslav@68	343	/**
jaroslav@68	344	* General category "So" in the Unicode specification.
jaroslav@68	345	* @since 1.1
jaroslav@68	346	*/
jaroslav@68	347	public static final byte OTHER_SYMBOL = 28;
jaroslav@68	348
jaroslav@68	349	/**
jaroslav@68	350	* General category "Pi" in the Unicode specification.
jaroslav@68	351	* @since 1.4
jaroslav@68	352	*/
jaroslav@68	353	public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
jaroslav@68	354
jaroslav@68	355	/**
jaroslav@68	356	* General category "Pf" in the Unicode specification.
jaroslav@68	357	* @since 1.4
jaroslav@68	358	*/
jaroslav@68	359	public static final byte FINAL_QUOTE_PUNCTUATION = 30;
jaroslav@68	360
jaroslav@68	361	/**
jaroslav@68	362	* Error flag. Use int (code point) to avoid confusion with U+FFFF.
jaroslav@68	363	*/
jaroslav@68	364	static final int ERROR = 0xFFFFFFFF;
jaroslav@68	365
jaroslav@68	366
jaroslav@68	367	/**
jaroslav@68	368	* Undefined bidirectional character type. Undefined {@code char}
jaroslav@68	369	* values have undefined directionality in the Unicode specification.
jaroslav@68	370	* @since 1.4
jaroslav@68	371	*/
jaroslav@68	372	public static final byte DIRECTIONALITY_UNDEFINED = -1;
jaroslav@68	373
jaroslav@68	374	/**
jaroslav@68	375	* Strong bidirectional character type "L" in the Unicode specification.
jaroslav@68	376	* @since 1.4
jaroslav@68	377	*/
jaroslav@68	378	public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
jaroslav@68	379
jaroslav@68	380	/**
jaroslav@68	381	* Strong bidirectional character type "R" in the Unicode specification.
jaroslav@68	382	* @since 1.4
jaroslav@68	383	*/
jaroslav@68	384	public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
jaroslav@68	385
jaroslav@68	386	/**
jaroslav@68	387	* Strong bidirectional character type "AL" in the Unicode specification.
jaroslav@68	388	* @since 1.4
jaroslav@68	389	*/
jaroslav@68	390	public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
jaroslav@68	391
jaroslav@68	392	/**
jaroslav@68	393	* Weak bidirectional character type "EN" in the Unicode specification.
jaroslav@68	394	* @since 1.4
jaroslav@68	395	*/
jaroslav@68	396	public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
jaroslav@68	397
jaroslav@68	398	/**
jaroslav@68	399	* Weak bidirectional character type "ES" in the Unicode specification.
jaroslav@68	400	* @since 1.4
jaroslav@68	401	*/
jaroslav@68	402	public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
jaroslav@68	403
jaroslav@68	404	/**
jaroslav@68	405	* Weak bidirectional character type "ET" in the Unicode specification.
jaroslav@68	406	* @since 1.4
jaroslav@68	407	*/
jaroslav@68	408	public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
jaroslav@68	409
jaroslav@68	410	/**
jaroslav@68	411	* Weak bidirectional character type "AN" in the Unicode specification.
jaroslav@68	412	* @since 1.4
jaroslav@68	413	*/
jaroslav@68	414	public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
jaroslav@68	415
jaroslav@68	416	/**
jaroslav@68	417	* Weak bidirectional character type "CS" in the Unicode specification.
jaroslav@68	418	* @since 1.4
jaroslav@68	419	*/
jaroslav@68	420	public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
jaroslav@68	421
jaroslav@68	422	/**
jaroslav@68	423	* Weak bidirectional character type "NSM" in the Unicode specification.
jaroslav@68	424	* @since 1.4
jaroslav@68	425	*/
jaroslav@68	426	public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
jaroslav@68	427
jaroslav@68	428	/**
jaroslav@68	429	* Weak bidirectional character type "BN" in the Unicode specification.
jaroslav@68	430	* @since 1.4
jaroslav@68	431	*/
jaroslav@68	432	public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
jaroslav@68	433
jaroslav@68	434	/**
jaroslav@68	435	* Neutral bidirectional character type "B" in the Unicode specification.
jaroslav@68	436	* @since 1.4
jaroslav@68	437	*/
jaroslav@68	438	public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
jaroslav@68	439
jaroslav@68	440	/**
jaroslav@68	441	* Neutral bidirectional character type "S" in the Unicode specification.
jaroslav@68	442	* @since 1.4
jaroslav@68	443	*/
jaroslav@68	444	public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
jaroslav@68	445
jaroslav@68	446	/**
jaroslav@68	447	* Neutral bidirectional character type "WS" in the Unicode specification.
jaroslav@68	448	* @since 1.4
jaroslav@68	449	*/
jaroslav@68	450	public static final byte DIRECTIONALITY_WHITESPACE = 12;
jaroslav@68	451
jaroslav@68	452	/**
jaroslav@68	453	* Neutral bidirectional character type "ON" in the Unicode specification.
jaroslav@68	454	* @since 1.4
jaroslav@68	455	*/
jaroslav@68	456	public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
jaroslav@68	457
jaroslav@68	458	/**
jaroslav@68	459	* Strong bidirectional character type "LRE" in the Unicode specification.
jaroslav@68	460	* @since 1.4
jaroslav@68	461	*/
jaroslav@68	462	public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
jaroslav@68	463
jaroslav@68	464	/**
jaroslav@68	465	* Strong bidirectional character type "LRO" in the Unicode specification.
jaroslav@68	466	* @since 1.4
jaroslav@68	467	*/
jaroslav@68	468	public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
jaroslav@68	469
jaroslav@68	470	/**
jaroslav@68	471	* Strong bidirectional character type "RLE" in the Unicode specification.
jaroslav@68	472	* @since 1.4
jaroslav@68	473	*/
jaroslav@68	474	public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
jaroslav@68	475
jaroslav@68	476	/**
jaroslav@68	477	* Strong bidirectional character type "RLO" in the Unicode specification.
jaroslav@68	478	* @since 1.4
jaroslav@68	479	*/
jaroslav@68	480	public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
jaroslav@68	481
jaroslav@68	482	/**
jaroslav@68	483	* Weak bidirectional character type "PDF" in the Unicode specification.
jaroslav@68	484	* @since 1.4
jaroslav@68	485	*/
jaroslav@68	486	public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
jaroslav@68	487
jaroslav@68	488	/**
jaroslav@68	489	* The minimum value of a
jaroslav@68	490	* <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
jaroslav@68	491	* Unicode high-surrogate code unit</a>
jaroslav@68	492	* in the UTF-16 encoding, constant {@code '\u005CuD800'}.
jaroslav@68	493	* A high-surrogate is also known as a <i>leading-surrogate</i>.
jaroslav@68	494	*
jaroslav@68	495	* @since 1.5
jaroslav@68	496	*/
jaroslav@68	497	public static final char MIN_HIGH_SURROGATE = '\uD800';
jaroslav@68	498
jaroslav@68	499	/**
jaroslav@68	500	* The maximum value of a
jaroslav@68	501	* <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
jaroslav@68	502	* Unicode high-surrogate code unit</a>
jaroslav@68	503	* in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
jaroslav@68	504	* A high-surrogate is also known as a <i>leading-surrogate</i>.
jaroslav@68	505	*
jaroslav@68	506	* @since 1.5
jaroslav@68	507	*/
jaroslav@68	508	public static final char MAX_HIGH_SURROGATE = '\uDBFF';
jaroslav@68	509
jaroslav@68	510	/**
jaroslav@68	511	* The minimum value of a
jaroslav@68	512	* <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
jaroslav@68	513	* Unicode low-surrogate code unit</a>
jaroslav@68	514	* in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
jaroslav@68	515	* A low-surrogate is also known as a <i>trailing-surrogate</i>.
jaroslav@68	516	*
jaroslav@68	517	* @since 1.5
jaroslav@68	518	*/
jaroslav@68	519	public static final char MIN_LOW_SURROGATE = '\uDC00';
jaroslav@68	520
jaroslav@68	521	/**
jaroslav@68	522	* The maximum value of a
jaroslav@68	523	* <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
jaroslav@68	524	* Unicode low-surrogate code unit</a>
jaroslav@68	525	* in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
jaroslav@68	526	* A low-surrogate is also known as a <i>trailing-surrogate</i>.
jaroslav@68	527	*
jaroslav@68	528	* @since 1.5
jaroslav@68	529	*/
jaroslav@68	530	public static final char MAX_LOW_SURROGATE = '\uDFFF';
jaroslav@68	531
jaroslav@68	532	/**
jaroslav@68	533	* The minimum value of a Unicode surrogate code unit in the
jaroslav@68	534	* UTF-16 encoding, constant {@code '\u005CuD800'}.
jaroslav@68	535	*
jaroslav@68	536	* @since 1.5
jaroslav@68	537	*/
jaroslav@68	538	public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
jaroslav@68	539
jaroslav@68	540	/**
jaroslav@68	541	* The maximum value of a Unicode surrogate code unit in the
jaroslav@68	542	* UTF-16 encoding, constant {@code '\u005CuDFFF'}.
jaroslav@68	543	*
jaroslav@68	544	* @since 1.5
jaroslav@68	545	*/
jaroslav@68	546	public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
jaroslav@68	547
jaroslav@68	548	/**
jaroslav@68	549	* The minimum value of a
jaroslav@68	550	* <a href="http://www.unicode.org/glossary/#supplementary_code_point">
jaroslav@68	551	* Unicode supplementary code point</a>, constant {@code U+10000}.
jaroslav@68	552	*
jaroslav@68	553	* @since 1.5
jaroslav@68	554	*/
jaroslav@68	555	public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
jaroslav@68	556
jaroslav@68	557	/**
jaroslav@68	558	* The minimum value of a
jaroslav@68	559	* <a href="http://www.unicode.org/glossary/#code_point">
jaroslav@68	560	* Unicode code point</a>, constant {@code U+0000}.
jaroslav@68	561	*
jaroslav@68	562	* @since 1.5
jaroslav@68	563	*/
jaroslav@68	564	public static final int MIN_CODE_POINT = 0x000000;
jaroslav@68	565
jaroslav@68	566	/**
jaroslav@68	567	* The maximum value of a
jaroslav@68	568	* <a href="http://www.unicode.org/glossary/#code_point">
jaroslav@68	569	* Unicode code point</a>, constant {@code U+10FFFF}.
jaroslav@68	570	*
jaroslav@68	571	* @since 1.5
jaroslav@68	572	*/
jaroslav@68	573	public static final int MAX_CODE_POINT = 0X10FFFF;
jaroslav@68	574
jtulach@1350	575	public static boolean isAlphabetic(int ch) {
jtulach@1350	576	throw new UnsupportedOperationException("isAlphabetic: " + (char)ch);
jtulach@1350	577	}
jtulach@1350	578
jtulach@1350	579	public static boolean isIdeographic(int ch) {
jtulach@1350	580	throw new UnsupportedOperationException("isIdeographic: " + (char)ch);
jtulach@1350	581	}
jtulach@1350	582
jtulach@1350	583	public static boolean isLowerCase(int ch) {
jtulach@1350	584	throw new UnsupportedOperationException("isLowerCase: " + (char)ch);
jtulach@1350	585	}
jtulach@1350	586
jtulach@1350	587	public static boolean isUpperCase(int ch) {
jtulach@1350	588	throw new UnsupportedOperationException("isUpperCase: " + (char)ch);
jtulach@1350	589	}
jtulach@1350	590
jtulach@1350	591	public static boolean isMirrored(int ch) {
jtulach@1350	592	throw new UnsupportedOperationException("isMirrored: " + (char)ch);
jtulach@1350	593	}
jtulach@1350	594
jtulach@1350	595	public static boolean isIdentifierIgnorable(int ch) {
jtulach@1350	596	throw new UnsupportedOperationException("isIdentifierIgnorable: " + (char)ch);
jtulach@1350	597	}
jtulach@1350	598
jtulach@1350	599	public static boolean isUnicodeIdentifierPart(int ch) {
jtulach@1350	600	throw new UnsupportedOperationException("isUnicodeIdentifierPart: " + (char)ch);
jtulach@1350	601	}
jtulach@1350	602
jtulach@1350	603	public static boolean isUnicodeIdentifierStart(int ch) {
jtulach@1350	604	throw new UnsupportedOperationException("isUnicodeIdentifierStart: " + (char)ch);
jtulach@1350	605	}
jtulach@1350	606
jtulach@1350	607	public static char toUpperCase(int ch) {
jtulach@1350	608	throw new UnsupportedOperationException("toUpperCase: " + (char)ch);
jtulach@1350	609	}
jtulach@1350	610
jtulach@1350	611	public static int toLowerCase(int ch) {
jtulach@1350	612	throw new UnsupportedOperationException("toLowerCase: " + (char)ch);
jtulach@1350	613	}
jtulach@1350	614
jaroslav@68	615
jaroslav@68	616	/**
jaroslav@68	617	* Instances of this class represent particular subsets of the Unicode
jaroslav@68	618	* character set. The only family of subsets defined in the
jaroslav@68	619	* {@code Character} class is {@link Character.UnicodeBlock}.
jaroslav@68	620	* Other portions of the Java API may define other subsets for their
jaroslav@68	621	* own purposes.
jaroslav@68	622	*
jaroslav@68	623	* @since 1.2
jaroslav@68	624	*/
jaroslav@68	625	public static class Subset {
jaroslav@68	626
jaroslav@68	627	private String name;
jaroslav@68	628
jaroslav@68	629	/**
jaroslav@68	630	* Constructs a new {@code Subset} instance.
jaroslav@68	631	*
jaroslav@68	632	* @param name The name of this subset
jaroslav@68	633	* @exception NullPointerException if name is {@code null}
jaroslav@68	634	*/
jaroslav@68	635	protected Subset(String name) {
jaroslav@68	636	if (name == null) {
jaroslav@68	637	throw new NullPointerException("name");
jaroslav@68	638	}
jaroslav@68	639	this.name = name;
jaroslav@68	640	}
jaroslav@68	641
jaroslav@68	642	/**
jaroslav@68	643	* Compares two {@code Subset} objects for equality.
jaroslav@68	644	* This method returns {@code true} if and only if
jaroslav@68	645	* {@code this} and the argument refer to the same
jaroslav@68	646	* object; since this method is {@code final}, this
jaroslav@68	647	* guarantee holds for all subclasses.
jaroslav@68	648	*/
jaroslav@68	649	public final boolean equals(Object obj) {
jaroslav@68	650	return (this == obj);
jaroslav@68	651	}
jaroslav@68	652
jaroslav@68	653	/**
jaroslav@68	654	* Returns the standard hash code as defined by the
jaroslav@68	655	* {@link Object#hashCode} method. This method
jaroslav@68	656	* is {@code final} in order to ensure that the
jaroslav@68	657	* {@code equals} and {@code hashCode} methods will
jaroslav@68	658	* be consistent in all subclasses.
jaroslav@68	659	*/
jaroslav@68	660	public final int hashCode() {
jaroslav@68	661	return super.hashCode();
jaroslav@68	662	}
jaroslav@68	663
jaroslav@68	664	/**
jaroslav@68	665	* Returns the name of this subset.
jaroslav@68	666	*/
jaroslav@68	667	public final String toString() {
jaroslav@68	668	return name;
jaroslav@68	669	}
jaroslav@68	670	}
jaroslav@68	671
jaroslav@68	672	// See http://www.unicode.org/Public/UNIDATA/Blocks.txt
jaroslav@68	673	// for the latest specification of Unicode Blocks.
jaroslav@68	674
jaroslav@68	675
jaroslav@68	676	/**
jaroslav@68	677	* The value of the {@code Character}.
jaroslav@68	678	*
jaroslav@68	679	* @serial
jaroslav@68	680	*/
jaroslav@68	681	private final char value;
jaroslav@68	682
jaroslav@68	683	/** use serialVersionUID from JDK 1.0.2 for interoperability */
jaroslav@68	684	private static final long serialVersionUID = 3786198910865385080L;
jaroslav@68	685
/**
 * Creates a {@code Character} wrapper holding the supplied {@code char}.
 * Every invocation yields a distinct object.
 *
 * @param value the {@code char} this object will wrap
 */
public Character(char value) {
    this.value = value;
}
jaroslav@68	696
jaroslav@68	697	private static class CharacterCache {
jaroslav@68	698	private CharacterCache(){}
jaroslav@68	699
jaroslav@68	700	static final Character cache[] = new Character[127 + 1];
jaroslav@68	701
jaroslav@68	702	static {
jaroslav@68	703	for (int i = 0; i < cache.length; i++)
jaroslav@68	704	cache[i] = new Character((char)i);
jaroslav@68	705	}
jaroslav@68	706	}
jaroslav@68	707
jaroslav@68	708	/**
jaroslav@68	709	* Returns a <tt>Character</tt> instance representing the specified
jaroslav@68	710	* <tt>char</tt> value.
jaroslav@68	711	* If a new <tt>Character</tt> instance is not required, this method
jaroslav@68	712	* should generally be used in preference to the constructor
jaroslav@68	713	* {@link #Character(char)}, as this method is likely to yield
jaroslav@68	714	* significantly better space and time performance by caching
jaroslav@68	715	* frequently requested values.
jaroslav@68	716	*
jaroslav@68	717	* This method will always cache values in the range {@code
jaroslav@68	718	* '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
jaroslav@68	719	* cache other values outside of this range.
jaroslav@68	720	*
jaroslav@68	721	* @param c a char value.
jaroslav@68	722	* @return a <tt>Character</tt> instance representing <tt>c</tt>.
jaroslav@68	723	* @since 1.5
jaroslav@68	724	*/
jaroslav@68	725	public static Character valueOf(char c) {
jaroslav@68	726	if (c <= 127) { // must cache
jaroslav@68	727	return CharacterCache.cache[(int)c];
jaroslav@68	728	}
jaroslav@68	729	return new Character(c);
jaroslav@68	730	}
jaroslav@68	731
jaroslav@68	732	/**
jaroslav@68	733	* Returns the value of this {@code Character} object.
jaroslav@68	734	* @return the primitive {@code char} value represented by
jaroslav@68	735	* this object.
jaroslav@68	736	*/
jaroslav@68	737	public char charValue() {
jaroslav@68	738	return value;
jaroslav@68	739	}
jaroslav@68	740
jaroslav@68	741	/**
jaroslav@68	742	* Returns a hash code for this {@code Character}; equal to the result
jaroslav@68	743	* of invoking {@code charValue()}.
jaroslav@68	744	*
jaroslav@68	745	* @return a hash code value for this {@code Character}
jaroslav@68	746	*/
jaroslav@68	747	public int hashCode() {
jaroslav@68	748	return (int)value;
jaroslav@68	749	}
jaroslav@68	750
jaroslav@68	751	/**
jaroslav@68	752	* Compares this object against the specified object.
jaroslav@68	753	* The result is {@code true} if and only if the argument is not
jaroslav@68	754	* {@code null} and is a {@code Character} object that
jaroslav@68	755	* represents the same {@code char} value as this object.
jaroslav@68	756	*
jaroslav@68	757	* @param obj the object to compare with.
jaroslav@68	758	* @return {@code true} if the objects are the same;
jaroslav@68	759	* {@code false} otherwise.
jaroslav@68	760	*/
jaroslav@68	761	public boolean equals(Object obj) {
jaroslav@68	762	if (obj instanceof Character) {
jaroslav@68	763	return value == ((Character)obj).charValue();
jaroslav@68	764	}
jaroslav@68	765	return false;
jaroslav@68	766	}
jaroslav@68	767
jaroslav@68	768	/**
jaroslav@68	769	* Returns a {@code String} object representing this
jaroslav@68	770	* {@code Character}'s value. The result is a string of
jaroslav@68	771	* length 1 whose sole component is the primitive
jaroslav@68	772	* {@code char} value represented by this
jaroslav@68	773	* {@code Character} object.
jaroslav@68	774	*
jaroslav@68	775	* @return a string representation of this object.
jaroslav@68	776	*/
jaroslav@68	777	public String toString() {
jaroslav@68	778	char buf[] = {value};
jaroslav@68	779	return String.valueOf(buf);
jaroslav@68	780	}
jaroslav@68	781
jaroslav@68	782	/**
jaroslav@68	783	* Returns a {@code String} object representing the
jaroslav@68	784	* specified {@code char}. The result is a string of length
jaroslav@68	785	* 1 consisting solely of the specified {@code char}.
jaroslav@68	786	*
jaroslav@68	787	* @param c the {@code char} to be converted
jaroslav@68	788	* @return the string representation of the specified {@code char}
jaroslav@68	789	* @since 1.4
jaroslav@68	790	*/
jaroslav@68	791	public static String toString(char c) {
jaroslav@68	792	return String.valueOf(c);
jaroslav@68	793	}
jaroslav@68	794
jaroslav@68	795	/**
jaroslav@68	796	* Determines whether the specified code point is a valid
jaroslav@68	797	* <a href="http://www.unicode.org/glossary/#code_point">
jaroslav@68	798	* Unicode code point value</a>.
jaroslav@68	799	*
jaroslav@68	800	* @param codePoint the Unicode code point to be tested
jaroslav@68	801	* @return {@code true} if the specified code point value is between
jaroslav@68	802	* {@link #MIN_CODE_POINT} and
jaroslav@68	803	* {@link #MAX_CODE_POINT} inclusive;
jaroslav@68	804	* {@code false} otherwise.
jaroslav@68	805	* @since 1.5
jaroslav@68	806	*/
jaroslav@68	807	public static boolean isValidCodePoint(int codePoint) {
jaroslav@68	808	// Optimized form of:
jaroslav@68	809	// codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
jaroslav@68	810	int plane = codePoint >>> 16;
jaroslav@68	811	return plane < ((MAX_CODE_POINT + 1) >>> 16);
jaroslav@68	812	}
jaroslav@68	813
jaroslav@68	814	/**
jaroslav@68	815	* Determines whether the specified character (Unicode code point)
jaroslav@68	816	* is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
jaroslav@68	817	* Such code points can be represented using a single {@code char}.
jaroslav@68	818	*
jaroslav@68	819	* @param codePoint the character (Unicode code point) to be tested
jaroslav@68	820	* @return {@code true} if the specified code point is between
jaroslav@68	821	* {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
jaroslav@68	822	* {@code false} otherwise.
jaroslav@68	823	* @since 1.7
jaroslav@68	824	*/
jaroslav@68	825	public static boolean isBmpCodePoint(int codePoint) {
jaroslav@68	826	return codePoint >>> 16 == 0;
jaroslav@68	827	// Optimized form of:
jaroslav@68	828	// codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
jaroslav@68	829	// We consistently use logical shift (>>>) to facilitate
jaroslav@68	830	// additional runtime optimizations.
jaroslav@68	831	}
jaroslav@68	832
jaroslav@68	833	/**
jaroslav@68	834	* Determines whether the specified character (Unicode code point)
jaroslav@68	835	* is in the <a href="#supplementary">supplementary character</a> range.
jaroslav@68	836	*
jaroslav@68	837	* @param codePoint the character (Unicode code point) to be tested
jaroslav@68	838	* @return {@code true} if the specified code point is between
jaroslav@68	839	* {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
jaroslav@68	840	* {@link #MAX_CODE_POINT} inclusive;
jaroslav@68	841	* {@code false} otherwise.
jaroslav@68	842	* @since 1.5
jaroslav@68	843	*/
jaroslav@68	844	public static boolean isSupplementaryCodePoint(int codePoint) {
jaroslav@68	845	return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
jaroslav@68	846	&& codePoint < MAX_CODE_POINT + 1;
jaroslav@68	847	}
jaroslav@68	848
jaroslav@68	849	/**
jaroslav@68	850	* Determines if the given {@code char} value is a
jaroslav@68	851	* <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
jaroslav@68	852	* Unicode high-surrogate code unit</a>
jaroslav@68	853	* (also known as <i>leading-surrogate code unit</i>).
jaroslav@68	854	*
jaroslav@68	855	* <p>Such values do not represent characters by themselves,
jaroslav@68	856	* but are used in the representation of
jaroslav@68	857	* <a href="#supplementary">supplementary characters</a>
jaroslav@68	858	* in the UTF-16 encoding.
jaroslav@68	859	*
jaroslav@68	860	* @param ch the {@code char} value to be tested.
jaroslav@68	861	* @return {@code true} if the {@code char} value is between
jaroslav@68	862	* {@link #MIN_HIGH_SURROGATE} and
jaroslav@68	863	* {@link #MAX_HIGH_SURROGATE} inclusive;
jaroslav@68	864	* {@code false} otherwise.
jaroslav@68	865	* @see Character#isLowSurrogate(char)
jaroslav@68	866	* @see Character.UnicodeBlock#of(int)
jaroslav@68	867	* @since 1.5
jaroslav@68	868	*/
jaroslav@68	869	public static boolean isHighSurrogate(char ch) {
jaroslav@68	870	// Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
jaroslav@68	871	return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
jaroslav@68	872	}
jaroslav@68	873
jaroslav@68	874	/**
jaroslav@68	875	* Determines if the given {@code char} value is a
jaroslav@68	876	* <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
jaroslav@68	877	* Unicode low-surrogate code unit</a>
jaroslav@68	878	* (also known as <i>trailing-surrogate code unit</i>).
jaroslav@68	879	*
jaroslav@68	880	* <p>Such values do not represent characters by themselves,
jaroslav@68	881	* but are used in the representation of
jaroslav@68	882	* <a href="#supplementary">supplementary characters</a>
jaroslav@68	883	* in the UTF-16 encoding.
jaroslav@68	884	*
jaroslav@68	885	* @param ch the {@code char} value to be tested.
jaroslav@68	886	* @return {@code true} if the {@code char} value is between
jaroslav@68	887	* {@link #MIN_LOW_SURROGATE} and
jaroslav@68	888	* {@link #MAX_LOW_SURROGATE} inclusive;
jaroslav@68	889	* {@code false} otherwise.
jaroslav@68	890	* @see Character#isHighSurrogate(char)
jaroslav@68	891	* @since 1.5
jaroslav@68	892	*/
jaroslav@68	893	public static boolean isLowSurrogate(char ch) {
jaroslav@68	894	return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
jaroslav@68	895	}
jaroslav@68	896
jaroslav@68	897	/**
jaroslav@68	898	* Determines if the given {@code char} value is a Unicode
jaroslav@68	899	* <i>surrogate code unit</i>.
jaroslav@68	900	*
jaroslav@68	901	* <p>Such values do not represent characters by themselves,
jaroslav@68	902	* but are used in the representation of
jaroslav@68	903	* <a href="#supplementary">supplementary characters</a>
jaroslav@68	904	* in the UTF-16 encoding.
jaroslav@68	905	*
jaroslav@68	906	* <p>A char value is a surrogate code unit if and only if it is either
jaroslav@68	907	* a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
jaroslav@68	908	* a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
jaroslav@68	909	*
jaroslav@68	910	* @param ch the {@code char} value to be tested.
jaroslav@68	911	* @return {@code true} if the {@code char} value is between
jaroslav@68	912	* {@link #MIN_SURROGATE} and
jaroslav@68	913	* {@link #MAX_SURROGATE} inclusive;
jaroslav@68	914	* {@code false} otherwise.
jaroslav@68	915	* @since 1.7
jaroslav@68	916	*/
jaroslav@68	917	public static boolean isSurrogate(char ch) {
jaroslav@68	918	return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
jaroslav@68	919	}
jaroslav@68	920
jaroslav@68	921	/**
jaroslav@68	922	* Determines whether the specified pair of {@code char}
jaroslav@68	923	* values is a valid
jaroslav@68	924	* <a href="http://www.unicode.org/glossary/#surrogate_pair">
jaroslav@68	925	* Unicode surrogate pair</a>.
jaroslav@68	926
jaroslav@68	927	* <p>This method is equivalent to the expression:
jaroslav@68	928	* <blockquote><pre>
jaroslav@68	929	* isHighSurrogate(high) && isLowSurrogate(low)
jaroslav@68	930	* </pre></blockquote>
jaroslav@68	931	*
jaroslav@68	932	* @param high the high-surrogate code value to be tested
jaroslav@68	933	* @param low the low-surrogate code value to be tested
jaroslav@68	934	* @return {@code true} if the specified high and
jaroslav@68	935	* low-surrogate code values represent a valid surrogate pair;
jaroslav@68	936	* {@code false} otherwise.
jaroslav@68	937	* @since 1.5
jaroslav@68	938	*/
jaroslav@68	939	public static boolean isSurrogatePair(char high, char low) {
jaroslav@68	940	return isHighSurrogate(high) && isLowSurrogate(low);
jaroslav@68	941	}
jaroslav@68	942
jaroslav@68	943	/**
jaroslav@68	944	* Determines the number of {@code char} values needed to
jaroslav@68	945	* represent the specified character (Unicode code point). If the
jaroslav@68	946	* specified character is equal to or greater than 0x10000, then
jaroslav@68	947	* the method returns 2. Otherwise, the method returns 1.
jaroslav@68	948	*
jaroslav@68	949	* <p>This method doesn't validate the specified character to be a
jaroslav@68	950	* valid Unicode code point. The caller must validate the
jaroslav@68	951	* character value using {@link #isValidCodePoint(int) isValidCodePoint}
jaroslav@68	952	* if necessary.
jaroslav@68	953	*
jaroslav@68	954	* @param codePoint the character (Unicode code point) to be tested.
jaroslav@68	955	* @return 2 if the character is a valid supplementary character; 1 otherwise.
jaroslav@68	956	* @see Character#isSupplementaryCodePoint(int)
jaroslav@68	957	* @since 1.5
jaroslav@68	958	*/
jaroslav@68	959	public static int charCount(int codePoint) {
jaroslav@68	960	return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
jaroslav@68	961	}
jaroslav@68	962
jaroslav@68	963	/**
jaroslav@68	964	* Converts the specified surrogate pair to its supplementary code
jaroslav@68	965	* point value. This method does not validate the specified
jaroslav@68	966	* surrogate pair. The caller must validate it using {@link
jaroslav@68	967	* #isSurrogatePair(char, char) isSurrogatePair} if necessary.
jaroslav@68	968	*
jaroslav@68	969	* @param high the high-surrogate code unit
jaroslav@68	970	* @param low the low-surrogate code unit
jaroslav@68	971	* @return the supplementary code point composed from the
jaroslav@68	972	* specified surrogate pair.
jaroslav@68	973	* @since 1.5
jaroslav@68	974	*/
jaroslav@68	975	public static int toCodePoint(char high, char low) {
jaroslav@68	976	// Optimized form of:
jaroslav@68	977	// return ((high - MIN_HIGH_SURROGATE) << 10)
jaroslav@68	978	// + (low - MIN_LOW_SURROGATE)
jaroslav@68	979	// + MIN_SUPPLEMENTARY_CODE_POINT;
jaroslav@68	980	return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
jaroslav@68	981	- (MIN_HIGH_SURROGATE << 10)
jaroslav@68	982	- MIN_LOW_SURROGATE);
jaroslav@68	983	}
jaroslav@68	984
jaroslav@68	985	/**
jaroslav@68	986	* Returns the code point at the given index of the
jaroslav@68	987	* {@code CharSequence}. If the {@code char} value at
jaroslav@68	988	* the given index in the {@code CharSequence} is in the
jaroslav@68	989	* high-surrogate range, the following index is less than the
jaroslav@68	990	* length of the {@code CharSequence}, and the
jaroslav@68	991	* {@code char} value at the following index is in the
jaroslav@68	992	* low-surrogate range, then the supplementary code point
jaroslav@68	993	* corresponding to this surrogate pair is returned. Otherwise,
jaroslav@68	994	* the {@code char} value at the given index is returned.
jaroslav@68	995	*
jaroslav@68	996	* @param seq a sequence of {@code char} values (Unicode code
jaroslav@68	997	* units)
jaroslav@68	998	* @param index the index to the {@code char} values (Unicode
jaroslav@68	999	* code units) in {@code seq} to be converted
jaroslav@68	1000	* @return the Unicode code point at the given index
jaroslav@68	1001	* @exception NullPointerException if {@code seq} is null.
jaroslav@68	1002	* @exception IndexOutOfBoundsException if the value
jaroslav@68	1003	* {@code index} is negative or not less than
jaroslav@68	1004	* {@link CharSequence#length() seq.length()}.
jaroslav@68	1005	* @since 1.5
jaroslav@68	1006	*/
jaroslav@68	1007	public static int codePointAt(CharSequence seq, int index) {
jaroslav@68	1008	char c1 = seq.charAt(index++);
jaroslav@68	1009	if (isHighSurrogate(c1)) {
jaroslav@68	1010	if (index < seq.length()) {
jaroslav@68	1011	char c2 = seq.charAt(index);
jaroslav@68	1012	if (isLowSurrogate(c2)) {
jaroslav@68	1013	return toCodePoint(c1, c2);
jaroslav@68	1014	}
jaroslav@68	1015	}
jaroslav@68	1016	}
jaroslav@68	1017	return c1;
jaroslav@68	1018	}
jaroslav@68	1019
jaroslav@68	1020	/**
jaroslav@68	1021	* Returns the code point at the given index of the
jaroslav@68	1022	* {@code char} array. If the {@code char} value at
jaroslav@68	1023	* the given index in the {@code char} array is in the
jaroslav@68	1024	* high-surrogate range, the following index is less than the
jaroslav@68	1025	* length of the {@code char} array, and the
jaroslav@68	1026	* {@code char} value at the following index is in the
jaroslav@68	1027	* low-surrogate range, then the supplementary code point
jaroslav@68	1028	* corresponding to this surrogate pair is returned. Otherwise,
jaroslav@68	1029	* the {@code char} value at the given index is returned.
jaroslav@68	1030	*
jaroslav@68	1031	* @param a the {@code char} array
jaroslav@68	1032	* @param index the index to the {@code char} values (Unicode
jaroslav@68	1033	* code units) in the {@code char} array to be converted
jaroslav@68	1034	* @return the Unicode code point at the given index
jaroslav@68	1035	* @exception NullPointerException if {@code a} is null.
jaroslav@68	1036	* @exception IndexOutOfBoundsException if the value
jaroslav@68	1037	* {@code index} is negative or not less than
jaroslav@68	1038	* the length of the {@code char} array.
jaroslav@68	1039	* @since 1.5
jaroslav@68	1040	*/
jaroslav@68	1041	public static int codePointAt(char[] a, int index) {
jaroslav@68	1042	return codePointAtImpl(a, index, a.length);
jaroslav@68	1043	}
jaroslav@68	1044
jaroslav@68	1045	/**
jaroslav@68	1046	* Returns the code point at the given index of the
jaroslav@68	1047	* {@code char} array, where only array elements with
jaroslav@68	1048	* {@code index} less than {@code limit} can be used. If
jaroslav@68	1049	* the {@code char} value at the given index in the
jaroslav@68	1050	* {@code char} array is in the high-surrogate range, the
jaroslav@68	1051	* following index is less than the {@code limit}, and the
jaroslav@68	1052	* {@code char} value at the following index is in the
jaroslav@68	1053	* low-surrogate range, then the supplementary code point
jaroslav@68	1054	* corresponding to this surrogate pair is returned. Otherwise,
jaroslav@68	1055	* the {@code char} value at the given index is returned.
jaroslav@68	1056	*
jaroslav@68	1057	* @param a the {@code char} array
jaroslav@68	1058	* @param index the index to the {@code char} values (Unicode
jaroslav@68	1059	* code units) in the {@code char} array to be converted
jaroslav@68	1060	* @param limit the index after the last array element that
jaroslav@68	1061	* can be used in the {@code char} array
jaroslav@68	1062	* @return the Unicode code point at the given index
jaroslav@68	1063	* @exception NullPointerException if {@code a} is null.
jaroslav@68	1064	* @exception IndexOutOfBoundsException if the {@code index}
jaroslav@68	1065	* argument is negative or not less than the {@code limit}
jaroslav@68	1066	* argument, or if the {@code limit} argument is negative or
jaroslav@68	1067	* greater than the length of the {@code char} array.
jaroslav@68	1068	* @since 1.5
jaroslav@68	1069	*/
jaroslav@68	1070	public static int codePointAt(char[] a, int index, int limit) {
jaroslav@68	1071	if (index >= limit \|\| limit < 0 \|\| limit > a.length) {
jaroslav@68	1072	throw new IndexOutOfBoundsException();
jaroslav@68	1073	}
jaroslav@68	1074	return codePointAtImpl(a, index, limit);
jaroslav@68	1075	}
jaroslav@68	1076
jaroslav@68	1077	// throws ArrayIndexOutofBoundsException if index out of bounds
jaroslav@68	1078	static int codePointAtImpl(char[] a, int index, int limit) {
jaroslav@68	1079	char c1 = a[index++];
jaroslav@68	1080	if (isHighSurrogate(c1)) {
jaroslav@68	1081	if (index < limit) {
jaroslav@68	1082	char c2 = a[index];
jaroslav@68	1083	if (isLowSurrogate(c2)) {
jaroslav@68	1084	return toCodePoint(c1, c2);
jaroslav@68	1085	}
jaroslav@68	1086	}
jaroslav@68	1087	}
jaroslav@68	1088	return c1;
jaroslav@68	1089	}
jaroslav@68	1090
jaroslav@68	1091	/**
jaroslav@68	1092	* Returns the code point preceding the given index of the
jaroslav@68	1093	* {@code CharSequence}. If the {@code char} value at
jaroslav@68	1094	* {@code (index - 1)} in the {@code CharSequence} is in
jaroslav@68	1095	* the low-surrogate range, {@code (index - 2)} is not
jaroslav@68	1096	* negative, and the {@code char} value at {@code (index - 2)}
jaroslav@68	1097	* in the {@code CharSequence} is in the
jaroslav@68	1098	* high-surrogate range, then the supplementary code point
jaroslav@68	1099	* corresponding to this surrogate pair is returned. Otherwise,
jaroslav@68	1100	* the {@code char} value at {@code (index - 1)} is
jaroslav@68	1101	* returned.
jaroslav@68	1102	*
jaroslav@68	1103	* @param seq the {@code CharSequence} instance
jaroslav@68	1104	* @param index the index following the code point that should be returned
jaroslav@68	1105	* @return the Unicode code point value before the given index.
jaroslav@68	1106	* @exception NullPointerException if {@code seq} is null.
jaroslav@68	1107	* @exception IndexOutOfBoundsException if the {@code index}
jaroslav@68	1108	* argument is less than 1 or greater than {@link
jaroslav@68	1109	* CharSequence#length() seq.length()}.
jaroslav@68	1110	* @since 1.5
jaroslav@68	1111	*/
jaroslav@68	1112	public static int codePointBefore(CharSequence seq, int index) {
jaroslav@68	1113	char c2 = seq.charAt(--index);
jaroslav@68	1114	if (isLowSurrogate(c2)) {
jaroslav@68	1115	if (index > 0) {
jaroslav@68	1116	char c1 = seq.charAt(--index);
jaroslav@68	1117	if (isHighSurrogate(c1)) {
jaroslav@68	1118	return toCodePoint(c1, c2);
jaroslav@68	1119	}
jaroslav@68	1120	}
jaroslav@68	1121	}
jaroslav@68	1122	return c2;
jaroslav@68	1123	}
jaroslav@68	1124
jaroslav@68	1125	/**
jaroslav@68	1126	* Returns the code point preceding the given index of the
jaroslav@68	1127	* {@code char} array. If the {@code char} value at
jaroslav@68	1128	* {@code (index - 1)} in the {@code char} array is in
jaroslav@68	1129	* the low-surrogate range, {@code (index - 2)} is not
jaroslav@68	1130	* negative, and the {@code char} value at {@code (index - 2)}
jaroslav@68	1131	* in the {@code char} array is in the
jaroslav@68	1132	* high-surrogate range, then the supplementary code point
jaroslav@68	1133	* corresponding to this surrogate pair is returned. Otherwise,
jaroslav@68	1134	* the {@code char} value at {@code (index - 1)} is
jaroslav@68	1135	* returned.
jaroslav@68	1136	*
jaroslav@68	1137	* @param a the {@code char} array
jaroslav@68	1138	* @param index the index following the code point that should be returned
jaroslav@68	1139	* @return the Unicode code point value before the given index.
jaroslav@68	1140	* @exception NullPointerException if {@code a} is null.
jaroslav@68	1141	* @exception IndexOutOfBoundsException if the {@code index}
jaroslav@68	1142	* argument is less than 1 or greater than the length of the
jaroslav@68	1143	* {@code char} array
jaroslav@68	1144	* @since 1.5
jaroslav@68	1145	*/
jaroslav@68	1146	public static int codePointBefore(char[] a, int index) {
jaroslav@68	1147	return codePointBeforeImpl(a, index, 0);
jaroslav@68	1148	}
jaroslav@68	1149
jaroslav@68	1150	/**
jaroslav@68	1151	* Returns the code point preceding the given index of the
jaroslav@68	1152	* {@code char} array, where only array elements with
jaroslav@68	1153	* {@code index} greater than or equal to {@code start}
jaroslav@68	1154	* can be used. If the {@code char} value at {@code (index - 1)}
jaroslav@68	1155	* in the {@code char} array is in the
jaroslav@68	1156	* low-surrogate range, {@code (index - 2)} is not less than
jaroslav@68	1157	* {@code start}, and the {@code char} value at
jaroslav@68	1158	* {@code (index - 2)} in the {@code char} array is in
jaroslav@68	1159	* the high-surrogate range, then the supplementary code point
jaroslav@68	1160	* corresponding to this surrogate pair is returned. Otherwise,
jaroslav@68	1161	* the {@code char} value at {@code (index - 1)} is
jaroslav@68	1162	* returned.
jaroslav@68	1163	*
jaroslav@68	1164	* @param a the {@code char} array
jaroslav@68	1165	* @param index the index following the code point that should be returned
jaroslav@68	1166	* @param start the index of the first array element in the
jaroslav@68	1167	* {@code char} array
jaroslav@68	1168	* @return the Unicode code point value before the given index.
jaroslav@68	1169	* @exception NullPointerException if {@code a} is null.
jaroslav@68	1170	* @exception IndexOutOfBoundsException if the {@code index}
jaroslav@68	1171	* argument is not greater than the {@code start} argument or
jaroslav@68	1172	* is greater than the length of the {@code char} array, or
jaroslav@68	1173	* if the {@code start} argument is negative or not less than
jaroslav@68	1174	* the length of the {@code char} array.
jaroslav@68	1175	* @since 1.5
jaroslav@68	1176	*/
jaroslav@68	1177	public static int codePointBefore(char[] a, int index, int start) {
jaroslav@68	1178	if (index <= start \|\| start < 0 \|\| start >= a.length) {
jaroslav@68	1179	throw new IndexOutOfBoundsException();
jaroslav@68	1180	}
jaroslav@68	1181	return codePointBeforeImpl(a, index, start);
jaroslav@68	1182	}
jaroslav@68	1183
jaroslav@68	1184	// throws ArrayIndexOutofBoundsException if index-1 out of bounds
jaroslav@68	1185	static int codePointBeforeImpl(char[] a, int index, int start) {
jaroslav@68	1186	char c2 = a[--index];
jaroslav@68	1187	if (isLowSurrogate(c2)) {
jaroslav@68	1188	if (index > start) {
jaroslav@68	1189	char c1 = a[--index];
jaroslav@68	1190	if (isHighSurrogate(c1)) {
jaroslav@68	1191	return toCodePoint(c1, c2);
jaroslav@68	1192	}
jaroslav@68	1193	}
jaroslav@68	1194	}
jaroslav@68	1195	return c2;
jaroslav@68	1196	}
jaroslav@68	1197
jaroslav@68	1198	/**
jaroslav@68	1199	* Returns the leading surrogate (a
jaroslav@68	1200	* <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
jaroslav@68	1201	* high surrogate code unit</a>) of the
jaroslav@68	1202	* <a href="http://www.unicode.org/glossary/#surrogate_pair">
jaroslav@68	1203	* surrogate pair</a>
jaroslav@68	1204	* representing the specified supplementary character (Unicode
jaroslav@68	1205	* code point) in the UTF-16 encoding. If the specified character
jaroslav@68	1206	* is not a
jaroslav@68	1207	* <a href="Character.html#supplementary">supplementary character</a>,
jaroslav@68	1208	* an unspecified {@code char} is returned.
jaroslav@68	1209	*
jaroslav@68	1210	* <p>If
jaroslav@68	1211	* {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
jaroslav@68	1212	* is {@code true}, then
jaroslav@68	1213	* {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
jaroslav@68	1214	* {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
jaroslav@68	1215	* are also always {@code true}.
jaroslav@68	1216	*
jaroslav@68	1217	* @param codePoint a supplementary character (Unicode code point)
jaroslav@68	1218	* @return the leading surrogate code unit used to represent the
jaroslav@68	1219	* character in the UTF-16 encoding
jaroslav@68	1220	* @since 1.7
jaroslav@68	1221	*/
jaroslav@68	1222	public static char highSurrogate(int codePoint) {
jaroslav@68	1223	return (char) ((codePoint >>> 10)
jaroslav@68	1224	+ (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
jaroslav@68	1225	}
jaroslav@68	1226
jaroslav@68	1227	/**
jaroslav@68	1228	* Returns the trailing surrogate (a
jaroslav@68	1229	* <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
jaroslav@68	1230	* low surrogate code unit</a>) of the
jaroslav@68	1231	* <a href="http://www.unicode.org/glossary/#surrogate_pair">
jaroslav@68	1232	* surrogate pair</a>
jaroslav@68	1233	* representing the specified supplementary character (Unicode
jaroslav@68	1234	* code point) in the UTF-16 encoding. If the specified character
jaroslav@68	1235	* is not a
jaroslav@68	1236	* <a href="Character.html#supplementary">supplementary character</a>,
jaroslav@68	1237	* an unspecified {@code char} is returned.
jaroslav@68	1238	*
jaroslav@68	1239	* <p>If
jaroslav@68	1240	* {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
jaroslav@68	1241	* is {@code true}, then
jaroslav@68	1242	* {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
jaroslav@68	1243	* {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
jaroslav@68	1244	* are also always {@code true}.
jaroslav@68	1245	*
jaroslav@68	1246	* @param codePoint a supplementary character (Unicode code point)
jaroslav@68	1247	* @return the trailing surrogate code unit used to represent the
jaroslav@68	1248	* character in the UTF-16 encoding
jaroslav@68	1249	* @since 1.7
jaroslav@68	1250	*/
jaroslav@68	1251	public static char lowSurrogate(int codePoint) {
jaroslav@68	1252	return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
jaroslav@68	1253	}
jaroslav@68	1254
jaroslav@68	1255	/**
jaroslav@68	1256	* Converts the specified character (Unicode code point) to its
jaroslav@68	1257	* UTF-16 representation. If the specified code point is a BMP
jaroslav@68	1258	* (Basic Multilingual Plane or Plane 0) value, the same value is
jaroslav@68	1259	* stored in {@code dst[dstIndex]}, and 1 is returned. If the
jaroslav@68	1260	* specified code point is a supplementary character, its
jaroslav@68	1261	* surrogate values are stored in {@code dst[dstIndex]}
jaroslav@68	1262	* (high-surrogate) and {@code dst[dstIndex+1]}
jaroslav@68	1263	* (low-surrogate), and 2 is returned.
jaroslav@68	1264	*
jaroslav@68	1265	* @param codePoint the character (Unicode code point) to be converted.
jaroslav@68	1266	* @param dst an array of {@code char} in which the
jaroslav@68	1267	* {@code codePoint}'s UTF-16 value is stored.
jaroslav@68	1268	* @param dstIndex the start index into the {@code dst}
jaroslav@68	1269	* array where the converted value is stored.
jaroslav@68	1270	* @return 1 if the code point is a BMP code point, 2 if the
jaroslav@68	1271	* code point is a supplementary code point.
jaroslav@68	1272	* @exception IllegalArgumentException if the specified
jaroslav@68	1273	* {@code codePoint} is not a valid Unicode code point.
jaroslav@68	1274	* @exception NullPointerException if the specified {@code dst} is null.
jaroslav@68	1275	* @exception IndexOutOfBoundsException if {@code dstIndex}
jaroslav@68	1276	* is negative or not less than {@code dst.length}, or if
jaroslav@68	1277	* {@code dst} at {@code dstIndex} doesn't have enough
jaroslav@68	1278	* array element(s) to store the resulting {@code char}
jaroslav@68	1279	* value(s). (If {@code dstIndex} is equal to
jaroslav@68	1280	* {@code dst.length-1} and the specified
jaroslav@68	1281	* {@code codePoint} is a supplementary character, the
jaroslav@68	1282	* high-surrogate value is not stored in
jaroslav@68	1283	* {@code dst[dstIndex]}.)
jaroslav@68	1284	* @since 1.5
jaroslav@68	1285	*/
jaroslav@68	1286	public static int toChars(int codePoint, char[] dst, int dstIndex) {
jaroslav@68	1287	if (isBmpCodePoint(codePoint)) {
jaroslav@68	1288	dst[dstIndex] = (char) codePoint;
jaroslav@68	1289	return 1;
jaroslav@68	1290	} else if (isValidCodePoint(codePoint)) {
jaroslav@68	1291	toSurrogates(codePoint, dst, dstIndex);
jaroslav@68	1292	return 2;
jaroslav@68	1293	} else {
jaroslav@68	1294	throw new IllegalArgumentException();
jaroslav@68	1295	}
jaroslav@68	1296	}
jaroslav@68	1297
jaroslav@68	1298	/**
jaroslav@68	1299	* Converts the specified character (Unicode code point) to its
jaroslav@68	1300	* UTF-16 representation stored in a {@code char} array. If
jaroslav@68	1301	* the specified code point is a BMP (Basic Multilingual Plane or
jaroslav@68	1302	* Plane 0) value, the resulting {@code char} array has
jaroslav@68	1303	* the same value as {@code codePoint}. If the specified code
jaroslav@68	1304	* point is a supplementary code point, the resulting
jaroslav@68	1305	* {@code char} array has the corresponding surrogate pair.
jaroslav@68	1306	*
jaroslav@68	1307	* @param codePoint a Unicode code point
jaroslav@68	1308	* @return a {@code char} array having
jaroslav@68	1309	* {@code codePoint}'s UTF-16 representation.
jaroslav@68	1310	* @exception IllegalArgumentException if the specified
jaroslav@68	1311	* {@code codePoint} is not a valid Unicode code point.
jaroslav@68	1312	* @since 1.5
jaroslav@68	1313	*/
jaroslav@68	1314	public static char[] toChars(int codePoint) {
jaroslav@68	1315	if (isBmpCodePoint(codePoint)) {
jaroslav@68	1316	return new char[] { (char) codePoint };
jaroslav@68	1317	} else if (isValidCodePoint(codePoint)) {
jaroslav@68	1318	char[] result = new char[2];
jaroslav@68	1319	toSurrogates(codePoint, result, 0);
jaroslav@68	1320	return result;
jaroslav@68	1321	} else {
jaroslav@68	1322	throw new IllegalArgumentException();
jaroslav@68	1323	}
jaroslav@68	1324	}
jaroslav@68	1325
jaroslav@68	1326	static void toSurrogates(int codePoint, char[] dst, int index) {
jaroslav@68	1327	// We write elements "backwards" to guarantee all-or-nothing
jaroslav@68	1328	dst[index+1] = lowSurrogate(codePoint);
jaroslav@68	1329	dst[index] = highSurrogate(codePoint);
jaroslav@68	1330	}
jaroslav@68	1331
jaroslav@68	1332	/**
jaroslav@68	1333	* Returns the number of Unicode code points in the text range of
jaroslav@68	1334	* the specified char sequence. The text range begins at the
jaroslav@68	1335	* specified {@code beginIndex} and extends to the
jaroslav@68	1336	* {@code char} at index {@code endIndex - 1}. Thus the
jaroslav@68	1337	* length (in {@code char}s) of the text range is
jaroslav@68	1338	* {@code endIndex-beginIndex}. Unpaired surrogates within
jaroslav@68	1339	* the text range count as one code point each.
jaroslav@68	1340	*
jaroslav@68	1341	* @param seq the char sequence
jaroslav@68	1342	* @param beginIndex the index to the first {@code char} of
jaroslav@68	1343	* the text range.
jaroslav@68	1344	* @param endIndex the index after the last {@code char} of
jaroslav@68	1345	* the text range.
jaroslav@68	1346	* @return the number of Unicode code points in the specified text
jaroslav@68	1347	* range
jaroslav@68	1348	* @exception NullPointerException if {@code seq} is null.
jaroslav@68	1349	* @exception IndexOutOfBoundsException if the
jaroslav@68	1350	* {@code beginIndex} is negative, or {@code endIndex}
jaroslav@68	1351	* is larger than the length of the given sequence, or
jaroslav@68	1352	* {@code beginIndex} is larger than {@code endIndex}.
jaroslav@68	1353	* @since 1.5
jaroslav@68	1354	*/
jaroslav@68	1355	public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
jaroslav@68	1356	int length = seq.length();
jaroslav@68	1357	if (beginIndex < 0 \|\| endIndex > length \|\| beginIndex > endIndex) {
jaroslav@68	1358	throw new IndexOutOfBoundsException();
jaroslav@68	1359	}
jaroslav@68	1360	int n = endIndex - beginIndex;
jaroslav@68	1361	for (int i = beginIndex; i < endIndex; ) {
jaroslav@68	1362	if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
jaroslav@68	1363	isLowSurrogate(seq.charAt(i))) {
jaroslav@68	1364	n--;
jaroslav@68	1365	i++;
jaroslav@68	1366	}
jaroslav@68	1367	}
jaroslav@68	1368	return n;
jaroslav@68	1369	}
jaroslav@68	1370
jaroslav@68	1371	/**
jaroslav@68	1372	* Returns the number of Unicode code points in a subarray of the
jaroslav@68	1373	* {@code char} array argument. The {@code offset}
jaroslav@68	1374	* argument is the index of the first {@code char} of the
jaroslav@68	1375	* subarray and the {@code count} argument specifies the
jaroslav@68	1376	* length of the subarray in {@code char}s. Unpaired
jaroslav@68	1377	* surrogates within the subarray count as one code point each.
jaroslav@68	1378	*
jaroslav@68	1379	* @param a the {@code char} array
jaroslav@68	1380	* @param offset the index of the first {@code char} in the
jaroslav@68	1381	* given {@code char} array
jaroslav@68	1382	* @param count the length of the subarray in {@code char}s
jaroslav@68	1383	* @return the number of Unicode code points in the specified subarray
jaroslav@68	1384	* @exception NullPointerException if {@code a} is null.
jaroslav@68	1385	* @exception IndexOutOfBoundsException if {@code offset} or
jaroslav@68	1386	* {@code count} is negative, or if {@code offset +
jaroslav@68	1387	* count} is larger than the length of the given array.
jaroslav@68	1388	* @since 1.5
jaroslav@68	1389	*/
jaroslav@68	1390	public static int codePointCount(char[] a, int offset, int count) {
jaroslav@68	1391	if (count > a.length - offset \|\| offset < 0 \|\| count < 0) {
jaroslav@68	1392	throw new IndexOutOfBoundsException();
jaroslav@68	1393	}
jaroslav@68	1394	return codePointCountImpl(a, offset, count);
jaroslav@68	1395	}
jaroslav@68	1396
jaroslav@68	1397	static int codePointCountImpl(char[] a, int offset, int count) {
jaroslav@68	1398	int endIndex = offset + count;
jaroslav@68	1399	int n = count;
jaroslav@68	1400	for (int i = offset; i < endIndex; ) {
jaroslav@68	1401	if (isHighSurrogate(a[i++]) && i < endIndex &&
jaroslav@68	1402	isLowSurrogate(a[i])) {
jaroslav@68	1403	n--;
jaroslav@68	1404	i++;
jaroslav@68	1405	}
jaroslav@68	1406	}
jaroslav@68	1407	return n;
jaroslav@68	1408	}
jaroslav@68	1409
jaroslav@68	1410	/**
jaroslav@68	1411	* Returns the index within the given char sequence that is offset
jaroslav@68	1412	* from the given {@code index} by {@code codePointOffset}
jaroslav@68	1413	* code points. Unpaired surrogates within the text range given by
jaroslav@68	1414	* {@code index} and {@code codePointOffset} count as
jaroslav@68	1415	* one code point each.
jaroslav@68	1416	*
jaroslav@68	1417	* @param seq the char sequence
jaroslav@68	1418	* @param index the index to be offset
jaroslav@68	1419	* @param codePointOffset the offset in code points
jaroslav@68	1420	* @return the index within the char sequence
jaroslav@68	1421	* @exception NullPointerException if {@code seq} is null.
jaroslav@68	1422	* @exception IndexOutOfBoundsException if {@code index}
jaroslav@68	1423	* is negative or larger than the length of the char sequence,
jaroslav@68	1424	* or if {@code codePointOffset} is positive and the
jaroslav@68	1425	* subsequence starting with {@code index} has fewer than
jaroslav@68	1426	* {@code codePointOffset} code points, or if
jaroslav@68	1427	* {@code codePointOffset} is negative and the subsequence
jaroslav@68	1428	* before {@code index} has fewer than the absolute value
jaroslav@68	1429	* of {@code codePointOffset} code points.
jaroslav@68	1430	* @since 1.5
jaroslav@68	1431	*/
jaroslav@68	1432	public static int offsetByCodePoints(CharSequence seq, int index,
jaroslav@68	1433	int codePointOffset) {
jaroslav@68	1434	int length = seq.length();
jaroslav@68	1435	if (index < 0 \|\| index > length) {
jaroslav@68	1436	throw new IndexOutOfBoundsException();
jaroslav@68	1437	}
jaroslav@68	1438
jaroslav@68	1439	int x = index;
jaroslav@68	1440	if (codePointOffset >= 0) {
jaroslav@68	1441	int i;
jaroslav@68	1442	for (i = 0; x < length && i < codePointOffset; i++) {
jaroslav@68	1443	if (isHighSurrogate(seq.charAt(x++)) && x < length &&
jaroslav@68	1444	isLowSurrogate(seq.charAt(x))) {
jaroslav@68	1445	x++;
jaroslav@68	1446	}
jaroslav@68	1447	}
jaroslav@68	1448	if (i < codePointOffset) {
jaroslav@68	1449	throw new IndexOutOfBoundsException();
jaroslav@68	1450	}
jaroslav@68	1451	} else {
jaroslav@68	1452	int i;
jaroslav@68	1453	for (i = codePointOffset; x > 0 && i < 0; i++) {
jaroslav@68	1454	if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
jaroslav@68	1455	isHighSurrogate(seq.charAt(x-1))) {
jaroslav@68	1456	x--;
jaroslav@68	1457	}
jaroslav@68	1458	}
jaroslav@68	1459	if (i < 0) {
jaroslav@68	1460	throw new IndexOutOfBoundsException();
jaroslav@68	1461	}
jaroslav@68	1462	}
jaroslav@68	1463	return x;
jaroslav@68	1464	}
jaroslav@68	1465
jaroslav@68	1466	/**
jaroslav@68	1467	* Returns the index within the given {@code char} subarray
jaroslav@68	1468	* that is offset from the given {@code index} by
jaroslav@68	1469	* {@code codePointOffset} code points. The
jaroslav@68	1470	* {@code start} and {@code count} arguments specify a
jaroslav@68	1471	* subarray of the {@code char} array. Unpaired surrogates
jaroslav@68	1472	* within the text range given by {@code index} and
jaroslav@68	1473	* {@code codePointOffset} count as one code point each.
jaroslav@68	1474	*
jaroslav@68	1475	* @param a the {@code char} array
jaroslav@68	1476	* @param start the index of the first {@code char} of the
jaroslav@68	1477	* subarray
jaroslav@68	1478	* @param count the length of the subarray in {@code char}s
jaroslav@68	1479	* @param index the index to be offset
jaroslav@68	1480	* @param codePointOffset the offset in code points
jaroslav@68	1481	* @return the index within the subarray
jaroslav@68	1482	* @exception NullPointerException if {@code a} is null.
jaroslav@68	1483	* @exception IndexOutOfBoundsException
jaroslav@68	1484	* if {@code start} or {@code count} is negative,
jaroslav@68	1485	* or if {@code start + count} is larger than the length of
jaroslav@68	1486	* the given array,
jaroslav@68	1487	* or if {@code index} is less than {@code start} or
jaroslav@68	1488	* larger than {@code start + count},
jaroslav@68	1489	* or if {@code codePointOffset} is positive and the text range
jaroslav@68	1490	* starting with {@code index} and ending with {@code start + count - 1}
jaroslav@68	1491	* has fewer than {@code codePointOffset} code
jaroslav@68	1492	* points,
jaroslav@68	1493	* or if {@code codePointOffset} is negative and the text range
jaroslav@68	1494	* starting with {@code start} and ending with {@code index - 1}
jaroslav@68	1495	* has fewer than the absolute value of
jaroslav@68	1496	* {@code codePointOffset} code points.
jaroslav@68	1497	* @since 1.5
jaroslav@68	1498	*/
jaroslav@68	1499	public static int offsetByCodePoints(char[] a, int start, int count,
jaroslav@68	1500	int index, int codePointOffset) {
jaroslav@68	1501	if (count > a.length-start \|\| start < 0 \|\| count < 0
jaroslav@68	1502	\|\| index < start \|\| index > start+count) {
jaroslav@68	1503	throw new IndexOutOfBoundsException();
jaroslav@68	1504	}
jaroslav@68	1505	return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
jaroslav@68	1506	}
jaroslav@68	1507
jaroslav@68	1508	static int offsetByCodePointsImpl(char[]a, int start, int count,
jaroslav@68	1509	int index, int codePointOffset) {
jaroslav@68	1510	int x = index;
jaroslav@68	1511	if (codePointOffset >= 0) {
jaroslav@68	1512	int limit = start + count;
jaroslav@68	1513	int i;
jaroslav@68	1514	for (i = 0; x < limit && i < codePointOffset; i++) {
jaroslav@68	1515	if (isHighSurrogate(a[x++]) && x < limit &&
jaroslav@68	1516	isLowSurrogate(a[x])) {
jaroslav@68	1517	x++;
jaroslav@68	1518	}
jaroslav@68	1519	}
jaroslav@68	1520	if (i < codePointOffset) {
jaroslav@68	1521	throw new IndexOutOfBoundsException();
jaroslav@68	1522	}
jaroslav@68	1523	} else {
jaroslav@68	1524	int i;
jaroslav@68	1525	for (i = codePointOffset; x > start && i < 0; i++) {
jaroslav@68	1526	if (isLowSurrogate(a[--x]) && x > start &&
jaroslav@68	1527	isHighSurrogate(a[x-1])) {
jaroslav@68	1528	x--;
jaroslav@68	1529	}
jaroslav@68	1530	}
jaroslav@68	1531	if (i < 0) {
jaroslav@68	1532	throw new IndexOutOfBoundsException();
jaroslav@68	1533	}
jaroslav@68	1534	}
jaroslav@68	1535	return x;
jaroslav@68	1536	}
jaroslav@68	1537
jaroslav@68	1538	/**
jaroslav@68	1539	* Determines if the specified character is a lowercase character.
jaroslav@68	1540	* <p>
jaroslav@68	1541	* A character is lowercase if its general category type, provided
jaroslav@68	1542	* by {@code Character.getType(ch)}, is
jaroslav@68	1543	* {@code LOWERCASE_LETTER}, or it has contributory property
jaroslav@68	1544	* Other_Lowercase as defined by the Unicode Standard.
jaroslav@68	1545	* <p>
jaroslav@68	1546	* The following are examples of lowercase characters:
jaroslav@68	1547	* <p><blockquote><pre>
jaroslav@68	1548	* a b c d e f g h i j k l m n o p q r s t u v w x y z
jaroslav@68	1549	* '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
jaroslav@68	1550	* '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
jaroslav@68	1551	* '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
jaroslav@68	1552	* '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
jaroslav@68	1553	* </pre></blockquote>
jaroslav@68	1554	* <p> Many other Unicode characters are lowercase too.
jaroslav@68	1555	*
jaroslav@68	1556	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	1557	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	1558	* all Unicode characters, including supplementary characters, use
jaroslav@68	1559	* the {@link #isLowerCase(int)} method.
jaroslav@68	1560	*
jaroslav@68	1561	* @param ch the character to be tested.
jaroslav@68	1562	* @return {@code true} if the character is lowercase;
jaroslav@68	1563	* {@code false} otherwise.
jaroslav@68	1564	* @see Character#isLowerCase(char)
jaroslav@68	1565	* @see Character#isTitleCase(char)
jaroslav@68	1566	* @see Character#toLowerCase(char)
jaroslav@68	1567	* @see Character#getType(char)
jaroslav@68	1568	*/
jaroslav@68	1569	public static boolean isLowerCase(char ch) {
jaroslav@326	1570	return ch == toLowerCase(ch);
jaroslav@68	1571	}
jaroslav@68	1572
jaroslav@68	1573	/**
jaroslav@68	1574	* Determines if the specified character is an uppercase character.
jaroslav@68	1575	* <p>
jaroslav@68	1576	* A character is uppercase if its general category type, provided by
jaroslav@68	1577	* {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
jaroslav@68	1578	* or it has contributory property Other_Uppercase as defined by the Unicode Standard.
jaroslav@68	1579	* <p>
jaroslav@68	1580	* The following are examples of uppercase characters:
jaroslav@68	1581	* <p><blockquote><pre>
jaroslav@68	1582	* A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
jaroslav@68	1583	* '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
jaroslav@68	1584	* '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
jaroslav@68	1585	* '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
jaroslav@68	1586	* '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
jaroslav@68	1587	* </pre></blockquote>
jaroslav@68	1588	* <p> Many other Unicode characters are uppercase too.<p>
jaroslav@68	1589	*
jaroslav@68	1590	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	1591	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	1592	* all Unicode characters, including supplementary characters, use
jaroslav@68	1593	* the {@link #isUpperCase(int)} method.
jaroslav@68	1594	*
jaroslav@68	1595	* @param ch the character to be tested.
jaroslav@68	1596	* @return {@code true} if the character is uppercase;
jaroslav@68	1597	* {@code false} otherwise.
jaroslav@68	1598	* @see Character#isLowerCase(char)
jaroslav@68	1599	* @see Character#isTitleCase(char)
jaroslav@68	1600	* @see Character#toUpperCase(char)
jaroslav@68	1601	* @see Character#getType(char)
jaroslav@68	1602	* @since 1.0
jaroslav@68	1603	*/
jaroslav@68	1604	public static boolean isUpperCase(char ch) {
jaroslav@326	1605	return ch == toUpperCase(ch);
jaroslav@68	1606	}
jaroslav@68	1607
jaroslav@68	1608	/**
jaroslav@68	1609	* Determines if the specified character is a titlecase character.
jaroslav@68	1610	* <p>
jaroslav@68	1611	* A character is a titlecase character if its general
jaroslav@68	1612	* category type, provided by {@code Character.getType(ch)},
jaroslav@68	1613	* is {@code TITLECASE_LETTER}.
jaroslav@68	1614	* <p>
jaroslav@68	1615	* Some characters look like pairs of Latin letters. For example, there
jaroslav@68	1616	* is an uppercase letter that looks like "LJ" and has a corresponding
jaroslav@68	1617	* lowercase letter that looks like "lj". A third form, which looks like "Lj",
jaroslav@68	1618	* is the appropriate form to use when rendering a word in lowercase
jaroslav@68	1619	* with initial capitals, as for a book title.
jaroslav@68	1620	* <p>
jaroslav@68	1621	* These are some of the Unicode characters for which this method returns
jaroslav@68	1622	* {@code true}:
jaroslav@68	1623	* <ul>
jaroslav@68	1624	* <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
jaroslav@68	1625	* <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
jaroslav@68	1626	* <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
jaroslav@68	1627	* <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
jaroslav@68	1628	* </ul>
jaroslav@68	1629	* <p> Many other Unicode characters are titlecase too.<p>
jaroslav@68	1630	*
jaroslav@68	1631	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	1632	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	1633	* all Unicode characters, including supplementary characters, use
jaroslav@68	1634	* the {@link #isTitleCase(int)} method.
jaroslav@68	1635	*
jaroslav@68	1636	* @param ch the character to be tested.
jaroslav@68	1637	* @return {@code true} if the character is titlecase;
jaroslav@68	1638	* {@code false} otherwise.
jaroslav@68	1639	* @see Character#isLowerCase(char)
jaroslav@68	1640	* @see Character#isUpperCase(char)
jaroslav@68	1641	* @see Character#toTitleCase(char)
jaroslav@68	1642	* @see Character#getType(char)
jaroslav@68	1643	* @since 1.0.2
jaroslav@68	1644	*/
jaroslav@68	1645	public static boolean isTitleCase(char ch) {
jaroslav@68	1646	return isTitleCase((int)ch);
jaroslav@68	1647	}
jaroslav@68	1648
jaroslav@68	1649	/**
jaroslav@68	1650	* Determines if the specified character (Unicode code point) is a titlecase character.
jaroslav@68	1651	* <p>
jaroslav@68	1652	* A character is a titlecase character if its general
jaroslav@68	1653	* category type, provided by {@link Character#getType(int) getType(codePoint)},
jaroslav@68	1654	* is {@code TITLECASE_LETTER}.
jaroslav@68	1655	* <p>
jaroslav@68	1656	* Some characters look like pairs of Latin letters. For example, there
jaroslav@68	1657	* is an uppercase letter that looks like "LJ" and has a corresponding
jaroslav@68	1658	* lowercase letter that looks like "lj". A third form, which looks like "Lj",
jaroslav@68	1659	* is the appropriate form to use when rendering a word in lowercase
jaroslav@68	1660	* with initial capitals, as for a book title.
jaroslav@68	1661	* <p>
jaroslav@68	1662	* These are some of the Unicode characters for which this method returns
jaroslav@68	1663	* {@code true}:
jaroslav@68	1664	* <ul>
jaroslav@68	1665	* <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
jaroslav@68	1666	* <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
jaroslav@68	1667	* <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
jaroslav@68	1668	* <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
jaroslav@68	1669	* </ul>
jaroslav@68	1670	* <p> Many other Unicode characters are titlecase too.<p>
jaroslav@68	1671	*
jaroslav@68	1672	* @param codePoint the character (Unicode code point) to be tested.
jaroslav@68	1673	* @return {@code true} if the character is titlecase;
jaroslav@68	1674	* {@code false} otherwise.
jaroslav@68	1675	* @see Character#isLowerCase(int)
jaroslav@68	1676	* @see Character#isUpperCase(int)
jaroslav@68	1677	* @see Character#toTitleCase(int)
jaroslav@68	1678	* @see Character#getType(int)
jaroslav@68	1679	* @since 1.5
jaroslav@68	1680	*/
jaroslav@68	1681	public static boolean isTitleCase(int codePoint) {
jaroslav@68	1682	return getType(codePoint) == Character.TITLECASE_LETTER;
jaroslav@68	1683	}
jaroslav@68	1684
jaroslav@68	1685	/**
jaroslav@68	1686	* Determines if the specified character is a digit.
jaroslav@68	1687	* <p>
jaroslav@68	1688	* A character is a digit if its general category type, provided
jaroslav@68	1689	* by {@code Character.getType(ch)}, is
jaroslav@68	1690	* {@code DECIMAL_DIGIT_NUMBER}.
jaroslav@68	1691	* <p>
jaroslav@68	1692	* Some Unicode character ranges that contain digits:
jaroslav@68	1693	* <ul>
jaroslav@68	1694	* <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
jaroslav@68	1695	* ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
jaroslav@68	1696	* <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
jaroslav@68	1697	* Arabic-Indic digits
jaroslav@68	1698	* <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
jaroslav@68	1699	* Extended Arabic-Indic digits
jaroslav@68	1700	* <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
jaroslav@68	1701	* Devanagari digits
jaroslav@68	1702	* <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
jaroslav@68	1703	* Fullwidth digits
jaroslav@68	1704	* </ul>
jaroslav@68	1705	*
jaroslav@68	1706	* Many other character ranges contain digits as well.
jaroslav@68	1707	*
jaroslav@68	1708	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	1709	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	1710	* all Unicode characters, including supplementary characters, use
jaroslav@68	1711	* the {@link #isDigit(int)} method.
jaroslav@68	1712	*
jaroslav@68	1713	* @param ch the character to be tested.
jaroslav@68	1714	* @return {@code true} if the character is a digit;
jaroslav@68	1715	* {@code false} otherwise.
jaroslav@68	1716	* @see Character#digit(char, int)
jaroslav@68	1717	* @see Character#forDigit(int, int)
jaroslav@68	1718	* @see Character#getType(char)
jaroslav@68	1719	*/
jaroslav@68	1720	public static boolean isDigit(char ch) {
jaroslav@326	1721	return String.valueOf(ch).matches("\\d");
jaroslav@68	1722	}
jaroslav@68	1723
jaroslav@68	1724	/**
jaroslav@68	1725	* Determines if the specified character (Unicode code point) is a digit.
jaroslav@68	1726	* <p>
jaroslav@68	1727	* A character is a digit if its general category type, provided
jaroslav@68	1728	* by {@link Character#getType(int) getType(codePoint)}, is
jaroslav@68	1729	* {@code DECIMAL_DIGIT_NUMBER}.
jaroslav@68	1730	* <p>
jaroslav@68	1731	* Some Unicode character ranges that contain digits:
jaroslav@68	1732	* <ul>
jaroslav@68	1733	* <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
jaroslav@68	1734	* ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
jaroslav@68	1735	* <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
jaroslav@68	1736	* Arabic-Indic digits
jaroslav@68	1737	* <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
jaroslav@68	1738	* Extended Arabic-Indic digits
jaroslav@68	1739	* <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
jaroslav@68	1740	* Devanagari digits
jaroslav@68	1741	* <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
jaroslav@68	1742	* Fullwidth digits
jaroslav@68	1743	* </ul>
jaroslav@68	1744	*
jaroslav@68	1745	* Many other character ranges contain digits as well.
jaroslav@68	1746	*
jaroslav@68	1747	* @param codePoint the character (Unicode code point) to be tested.
jaroslav@68	1748	* @return {@code true} if the character is a digit;
jaroslav@68	1749	* {@code false} otherwise.
jaroslav@68	1750	* @see Character#forDigit(int, int)
jaroslav@68	1751	* @see Character#getType(int)
jaroslav@68	1752	* @since 1.5
jaroslav@68	1753	*/
jaroslav@68	1754	public static boolean isDigit(int codePoint) {
jaroslav@326	1755	return fromCodeChars(codePoint).matches("\\d");
jaroslav@68	1756	}
jaroslav@326	1757
jaroslav@326	1758	@JavaScriptBody(args = "c", body = "return String.fromCharCode(c);")
jaroslav@326	1759	private native static String fromCodeChars(int codePoint);
jaroslav@68	1760
jaroslav@68	1761	/**
jaroslav@68	1762	* Determines if a character is defined in Unicode.
jaroslav@68	1763	* <p>
jaroslav@68	1764	* A character is defined if at least one of the following is true:
jaroslav@68	1765	* <ul>
jaroslav@68	1766	* <li>It has an entry in the UnicodeData file.
jaroslav@68	1767	* <li>It has a value in a range defined by the UnicodeData file.
jaroslav@68	1768	* </ul>
jaroslav@68	1769	*
jaroslav@68	1770	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	1771	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	1772	* all Unicode characters, including supplementary characters, use
jaroslav@68	1773	* the {@link #isDefined(int)} method.
jaroslav@68	1774	*
jaroslav@68	1775	* @param ch the character to be tested
jaroslav@68	1776	* @return {@code true} if the character has a defined meaning
jaroslav@68	1777	* in Unicode; {@code false} otherwise.
jaroslav@68	1778	* @see Character#isDigit(char)
jaroslav@68	1779	* @see Character#isLetter(char)
jaroslav@68	1780	* @see Character#isLetterOrDigit(char)
jaroslav@68	1781	* @see Character#isLowerCase(char)
jaroslav@68	1782	* @see Character#isTitleCase(char)
jaroslav@68	1783	* @see Character#isUpperCase(char)
jaroslav@68	1784	* @since 1.0.2
jaroslav@68	1785	*/
jaroslav@68	1786	public static boolean isDefined(char ch) {
jaroslav@68	1787	return isDefined((int)ch);
jaroslav@68	1788	}
jaroslav@68	1789
jaroslav@68	1790	/**
jaroslav@68	1791	* Determines if a character (Unicode code point) is defined in Unicode.
jaroslav@68	1792	* <p>
jaroslav@68	1793	* A character is defined if at least one of the following is true:
jaroslav@68	1794	* <ul>
jaroslav@68	1795	* <li>It has an entry in the UnicodeData file.
jaroslav@68	1796	* <li>It has a value in a range defined by the UnicodeData file.
jaroslav@68	1797	* </ul>
jaroslav@68	1798	*
jaroslav@68	1799	* @param codePoint the character (Unicode code point) to be tested.
jaroslav@68	1800	* @return {@code true} if the character has a defined meaning
jaroslav@68	1801	* in Unicode; {@code false} otherwise.
jaroslav@68	1802	* @see Character#isDigit(int)
jaroslav@68	1803	* @see Character#isLetter(int)
jaroslav@68	1804	* @see Character#isLetterOrDigit(int)
jaroslav@68	1805	* @see Character#isLowerCase(int)
jaroslav@68	1806	* @see Character#isTitleCase(int)
jaroslav@68	1807	* @see Character#isUpperCase(int)
jaroslav@68	1808	* @since 1.5
jaroslav@68	1809	*/
jaroslav@68	1810	public static boolean isDefined(int codePoint) {
jaroslav@68	1811	return getType(codePoint) != Character.UNASSIGNED;
jaroslav@68	1812	}
jaroslav@68	1813
jaroslav@68	1814	/**
jaroslav@68	1815	* Determines if the specified character is a letter.
jaroslav@68	1816	* <p>
jaroslav@68	1817	* A character is considered to be a letter if its general
jaroslav@68	1818	* category type, provided by {@code Character.getType(ch)},
jaroslav@68	1819	* is any of the following:
jaroslav@68	1820	* <ul>
jaroslav@68	1821	* <li> {@code UPPERCASE_LETTER}
jaroslav@68	1822	* <li> {@code LOWERCASE_LETTER}
jaroslav@68	1823	* <li> {@code TITLECASE_LETTER}
jaroslav@68	1824	* <li> {@code MODIFIER_LETTER}
jaroslav@68	1825	* <li> {@code OTHER_LETTER}
jaroslav@68	1826	* </ul>
jaroslav@68	1827	*
jaroslav@68	1828	* Not all letters have case. Many characters are
jaroslav@68	1829	* letters but are neither uppercase nor lowercase nor titlecase.
jaroslav@68	1830	*
jaroslav@68	1831	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	1832	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	1833	* all Unicode characters, including supplementary characters, use
jaroslav@68	1834	* the {@link #isLetter(int)} method.
jaroslav@68	1835	*
jaroslav@68	1836	* @param ch the character to be tested.
jaroslav@68	1837	* @return {@code true} if the character is a letter;
jaroslav@68	1838	* {@code false} otherwise.
jaroslav@68	1839	* @see Character#isDigit(char)
jaroslav@68	1840	* @see Character#isJavaIdentifierStart(char)
jaroslav@68	1841	* @see Character#isJavaLetter(char)
jaroslav@68	1842	* @see Character#isJavaLetterOrDigit(char)
jaroslav@68	1843	* @see Character#isLetterOrDigit(char)
jaroslav@68	1844	* @see Character#isLowerCase(char)
jaroslav@68	1845	* @see Character#isTitleCase(char)
jaroslav@68	1846	* @see Character#isUnicodeIdentifierStart(char)
jaroslav@68	1847	* @see Character#isUpperCase(char)
jaroslav@68	1848	*/
jaroslav@68	1849	public static boolean isLetter(char ch) {
jaroslav@326	1850	return String.valueOf(ch).matches("\\w") && !isDigit(ch);
jaroslav@68	1851	}
jaroslav@68	1852
jaroslav@68	1853	/**
jaroslav@68	1854	* Determines if the specified character (Unicode code point) is a letter.
jaroslav@68	1855	* <p>
jaroslav@68	1856	* A character is considered to be a letter if its general
jaroslav@68	1857	* category type, provided by {@link Character#getType(int) getType(codePoint)},
jaroslav@68	1858	* is any of the following:
jaroslav@68	1859	* <ul>
jaroslav@68	1860	* <li> {@code UPPERCASE_LETTER}
jaroslav@68	1861	* <li> {@code LOWERCASE_LETTER}
jaroslav@68	1862	* <li> {@code TITLECASE_LETTER}
jaroslav@68	1863	* <li> {@code MODIFIER_LETTER}
jaroslav@68	1864	* <li> {@code OTHER_LETTER}
jaroslav@68	1865	* </ul>
jaroslav@68	1866	*
jaroslav@68	1867	* Not all letters have case. Many characters are
jaroslav@68	1868	* letters but are neither uppercase nor lowercase nor titlecase.
jaroslav@68	1869	*
jaroslav@68	1870	* @param codePoint the character (Unicode code point) to be tested.
jaroslav@68	1871	* @return {@code true} if the character is a letter;
jaroslav@68	1872	* {@code false} otherwise.
jaroslav@68	1873	* @see Character#isDigit(int)
jaroslav@68	1874	* @see Character#isJavaIdentifierStart(int)
jaroslav@68	1875	* @see Character#isLetterOrDigit(int)
jaroslav@68	1876	* @see Character#isLowerCase(int)
jaroslav@68	1877	* @see Character#isTitleCase(int)
jaroslav@68	1878	* @see Character#isUnicodeIdentifierStart(int)
jaroslav@68	1879	* @see Character#isUpperCase(int)
jaroslav@68	1880	* @since 1.5
jaroslav@68	1881	*/
jaroslav@68	1882	public static boolean isLetter(int codePoint) {
jaroslav@326	1883	return fromCodeChars(codePoint).matches("\\w") && !isDigit(codePoint);
jaroslav@68	1884	}
jaroslav@68	1885
jaroslav@68	1886	/**
jaroslav@68	1887	* Determines if the specified character is a letter or digit.
jaroslav@68	1888	* <p>
jaroslav@68	1889	* A character is considered to be a letter or digit if either
jaroslav@68	1890	* {@code Character.isLetter(char ch)} or
jaroslav@68	1891	* {@code Character.isDigit(char ch)} returns
jaroslav@68	1892	* {@code true} for the character.
jaroslav@68	1893	*
jaroslav@68	1894	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	1895	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	1896	* all Unicode characters, including supplementary characters, use
jaroslav@68	1897	* the {@link #isLetterOrDigit(int)} method.
jaroslav@68	1898	*
jaroslav@68	1899	* @param ch the character to be tested.
jaroslav@68	1900	* @return {@code true} if the character is a letter or digit;
jaroslav@68	1901	* {@code false} otherwise.
jaroslav@68	1902	* @see Character#isDigit(char)
jaroslav@68	1903	* @see Character#isJavaIdentifierPart(char)
jaroslav@68	1904	* @see Character#isJavaLetter(char)
jaroslav@68	1905	* @see Character#isJavaLetterOrDigit(char)
jaroslav@68	1906	* @see Character#isLetter(char)
jaroslav@68	1907	* @see Character#isUnicodeIdentifierPart(char)
jaroslav@68	1908	* @since 1.0.2
jaroslav@68	1909	*/
jaroslav@68	1910	public static boolean isLetterOrDigit(char ch) {
jaroslav@326	1911	return String.valueOf(ch).matches("\\w");
jaroslav@68	1912	}
jaroslav@68	1913
jaroslav@68	1914	/**
jaroslav@68	1915	* Determines if the specified character (Unicode code point) is a letter or digit.
jaroslav@68	1916	* <p>
jaroslav@68	1917	* A character is considered to be a letter or digit if either
jaroslav@68	1918	* {@link #isLetter(int) isLetter(codePoint)} or
jaroslav@68	1919	* {@link #isDigit(int) isDigit(codePoint)} returns
jaroslav@68	1920	* {@code true} for the character.
jaroslav@68	1921	*
jaroslav@68	1922	* @param codePoint the character (Unicode code point) to be tested.
jaroslav@68	1923	* @return {@code true} if the character is a letter or digit;
jaroslav@68	1924	* {@code false} otherwise.
jaroslav@68	1925	* @see Character#isDigit(int)
jaroslav@68	1926	* @see Character#isJavaIdentifierPart(int)
jaroslav@68	1927	* @see Character#isLetter(int)
jaroslav@68	1928	* @see Character#isUnicodeIdentifierPart(int)
jaroslav@68	1929	* @since 1.5
jaroslav@68	1930	*/
jaroslav@68	1931	public static boolean isLetterOrDigit(int codePoint) {
jaroslav@326	1932	return fromCodeChars(codePoint).matches("\\w");
jaroslav@68	1933	}
jaroslav@85	1934
jtulach@1350	1935	public static int getType(int x) {
jtulach@1350	1936	throw new UnsupportedOperationException("getType: " + (char)x);
jaroslav@68	1937	}
jaroslav@563	1938
jaroslav@563	1939	/**
jaroslav@563	1940	* Determines if the specified character is
jaroslav@563	1941	* permissible as the first character in a Java identifier.
jaroslav@563	1942	* <p>
jaroslav@563	1943	* A character may start a Java identifier if and only if
jaroslav@563	1944	* one of the following conditions is true:
jaroslav@563	1945	* <ul>
jaroslav@563	1946	* <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
jaroslav@563	1947	* <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
jaroslav@563	1948	* <li> {@code ch} is a currency symbol (such as {@code '$'})
jaroslav@563	1949	* <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
jaroslav@563	1950	* </ul>
jaroslav@563	1951	*
jaroslav@563	1952	* <p><b>Note:</b> This method cannot handle <a
jaroslav@563	1953	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@563	1954	* all Unicode characters, including supplementary characters, use
jaroslav@563	1955	* the {@link #isJavaIdentifierStart(int)} method.
jaroslav@563	1956	*
jaroslav@563	1957	* @param ch the character to be tested.
jaroslav@563	1958	* @return {@code true} if the character may start a Java identifier;
jaroslav@563	1959	* {@code false} otherwise.
jaroslav@563	1960	* @see Character#isJavaIdentifierPart(char)
jaroslav@563	1961	* @see Character#isLetter(char)
jaroslav@563	1962	* @see Character#isUnicodeIdentifierStart(char)
jaroslav@563	1963	* @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
jaroslav@563	1964	* @since 1.1
jaroslav@563	1965	*/
jaroslav@563	1966	public static boolean isJavaIdentifierStart(char ch) {
jaroslav@563	1967	return isJavaIdentifierStart((int)ch);
jaroslav@563	1968	}
jaroslav@563	1969
jaroslav@563	1970	/**
jaroslav@563	1971	* Determines if the character (Unicode code point) is
jaroslav@563	1972	* permissible as the first character in a Java identifier.
jaroslav@563	1973	* <p>
jaroslav@563	1974	* A character may start a Java identifier if and only if
jaroslav@563	1975	* one of the following conditions is true:
jaroslav@563	1976	* <ul>
jaroslav@563	1977	* <li> {@link #isLetter(int) isLetter(codePoint)}
jaroslav@563	1978	* returns {@code true}
jaroslav@563	1979	* <li> {@link #getType(int) getType(codePoint)}
jaroslav@563	1980	* returns {@code LETTER_NUMBER}
jaroslav@563	1981	* <li> the referenced character is a currency symbol (such as {@code '$'})
jaroslav@563	1982	* <li> the referenced character is a connecting punctuation character
jaroslav@563	1983	* (such as {@code '_'}).
jaroslav@563	1984	* </ul>
jaroslav@563	1985	*
jaroslav@563	1986	* @param codePoint the character (Unicode code point) to be tested.
jaroslav@563	1987	* @return {@code true} if the character may start a Java identifier;
jaroslav@563	1988	* {@code false} otherwise.
jaroslav@563	1989	* @see Character#isJavaIdentifierPart(int)
jaroslav@563	1990	* @see Character#isLetter(int)
jaroslav@563	1991	* @see Character#isUnicodeIdentifierStart(int)
jaroslav@563	1992	* @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
jaroslav@563	1993	* @since 1.5
jaroslav@563	1994	*/
jaroslav@563	1995	public static boolean isJavaIdentifierStart(int codePoint) {
jaroslav@563	1996	return
jaroslav@563	1997	('A' <= codePoint && codePoint <= 'Z') \|\|
jaroslav@563	1998	('a' <= codePoint && codePoint <= 'z');
jaroslav@563	1999	}
jaroslav@563	2000
jaroslav@563	2001	/**
jaroslav@563	2002	* Determines if the specified character may be part of a Java
jaroslav@563	2003	* identifier as other than the first character.
jaroslav@563	2004	* <p>
jaroslav@563	2005	* A character may be part of a Java identifier if any of the following
jaroslav@563	2006	* are true:
jaroslav@563	2007	* <ul>
jaroslav@563	2008	* <li> it is a letter
jaroslav@563	2009	* <li> it is a currency symbol (such as {@code '$'})
jaroslav@563	2010	* <li> it is a connecting punctuation character (such as {@code '_'})
jaroslav@563	2011	* <li> it is a digit
jaroslav@563	2012	* <li> it is a numeric letter (such as a Roman numeral character)
jaroslav@563	2013	* <li> it is a combining mark
jaroslav@563	2014	* <li> it is a non-spacing mark
jaroslav@563	2015	* <li> {@code isIdentifierIgnorable} returns
jaroslav@563	2016	* {@code true} for the character
jaroslav@563	2017	* </ul>
jaroslav@563	2018	*
jaroslav@563	2019	* <p><b>Note:</b> This method cannot handle <a
jaroslav@563	2020	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@563	2021	* all Unicode characters, including supplementary characters, use
jaroslav@563	2022	* the {@link #isJavaIdentifierPart(int)} method.
jaroslav@563	2023	*
jaroslav@563	2024	* @param ch the character to be tested.
jaroslav@563	2025	* @return {@code true} if the character may be part of a
jaroslav@563	2026	* Java identifier; {@code false} otherwise.
jaroslav@563	2027	* @see Character#isIdentifierIgnorable(char)
jaroslav@563	2028	* @see Character#isJavaIdentifierStart(char)
jaroslav@563	2029	* @see Character#isLetterOrDigit(char)
jaroslav@563	2030	* @see Character#isUnicodeIdentifierPart(char)
jaroslav@563	2031	* @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
jaroslav@563	2032	* @since 1.1
jaroslav@563	2033	*/
jaroslav@563	2034	public static boolean isJavaIdentifierPart(char ch) {
jaroslav@563	2035	return isJavaIdentifierPart((int)ch);
jaroslav@563	2036	}
jaroslav@563	2037
jaroslav@563	2038	/**
jaroslav@563	2039	* Determines if the character (Unicode code point) may be part of a Java
jaroslav@563	2040	* identifier as other than the first character.
jaroslav@563	2041	* <p>
jaroslav@563	2042	* A character may be part of a Java identifier if any of the following
jaroslav@563	2043	* are true:
jaroslav@563	2044	* <ul>
jaroslav@563	2045	* <li> it is a letter
jaroslav@563	2046	* <li> it is a currency symbol (such as {@code '$'})
jaroslav@563	2047	* <li> it is a connecting punctuation character (such as {@code '_'})
jaroslav@563	2048	* <li> it is a digit
jaroslav@563	2049	* <li> it is a numeric letter (such as a Roman numeral character)
jaroslav@563	2050	* <li> it is a combining mark
jaroslav@563	2051	* <li> it is a non-spacing mark
jaroslav@563	2052	* <li> {@link #isIdentifierIgnorable(int)
jaroslav@563	2053	* isIdentifierIgnorable(codePoint)} returns {@code true} for
jaroslav@563	2054	* the character
jaroslav@563	2055	* </ul>
jaroslav@563	2056	*
jaroslav@563	2057	* @param codePoint the character (Unicode code point) to be tested.
jaroslav@563	2058	* @return {@code true} if the character may be part of a
jaroslav@563	2059	* Java identifier; {@code false} otherwise.
jaroslav@563	2060	* @see Character#isIdentifierIgnorable(int)
jaroslav@563	2061	* @see Character#isJavaIdentifierStart(int)
jaroslav@563	2062	* @see Character#isLetterOrDigit(int)
jaroslav@563	2063	* @see Character#isUnicodeIdentifierPart(int)
jaroslav@563	2064	* @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
jaroslav@563	2065	* @since 1.5
jaroslav@563	2066	*/
jaroslav@563	2067	public static boolean isJavaIdentifierPart(int codePoint) {
jaroslav@563	2068	return isJavaIdentifierStart(codePoint) \|\|
jaroslav@590	2069	('0' <= codePoint && codePoint <= '9') \|\| codePoint == '$';
jaroslav@563	2070	}
jaroslav@563	2071
jaroslav@68	2072	/**
jaroslav@68	2073	* Converts the character argument to lowercase using case
jaroslav@68	2074	* mapping information from the UnicodeData file.
jaroslav@68	2075	* <p>
jaroslav@68	2076	* Note that
jaroslav@68	2077	* {@code Character.isLowerCase(Character.toLowerCase(ch))}
jaroslav@68	2078	* does not always return {@code true} for some ranges of
jaroslav@68	2079	* characters, particularly those that are symbols or ideographs.
jaroslav@68	2080	*
jaroslav@68	2081	* <p>In general, {@link String#toLowerCase()} should be used to map
jaroslav@68	2082	* characters to lowercase. {@code String} case mapping methods
jaroslav@68	2083	* have several benefits over {@code Character} case mapping methods.
jaroslav@68	2084	* {@code String} case mapping methods can perform locale-sensitive
jaroslav@68	2085	* mappings, context-sensitive mappings, and 1:M character mappings, whereas
jaroslav@68	2086	* the {@code Character} case mapping methods cannot.
jaroslav@68	2087	*
jaroslav@68	2088	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	2089	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	2090	* all Unicode characters, including supplementary characters, use
jaroslav@68	2091	* the {@link #toLowerCase(int)} method.
jaroslav@68	2092	*
jaroslav@68	2093	* @param ch the character to be converted.
jaroslav@68	2094	* @return the lowercase equivalent of the character, if any;
jaroslav@68	2095	* otherwise, the character itself.
jaroslav@68	2096	* @see Character#isLowerCase(char)
jaroslav@68	2097	* @see String#toLowerCase()
jaroslav@68	2098	*/
jaroslav@68	2099	public static char toLowerCase(char ch) {
jaroslav@326	2100	return String.valueOf(ch).toLowerCase().charAt(0);
jaroslav@68	2101	}
jaroslav@68	2102
jaroslav@68	2103	/**
jaroslav@68	2104	* Converts the character argument to uppercase using case mapping
jaroslav@68	2105	* information from the UnicodeData file.
jaroslav@68	2106	* <p>
jaroslav@68	2107	* Note that
jaroslav@68	2108	* {@code Character.isUpperCase(Character.toUpperCase(ch))}
jaroslav@68	2109	* does not always return {@code true} for some ranges of
jaroslav@68	2110	* characters, particularly those that are symbols or ideographs.
jaroslav@68	2111	*
jaroslav@68	2112	* <p>In general, {@link String#toUpperCase()} should be used to map
jaroslav@68	2113	* characters to uppercase. {@code String} case mapping methods
jaroslav@68	2114	* have several benefits over {@code Character} case mapping methods.
jaroslav@68	2115	* {@code String} case mapping methods can perform locale-sensitive
jaroslav@68	2116	* mappings, context-sensitive mappings, and 1:M character mappings, whereas
jaroslav@68	2117	* the {@code Character} case mapping methods cannot.
jaroslav@68	2118	*
jaroslav@68	2119	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	2120	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	2121	* all Unicode characters, including supplementary characters, use
jaroslav@68	2122	* the {@link #toUpperCase(int)} method.
jaroslav@68	2123	*
jaroslav@68	2124	* @param ch the character to be converted.
jaroslav@68	2125	* @return the uppercase equivalent of the character, if any;
jaroslav@68	2126	* otherwise, the character itself.
jaroslav@68	2127	* @see Character#isUpperCase(char)
jaroslav@68	2128	* @see String#toUpperCase()
jaroslav@68	2129	*/
jaroslav@68	2130	public static char toUpperCase(char ch) {
jaroslav@326	2131	return String.valueOf(ch).toUpperCase().charAt(0);
jaroslav@68	2132	}
jaroslav@68	2133
jaroslav@68	2134	/**
jaroslav@68	2135	* Returns the numeric value of the character {@code ch} in the
jaroslav@68	2136	* specified radix.
jaroslav@68	2137	* <p>
jaroslav@68	2138	* If the radix is not in the range {@code MIN_RADIX} ≤
jaroslav@68	2139	* {@code radix} ≤ {@code MAX_RADIX} or if the
jaroslav@68	2140	* value of {@code ch} is not a valid digit in the specified
jaroslav@68	2141	* radix, {@code -1} is returned. A character is a valid digit
jaroslav@68	2142	* if at least one of the following is true:
jaroslav@68	2143	* <ul>
jaroslav@68	2144	* <li>The method {@code isDigit} is {@code true} of the character
jaroslav@68	2145	* and the Unicode decimal digit value of the character (or its
jaroslav@68	2146	* single-character decomposition) is less than the specified radix.
jaroslav@68	2147	* In this case the decimal digit value is returned.
jaroslav@68	2148	* <li>The character is one of the uppercase Latin letters
jaroslav@68	2149	* {@code 'A'} through {@code 'Z'} and its code is less than
jaroslav@68	2150	* {@code radix + 'A' - 10}.
jaroslav@68	2151	* In this case, {@code ch - 'A' + 10}
jaroslav@68	2152	* is returned.
jaroslav@68	2153	* <li>The character is one of the lowercase Latin letters
jaroslav@68	2154	* {@code 'a'} through {@code 'z'} and its code is less than
jaroslav@68	2155	* {@code radix + 'a' - 10}.
jaroslav@68	2156	* In this case, {@code ch - 'a' + 10}
jaroslav@68	2157	* is returned.
jaroslav@68	2158	* <li>The character is one of the fullwidth uppercase Latin letters A
jaroslav@68	2159	* ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
jaroslav@68	2160	* and its code is less than
jaroslav@68	2161	* {@code radix + '\u005CuFF21' - 10}.
jaroslav@68	2162	* In this case, {@code ch - '\u005CuFF21' + 10}
jaroslav@68	2163	* is returned.
jaroslav@68	2164	* <li>The character is one of the fullwidth lowercase Latin letters a
jaroslav@68	2165	* ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
jaroslav@68	2166	* and its code is less than
jaroslav@68	2167	* {@code radix + '\u005CuFF41' - 10}.
jaroslav@68	2168	* In this case, {@code ch - '\u005CuFF41' + 10}
jaroslav@68	2169	* is returned.
jaroslav@68	2170	* </ul>
jaroslav@68	2171	*
jaroslav@68	2172	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	2173	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	2174	* all Unicode characters, including supplementary characters, use
jaroslav@68	2175	* the {@link #digit(int, int)} method.
jaroslav@68	2176	*
jaroslav@68	2177	* @param ch the character to be converted.
jaroslav@68	2178	* @param radix the radix.
jaroslav@68	2179	* @return the numeric value represented by the character in the
jaroslav@68	2180	* specified radix.
jaroslav@68	2181	* @see Character#forDigit(int, int)
jaroslav@68	2182	* @see Character#isDigit(char)
jaroslav@68	2183	*/
jaroslav@68	2184	public static int digit(char ch, int radix) {
jaroslav@68	2185	return digit((int)ch, radix);
jaroslav@68	2186	}
jaroslav@68	2187
jaroslav@68	2188	/**
jaroslav@68	2189	* Returns the numeric value of the specified character (Unicode
jaroslav@68	2190	* code point) in the specified radix.
jaroslav@68	2191	*
jaroslav@68	2192	* <p>If the radix is not in the range {@code MIN_RADIX} ≤
jaroslav@68	2193	* {@code radix} ≤ {@code MAX_RADIX} or if the
jaroslav@68	2194	* character is not a valid digit in the specified
jaroslav@68	2195	* radix, {@code -1} is returned. A character is a valid digit
jaroslav@68	2196	* if at least one of the following is true:
jaroslav@68	2197	* <ul>
jaroslav@68	2198	* <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
jaroslav@68	2199	* and the Unicode decimal digit value of the character (or its
jaroslav@68	2200	* single-character decomposition) is less than the specified radix.
jaroslav@68	2201	* In this case the decimal digit value is returned.
jaroslav@68	2202	* <li>The character is one of the uppercase Latin letters
jaroslav@68	2203	* {@code 'A'} through {@code 'Z'} and its code is less than
jaroslav@68	2204	* {@code radix + 'A' - 10}.
jaroslav@68	2205	* In this case, {@code codePoint - 'A' + 10}
jaroslav@68	2206	* is returned.
jaroslav@68	2207	* <li>The character is one of the lowercase Latin letters
jaroslav@68	2208	* {@code 'a'} through {@code 'z'} and its code is less than
jaroslav@68	2209	* {@code radix + 'a' - 10}.
jaroslav@68	2210	* In this case, {@code codePoint - 'a' + 10}
jaroslav@68	2211	* is returned.
jaroslav@68	2212	* <li>The character is one of the fullwidth uppercase Latin letters A
jaroslav@68	2213	* ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
jaroslav@68	2214	* and its code is less than
jaroslav@68	2215	* {@code radix + '\u005CuFF21' - 10}.
jaroslav@68	2216	* In this case,
jaroslav@68	2217	* {@code codePoint - '\u005CuFF21' + 10}
jaroslav@68	2218	* is returned.
jaroslav@68	2219	* <li>The character is one of the fullwidth lowercase Latin letters a
jaroslav@68	2220	* ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
jaroslav@68	2221	* and its code is less than
jaroslav@68	2222	* {@code radix + '\u005CuFF41'- 10}.
jaroslav@68	2223	* In this case,
jaroslav@68	2224	* {@code codePoint - '\u005CuFF41' + 10}
jaroslav@68	2225	* is returned.
jaroslav@68	2226	* </ul>
jaroslav@68	2227	*
jaroslav@68	2228	* @param codePoint the character (Unicode code point) to be converted.
jaroslav@68	2229	* @param radix the radix.
jaroslav@68	2230	* @return the numeric value represented by the character in the
jaroslav@68	2231	* specified radix.
jaroslav@68	2232	* @see Character#forDigit(int, int)
jaroslav@68	2233	* @see Character#isDigit(int)
jaroslav@68	2234	* @since 1.5
jaroslav@68	2235	*/
Martin@594	2236	@JavaScriptBody(args = { "codePoint", "radix" }, body=
Martin@594	2237	"var x = parseInt(String.fromCharCode(codePoint), radix);\n"
Martin@594	2238	+ "return isNaN(x) ? -1 : x;"
Martin@594	2239	)
jaroslav@68	2240	public static int digit(int codePoint, int radix) {
jaroslav@85	2241	throw new UnsupportedOperationException();
jaroslav@68	2242	}
jaroslav@68	2243
jaroslav@68	2244	/**
jaroslav@68	2245	* Returns the {@code int} value that the specified Unicode
jaroslav@68	2246	* character represents. For example, the character
jaroslav@68	2247	* {@code '\u005Cu216C'} (the roman numeral fifty) will return
jaroslav@68	2248	* an int with a value of 50.
jaroslav@68	2249	* <p>
jaroslav@68	2250	* The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
jaroslav@68	2251	* {@code '\u005Cu005A'}), lowercase
jaroslav@68	2252	* ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
jaroslav@68	2253	* full width variant ({@code '\u005CuFF21'} through
jaroslav@68	2254	* {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
jaroslav@68	2255	* {@code '\u005CuFF5A'}) forms have numeric values from 10
jaroslav@68	2256	* through 35. This is independent of the Unicode specification,
jaroslav@68	2257	* which does not assign numeric values to these {@code char}
jaroslav@68	2258	* values.
jaroslav@68	2259	* <p>
jaroslav@68	2260	* If the character does not have a numeric value, then -1 is returned.
jaroslav@68	2261	* If the character has a numeric value that cannot be represented as a
jaroslav@68	2262	* nonnegative integer (for example, a fractional value), then -2
jaroslav@68	2263	* is returned.
jaroslav@68	2264	*
jaroslav@68	2265	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	2266	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	2267	* all Unicode characters, including supplementary characters, use
jaroslav@68	2268	* the {@link #getNumericValue(int)} method.
jaroslav@68	2269	*
jaroslav@68	2270	* @param ch the character to be converted.
jaroslav@68	2271	* @return the numeric value of the character, as a nonnegative {@code int}
jaroslav@68	2272	* value; -2 if the character has a numeric value that is not a
jaroslav@68	2273	* nonnegative integer; -1 if the character has no numeric value.
jaroslav@68	2274	* @see Character#forDigit(int, int)
jaroslav@68	2275	* @see Character#isDigit(char)
jaroslav@68	2276	* @since 1.1
jaroslav@68	2277	*/
jaroslav@68	2278	public static int getNumericValue(char ch) {
jaroslav@68	2279	return getNumericValue((int)ch);
jaroslav@68	2280	}
jaroslav@68	2281
jaroslav@68	2282	/**
jaroslav@68	2283	* Returns the {@code int} value that the specified
jaroslav@68	2284	* character (Unicode code point) represents. For example, the character
jaroslav@68	2285	* {@code '\u005Cu216C'} (the Roman numeral fifty) will return
jaroslav@68	2286	* an {@code int} with a value of 50.
jaroslav@68	2287	* <p>
jaroslav@68	2288	* The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
jaroslav@68	2289	* {@code '\u005Cu005A'}), lowercase
jaroslav@68	2290	* ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
jaroslav@68	2291	* full width variant ({@code '\u005CuFF21'} through
jaroslav@68	2292	* {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
jaroslav@68	2293	* {@code '\u005CuFF5A'}) forms have numeric values from 10
jaroslav@68	2294	* through 35. This is independent of the Unicode specification,
jaroslav@68	2295	* which does not assign numeric values to these {@code char}
jaroslav@68	2296	* values.
jaroslav@68	2297	* <p>
jaroslav@68	2298	* If the character does not have a numeric value, then -1 is returned.
jaroslav@68	2299	* If the character has a numeric value that cannot be represented as a
jaroslav@68	2300	* nonnegative integer (for example, a fractional value), then -2
jaroslav@68	2301	* is returned.
jaroslav@68	2302	*
jaroslav@68	2303	* @param codePoint the character (Unicode code point) to be converted.
jaroslav@68	2304	* @return the numeric value of the character, as a nonnegative {@code int}
jaroslav@68	2305	* value; -2 if the character has a numeric value that is not a
jaroslav@68	2306	* nonnegative integer; -1 if the character has no numeric value.
jaroslav@68	2307	* @see Character#forDigit(int, int)
jaroslav@68	2308	* @see Character#isDigit(int)
jaroslav@68	2309	* @since 1.5
jaroslav@68	2310	*/
jaroslav@68	2311	public static int getNumericValue(int codePoint) {
jaroslav@85	2312	throw new UnsupportedOperationException();
jaroslav@68	2313	}
jaroslav@68	2314
jaroslav@68	2315	/**
jaroslav@68	2316	* Determines if the specified character is ISO-LATIN-1 white space.
jaroslav@68	2317	* This method returns {@code true} for the following five
jaroslav@68	2318	* characters only:
jaroslav@68	2319	* <table>
jaroslav@68	2320	* <tr><td>{@code '\t'}</td> <td>{@code U+0009}</td>
jaroslav@68	2321	* <td>{@code HORIZONTAL TABULATION}</td></tr>
jaroslav@68	2322	* <tr><td>{@code '\n'}</td> <td>{@code U+000A}</td>
jaroslav@68	2323	* <td>{@code NEW LINE}</td></tr>
jaroslav@68	2324	* <tr><td>{@code '\f'}</td> <td>{@code U+000C}</td>
jaroslav@68	2325	* <td>{@code FORM FEED}</td></tr>
jaroslav@68	2326	* <tr><td>{@code '\r'}</td> <td>{@code U+000D}</td>
jaroslav@68	2327	* <td>{@code CARRIAGE RETURN}</td></tr>
jaroslav@68	2328	* <tr><td>{@code ' '}</td> <td>{@code U+0020}</td>
jaroslav@68	2329	* <td>{@code SPACE}</td></tr>
jaroslav@68	2330	* </table>
jaroslav@68	2331	*
jaroslav@68	2332	* @param ch the character to be tested.
jaroslav@68	2333	* @return {@code true} if the character is ISO-LATIN-1 white
jaroslav@68	2334	* space; {@code false} otherwise.
jaroslav@68	2335	* @see Character#isSpaceChar(char)
jaroslav@68	2336	* @see Character#isWhitespace(char)
jaroslav@68	2337	* @deprecated Replaced by isWhitespace(char).
jaroslav@68	2338	*/
jaroslav@68	2339	@Deprecated
jaroslav@68	2340	public static boolean isSpace(char ch) {
jaroslav@1260	2341	return isSpaceChar(ch);
jaroslav@1260	2342	}
jaroslav@1260	2343
jaroslav@1260	2344	public static boolean isSpaceChar(int ch) {
jaroslav@68	2345	return (ch <= 0x0020) &&
jaroslav@68	2346	(((((1L << 0x0009) \|
jaroslav@68	2347	(1L << 0x000A) \|
jaroslav@68	2348	(1L << 0x000C) \|
jaroslav@68	2349	(1L << 0x000D) \|
jaroslav@68	2350	(1L << 0x0020)) >> ch) & 1L) != 0);
jaroslav@68	2351	}
jaroslav@68	2352
jaroslav@68	2353
jaroslav@68	2354	/**
jaroslav@68	2355	* Determines if the specified character is white space according to Java.
jaroslav@68	2356	* A character is a Java whitespace character if and only if it satisfies
jaroslav@68	2357	* one of the following criteria:
jaroslav@68	2358	* <ul>
jaroslav@68	2359	* <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
jaroslav@68	2360	* {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
jaroslav@68	2361	* but is not also a non-breaking space ({@code '\u005Cu00A0'},
jaroslav@68	2362	* {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
jaroslav@68	2363	* <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
jaroslav@68	2364	* <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
jaroslav@68	2365	* <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
jaroslav@68	2366	* <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
jaroslav@68	2367	* <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
jaroslav@68	2368	* <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
jaroslav@68	2369	* <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
jaroslav@68	2370	* <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
jaroslav@68	2371	* <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
jaroslav@68	2372	* </ul>
jaroslav@68	2373	*
jaroslav@68	2374	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	2375	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	2376	* all Unicode characters, including supplementary characters, use
jaroslav@68	2377	* the {@link #isWhitespace(int)} method.
jaroslav@68	2378	*
jaroslav@68	2379	* @param ch the character to be tested.
jaroslav@68	2380	* @return {@code true} if the character is a Java whitespace
jaroslav@68	2381	* character; {@code false} otherwise.
jaroslav@68	2382	* @see Character#isSpaceChar(char)
jaroslav@68	2383	* @since 1.1
jaroslav@68	2384	*/
jaroslav@68	2385	public static boolean isWhitespace(char ch) {
jaroslav@68	2386	return isWhitespace((int)ch);
jaroslav@68	2387	}
jaroslav@68	2388
jaroslav@68	2389	/**
jaroslav@68	2390	* Determines if the specified character (Unicode code point) is
jaroslav@68	2391	* white space according to Java. A character is a Java
jaroslav@68	2392	* whitespace character if and only if it satisfies one of the
jaroslav@68	2393	* following criteria:
jaroslav@68	2394	* <ul>
jaroslav@68	2395	* <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
jaroslav@68	2396	* {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
jaroslav@68	2397	* but is not also a non-breaking space ({@code '\u005Cu00A0'},
jaroslav@68	2398	* {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
jaroslav@68	2399	* <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
jaroslav@68	2400	* <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
jaroslav@68	2401	* <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
jaroslav@68	2402	* <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
jaroslav@68	2403	* <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
jaroslav@68	2404	* <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
jaroslav@68	2405	* <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
jaroslav@68	2406	* <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
jaroslav@68	2407	* <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
jaroslav@68	2408	* </ul>
jaroslav@68	2409	* <p>
jaroslav@68	2410	*
jaroslav@68	2411	* @param codePoint the character (Unicode code point) to be tested.
jaroslav@68	2412	* @return {@code true} if the character is a Java whitespace
jaroslav@68	2413	* character; {@code false} otherwise.
jaroslav@68	2414	* @see Character#isSpaceChar(int)
jaroslav@68	2415	* @since 1.5
jaroslav@68	2416	*/
jaroslav@68	2417	public static boolean isWhitespace(int codePoint) {
jaroslav@1291	2418	if (
jaroslav@1291	2419	codePoint == SPACE_SEPARATOR \|\|
jaroslav@1291	2420	codePoint == LINE_SEPARATOR \|\|
jaroslav@1291	2421	codePoint == PARAGRAPH_SEPARATOR
jaroslav@1291	2422	) {
jaroslav@1291	2423	return true;
jaroslav@1291	2424	}
jaroslav@1291	2425	return false;
jaroslav@68	2426	}
jaroslav@68	2427
jaroslav@68	2428	/**
jaroslav@68	2429	* Determines if the specified character is an ISO control
jaroslav@68	2430	* character. A character is considered to be an ISO control
jaroslav@68	2431	* character if its code is in the range {@code '\u005Cu0000'}
jaroslav@68	2432	* through {@code '\u005Cu001F'} or in the range
jaroslav@68	2433	* {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
jaroslav@68	2434	*
jaroslav@68	2435	* <p><b>Note:</b> This method cannot handle <a
jaroslav@68	2436	* href="#supplementary"> supplementary characters</a>. To support
jaroslav@68	2437	* all Unicode characters, including supplementary characters, use
jaroslav@68	2438	* the {@link #isISOControl(int)} method.
jaroslav@68	2439	*
jaroslav@68	2440	* @param ch the character to be tested.
jaroslav@68	2441	* @return {@code true} if the character is an ISO control character;
jaroslav@68	2442	* {@code false} otherwise.
jaroslav@68	2443	*
jaroslav@68	2444	* @see Character#isSpaceChar(char)
jaroslav@68	2445	* @see Character#isWhitespace(char)
jaroslav@68	2446	* @since 1.1
jaroslav@68	2447	*/
jaroslav@68	2448	public static boolean isISOControl(char ch) {
jaroslav@68	2449	return isISOControl((int)ch);
jaroslav@68	2450	}
jaroslav@68	2451
jaroslav@68	2452	/**
jaroslav@68	2453	* Determines if the referenced character (Unicode code point) is an ISO control
jaroslav@68	2454	* character. A character is considered to be an ISO control
jaroslav@68	2455	* character if its code is in the range {@code '\u005Cu0000'}
jaroslav@68	2456	* through {@code '\u005Cu001F'} or in the range
jaroslav@68	2457	* {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
jaroslav@68	2458	*
jaroslav@68	2459	* @param codePoint the character (Unicode code point) to be tested.
jaroslav@68	2460	* @return {@code true} if the character is an ISO control character;
jaroslav@68	2461	* {@code false} otherwise.
jaroslav@68	2462	* @see Character#isSpaceChar(int)
jaroslav@68	2463	* @see Character#isWhitespace(int)
jaroslav@68	2464	* @since 1.5
jaroslav@68	2465	*/
jaroslav@68	2466	public static boolean isISOControl(int codePoint) {
jaroslav@68	2467	// Optimized form of:
jaroslav@68	2468	// (codePoint >= 0x00 && codePoint <= 0x1F) \|\|
jaroslav@68	2469	// (codePoint >= 0x7F && codePoint <= 0x9F);
jaroslav@68	2470	return codePoint <= 0x9F &&
jaroslav@68	2471	(codePoint >= 0x7F \|\| (codePoint >>> 5 == 0));
jaroslav@68	2472	}
jaroslav@68	2473
jaroslav@68	2474	/**
jaroslav@68	2475	* Determines the character representation for a specific digit in
jaroslav@68	2476	* the specified radix. If the value of {@code radix} is not a
jaroslav@68	2477	* valid radix, or the value of {@code digit} is not a valid
jaroslav@68	2478	* digit in the specified radix, the null character
jaroslav@68	2479	* ({@code '\u005Cu0000'}) is returned.
jaroslav@68	2480	* <p>
jaroslav@68	2481	* The {@code radix} argument is valid if it is greater than or
jaroslav@68	2482	* equal to {@code MIN_RADIX} and less than or equal to
jaroslav@68	2483	* {@code MAX_RADIX}. The {@code digit} argument is valid if
jaroslav@68	2484	* {@code 0 <= digit < radix}.
jaroslav@68	2485	* <p>
jaroslav@68	2486	* If the digit is less than 10, then
jaroslav@68	2487	* {@code '0' + digit} is returned. Otherwise, the value
jaroslav@68	2488	* {@code 'a' + digit - 10} is returned.
jaroslav@68	2489	*
jaroslav@68	2490	* @param digit the number to convert to a character.
jaroslav@68	2491	* @param radix the radix.
jaroslav@68	2492	* @return the {@code char} representation of the specified digit
jaroslav@68	2493	* in the specified radix.
jaroslav@68	2494	* @see Character#MIN_RADIX
jaroslav@68	2495	* @see Character#MAX_RADIX
jaroslav@68	2496	* @see Character#digit(char, int)
jaroslav@68	2497	*/
jaroslav@68	2498	public static char forDigit(int digit, int radix) {
jaroslav@68	2499	if ((digit >= radix) \|\| (digit < 0)) {
jaroslav@68	2500	return '\0';
jaroslav@68	2501	}
jaroslav@68	2502	if ((radix < Character.MIN_RADIX) \|\| (radix > Character.MAX_RADIX)) {
jaroslav@68	2503	return '\0';
jaroslav@68	2504	}
jaroslav@68	2505	if (digit < 10) {
jaroslav@68	2506	return (char)('0' + digit);
jaroslav@68	2507	}
jaroslav@68	2508	return (char)('a' - 10 + digit);
jaroslav@68	2509	}
jaroslav@68	2510
jaroslav@68	2511	/**
jaroslav@68	2512	* Compares two {@code Character} objects numerically.
jaroslav@68	2513	*
jaroslav@68	2514	* @param anotherCharacter the {@code Character} to be compared.
jaroslav@68	2515
jaroslav@68	2516	* @return the value {@code 0} if the argument {@code Character}
jaroslav@68	2517	* is equal to this {@code Character}; a value less than
jaroslav@68	2518	* {@code 0} if this {@code Character} is numerically less
jaroslav@68	2519	* than the {@code Character} argument; and a value greater than
jaroslav@68	2520	* {@code 0} if this {@code Character} is numerically greater
jaroslav@68	2521	* than the {@code Character} argument (unsigned comparison).
jaroslav@68	2522	* Note that this is strictly a numerical comparison; it is not
jaroslav@68	2523	* locale-dependent.
jaroslav@68	2524	* @since 1.2
jaroslav@68	2525	*/
jaroslav@68	2526	public int compareTo(Character anotherCharacter) {
jaroslav@68	2527	return compare(this.value, anotherCharacter.value);
jaroslav@68	2528	}
jaroslav@68	2529
jaroslav@68	2530	/**
jaroslav@68	2531	* Compares two {@code char} values numerically.
jaroslav@68	2532	* The value returned is identical to what would be returned by:
jaroslav@68	2533	* <pre>
jaroslav@68	2534	* Character.valueOf(x).compareTo(Character.valueOf(y))
jaroslav@68	2535	* </pre>
jaroslav@68	2536	*
jaroslav@68	2537	* @param x the first {@code char} to compare
jaroslav@68	2538	* @param y the second {@code char} to compare
jaroslav@68	2539	* @return the value {@code 0} if {@code x == y};
jaroslav@68	2540	* a value less than {@code 0} if {@code x < y}; and
jaroslav@68	2541	* a value greater than {@code 0} if {@code x > y}
jaroslav@68	2542	* @since 1.7
jaroslav@68	2543	*/
jaroslav@68	2544	public static int compare(char x, char y) {
jaroslav@68	2545	return x - y;
jaroslav@68	2546	}
jaroslav@68	2547
jaroslav@68	2548
jaroslav@68	2549	/**
jaroslav@68	2550	* The number of bits used to represent a <tt>char</tt> value in unsigned
jaroslav@68	2551	* binary form, constant {@code 16}.
jaroslav@68	2552	*
jaroslav@68	2553	* @since 1.5
jaroslav@68	2554	*/
jaroslav@68	2555	public static final int SIZE = 16;
jaroslav@68	2556
jaroslav@68	2557	/**
jaroslav@68	2558	* Returns the value obtained by reversing the order of the bytes in the
jaroslav@68	2559	* specified <tt>char</tt> value.
jaroslav@68	2560	*
jaroslav@68	2561	* @return the value obtained by reversing (or, equivalently, swapping)
jaroslav@68	2562	* the bytes in the specified <tt>char</tt> value.
jaroslav@68	2563	* @since 1.5
jaroslav@68	2564	*/
jaroslav@68	2565	public static char reverseBytes(char ch) {
jaroslav@68	2566	return (char) (((ch & 0xFF00) >> 8) \| (ch << 8));
jaroslav@68	2567	}
jaroslav@68	2568
// Static initializer: invokes initValueOf() once when this class is initialized.
jaroslav@791	2569	static {
jaroslav@791	2570	// as last step of initialization, initialize valueOf method
jaroslav@791	2571	initValueOf();
jaroslav@791	2572	}
// Native helper whose behaviour is defined entirely by the JavaScript snippet
// in the annotation below; it takes no arguments and returns nothing.
// The snippet assigns a function named "valueOf" on the object returned by
// vm.java_lang_Character(false); that function returns this._value().
// NOTE(review): presumably this lets JavaScript coerce a Character object to
// its wrapped primitive value - confirm against the VM runtime.
jaroslav@791	2573	@JavaScriptBody(args = {}, body =
jaroslav@791	2574	"vm.java_lang_Character(false)." +
jaroslav@791	2575	"valueOf = function() { return this._value(); };"
jaroslav@791	2576	)
jaroslav@791	2577	private native static void initValueOf();
jaroslav@791	2578
jaroslav@68	2579	}

author	Jaroslav Tulach <jtulach@netbeans.org>
	Mon, 07 Oct 2013 16:17:21 +0200
changeset 1350	f14e9730d4e9
parent 1291	f66bcda82345
child 1384	12a395b571c8
permissions	-rw-r--r--