emul/mini/src/main/java/java/lang/Character.java
author Jaroslav Tulach <jaroslav.tulach@apidesign.org>
Sat, 26 Jan 2013 08:47:05 +0100
changeset 592 5e13b1ac2886
parent 563 6bfc15870186
child 594 035fcbd7a33c
permissions -rw-r--r--
In order to support fields of the same name in subclasses, we now prefix them with the name of the class that defines them. To provide a convenient way to access them from generated bytecode and also directly from JavaScript, there is a getter/setter function for each field. Its name starts with _ followed by the field name. If called with a parameter, it sets the field; without a parameter, it just returns the field's value.
jaroslav@68
     1
/*
jaroslav@68
     2
 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
jaroslav@68
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
jaroslav@68
     4
 *
jaroslav@68
     5
 * This code is free software; you can redistribute it and/or modify it
jaroslav@68
     6
 * under the terms of the GNU General Public License version 2 only, as
jaroslav@68
     7
 * published by the Free Software Foundation.  Oracle designates this
jaroslav@68
     8
 * particular file as subject to the "Classpath" exception as provided
jaroslav@68
     9
 * by Oracle in the LICENSE file that accompanied this code.
jaroslav@68
    10
 *
jaroslav@68
    11
 * This code is distributed in the hope that it will be useful, but WITHOUT
jaroslav@68
    12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
jaroslav@68
    13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
jaroslav@68
    14
 * version 2 for more details (a copy is included in the LICENSE file that
jaroslav@68
    15
 * accompanied this code).
jaroslav@68
    16
 *
jaroslav@68
    17
 * You should have received a copy of the GNU General Public License version
jaroslav@68
    18
 * 2 along with this work; if not, write to the Free Software Foundation,
jaroslav@68
    19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
jaroslav@68
    20
 *
jaroslav@68
    21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
jaroslav@68
    22
 * or visit www.oracle.com if you need additional information or have any
jaroslav@68
    23
 * questions.
jaroslav@68
    24
 */
jaroslav@68
    25
jaroslav@68
    26
package java.lang;
jaroslav@68
    27
jaroslav@326
    28
import org.apidesign.bck2brwsr.core.JavaScriptBody;
jaroslav@326
    29
jaroslav@68
    30
/**
jaroslav@68
    31
 * The {@code Character} class wraps a value of the primitive
jaroslav@68
    32
 * type {@code char} in an object. An object of type
jaroslav@68
    33
 * {@code Character} contains a single field whose type is
jaroslav@68
    34
 * {@code char}.
jaroslav@68
    35
 * <p>
jaroslav@68
    36
 * In addition, this class provides several methods for determining
jaroslav@68
    37
 * a character's category (lowercase letter, digit, etc.) and for converting
jaroslav@68
    38
 * characters from uppercase to lowercase and vice versa.
jaroslav@68
    39
 * <p>
jaroslav@68
    40
 * Character information is based on the Unicode Standard, version 6.0.0.
jaroslav@68
    41
 * <p>
jaroslav@68
    42
 * The methods and data of class {@code Character} are defined by
jaroslav@68
    43
 * the information in the <i>UnicodeData</i> file that is part of the
jaroslav@68
    44
 * Unicode Character Database maintained by the Unicode
jaroslav@68
    45
 * Consortium. This file specifies various properties including name
jaroslav@68
    46
 * and general category for every defined Unicode code point or
jaroslav@68
    47
 * character range.
jaroslav@68
    48
 * <p>
jaroslav@68
    49
 * The file and its description are available from the Unicode Consortium at:
jaroslav@68
    50
 * <ul>
jaroslav@68
    51
 * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
jaroslav@68
    52
 * </ul>
jaroslav@68
    53
 *
jaroslav@68
    54
 * <h4><a name="unicode">Unicode Character Representations</a></h4>
jaroslav@68
    55
 *
jaroslav@68
    56
 * <p>The {@code char} data type (and therefore the value that a
jaroslav@68
    57
 * {@code Character} object encapsulates) are based on the
jaroslav@68
    58
 * original Unicode specification, which defined characters as
jaroslav@68
    59
 * fixed-width 16-bit entities. The Unicode Standard has since been
jaroslav@68
    60
 * changed to allow for characters whose representation requires more
jaroslav@68
    61
 * than 16 bits.  The range of legal <em>code point</em>s is now
jaroslav@68
    62
 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
jaroslav@68
    63
 * (Refer to the <a
jaroslav@68
    64
 * href="http://www.unicode.org/reports/tr27/#notation"><i>
jaroslav@68
    65
 * definition</i></a> of the U+<i>n</i> notation in the Unicode
jaroslav@68
    66
 * Standard.)
jaroslav@68
    67
 *
jaroslav@68
    68
 * <p><a name="BMP">The set of characters from U+0000 to U+FFFF is
jaroslav@68
    69
 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
jaroslav@68
    70
 * <a name="supplementary">Characters</a> whose code points are greater
jaroslav@68
    71
 * than U+FFFF are called <em>supplementary character</em>s.  The Java
jaroslav@68
    72
 * platform uses the UTF-16 representation in {@code char} arrays and
jaroslav@68
    73
 * in the {@code String} and {@code StringBuffer} classes. In
jaroslav@68
    74
 * this representation, supplementary characters are represented as a pair
jaroslav@68
    75
 * of {@code char} values, the first from the <em>high-surrogates</em>
jaroslav@68
    76
 * range, (&#92;uD800-&#92;uDBFF), the second from the
jaroslav@68
    77
 * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
jaroslav@68
    78
 *
jaroslav@68
    79
 * <p>A {@code char} value, therefore, represents Basic
jaroslav@68
    80
 * Multilingual Plane (BMP) code points, including the surrogate
jaroslav@68
    81
 * code points, or code units of the UTF-16 encoding. An
jaroslav@68
    82
 * {@code int} value represents all Unicode code points,
jaroslav@68
    83
 * including supplementary code points. The lower (least significant)
jaroslav@68
    84
 * 21 bits of {@code int} are used to represent Unicode code
jaroslav@68
    85
 * points and the upper (most significant) 11 bits must be zero.
jaroslav@68
    86
 * Unless otherwise specified, the behavior with respect to
jaroslav@68
    87
 * supplementary characters and surrogate {@code char} values is
jaroslav@68
    88
 * as follows:
jaroslav@68
    89
 *
jaroslav@68
    90
 * <ul>
jaroslav@68
    91
 * <li>The methods that only accept a {@code char} value cannot support
jaroslav@68
    92
 * supplementary characters. They treat {@code char} values from the
jaroslav@68
    93
 * surrogate ranges as undefined characters. For example,
jaroslav@68
    94
 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
jaroslav@68
    95
 * this specific value if followed by any low-surrogate value in a string
jaroslav@68
    96
 * would represent a letter.
jaroslav@68
    97
 *
jaroslav@68
    98
 * <li>The methods that accept an {@code int} value support all
jaroslav@68
    99
 * Unicode characters, including supplementary characters. For
jaroslav@68
   100
 * example, {@code Character.isLetter(0x2F81A)} returns
jaroslav@68
   101
 * {@code true} because the code point value represents a letter
jaroslav@68
   102
 * (a CJK ideograph).
jaroslav@68
   103
 * </ul>
jaroslav@68
   104
 *
jaroslav@68
   105
 * <p>In the Java SE API documentation, <em>Unicode code point</em> is
jaroslav@68
   106
 * used for character values in the range between U+0000 and U+10FFFF,
jaroslav@68
   107
 * and <em>Unicode code unit</em> is used for 16-bit
jaroslav@68
   108
 * {@code char} values that are code units of the <em>UTF-16</em>
jaroslav@68
   109
 * encoding. For more information on Unicode terminology, refer to the
jaroslav@68
   110
 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
jaroslav@68
   111
 *
jaroslav@68
   112
 * @author  Lee Boynton
jaroslav@68
   113
 * @author  Guy Steele
jaroslav@68
   114
 * @author  Akira Tanaka
jaroslav@68
   115
 * @author  Martin Buchholz
jaroslav@68
   116
 * @author  Ulf Zibis
jaroslav@68
   117
 * @since   1.0
jaroslav@68
   118
 */
jaroslav@68
   119
public final
jaroslav@68
   120
class Character implements java.io.Serializable, Comparable<Character> {
jaroslav@68
   121
    /**
jaroslav@68
   122
     * The minimum radix available for conversion to and from strings.
jaroslav@68
   123
     * The constant value of this field is the smallest value permitted
jaroslav@68
   124
     * for the radix argument in radix-conversion methods such as the
jaroslav@68
   125
     * {@code digit} method, the {@code forDigit} method, and the
jaroslav@68
   126
     * {@code toString} method of class {@code Integer}.
jaroslav@68
   127
     *
jaroslav@68
   128
     * @see     Character#digit(char, int)
jaroslav@68
   129
     * @see     Character#forDigit(int, int)
jaroslav@68
   130
     * @see     Integer#toString(int, int)
jaroslav@68
   131
     * @see     Integer#valueOf(String)
jaroslav@68
   132
     */
jaroslav@68
   133
    public static final int MIN_RADIX = 2;
jaroslav@68
   134
jaroslav@68
   135
    /**
jaroslav@68
   136
     * The maximum radix available for conversion to and from strings.
jaroslav@68
   137
     * The constant value of this field is the largest value permitted
jaroslav@68
   138
     * for the radix argument in radix-conversion methods such as the
jaroslav@68
   139
     * {@code digit} method, the {@code forDigit} method, and the
jaroslav@68
   140
     * {@code toString} method of class {@code Integer}.
jaroslav@68
   141
     *
jaroslav@68
   142
     * @see     Character#digit(char, int)
jaroslav@68
   143
     * @see     Character#forDigit(int, int)
jaroslav@68
   144
     * @see     Integer#toString(int, int)
jaroslav@68
   145
     * @see     Integer#valueOf(String)
jaroslav@68
   146
     */
jaroslav@68
   147
    public static final int MAX_RADIX = 36;
jaroslav@68
   148
jaroslav@68
   149
    /**
jaroslav@68
   150
     * The constant value of this field is the smallest value of type
jaroslav@68
   151
     * {@code char}, {@code '\u005Cu0000'}.
jaroslav@68
   152
     *
jaroslav@68
   153
     * @since   1.0.2
jaroslav@68
   154
     */
jaroslav@68
   155
    public static final char MIN_VALUE = '\u0000';
jaroslav@68
   156
jaroslav@68
   157
    /**
jaroslav@68
   158
     * The constant value of this field is the largest value of type
jaroslav@68
   159
     * {@code char}, {@code '\u005CuFFFF'}.
jaroslav@68
   160
     *
jaroslav@68
   161
     * @since   1.0.2
jaroslav@68
   162
     */
jaroslav@68
   163
    public static final char MAX_VALUE = '\uFFFF';
jaroslav@68
   164
jaroslav@68
   165
    /**
jaroslav@68
   166
     * The {@code Class} instance representing the primitive type
jaroslav@68
   167
     * {@code char}.
jaroslav@68
   168
     *
jaroslav@68
   169
     * @since   1.1
jaroslav@68
   170
     */
jaroslav@68
   171
    public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
jaroslav@68
   172
jaroslav@68
   173
    /*
jaroslav@68
   174
     * Normative general types
jaroslav@68
   175
     */
jaroslav@68
   176
jaroslav@68
   177
    /*
jaroslav@68
   178
     * General character types
jaroslav@68
   179
     */
jaroslav@68
   180
jaroslav@68
   181
    /**
jaroslav@68
   182
     * General category "Cn" in the Unicode specification.
jaroslav@68
   183
     * @since   1.1
jaroslav@68
   184
     */
jaroslav@68
   185
    public static final byte UNASSIGNED = 0;
jaroslav@68
   186
jaroslav@68
   187
    /**
jaroslav@68
   188
     * General category "Lu" in the Unicode specification.
jaroslav@68
   189
     * @since   1.1
jaroslav@68
   190
     */
jaroslav@68
   191
    public static final byte UPPERCASE_LETTER = 1;
jaroslav@68
   192
jaroslav@68
   193
    /**
jaroslav@68
   194
     * General category "Ll" in the Unicode specification.
jaroslav@68
   195
     * @since   1.1
jaroslav@68
   196
     */
jaroslav@68
   197
    public static final byte LOWERCASE_LETTER = 2;
jaroslav@68
   198
jaroslav@68
   199
    /**
jaroslav@68
   200
     * General category "Lt" in the Unicode specification.
jaroslav@68
   201
     * @since   1.1
jaroslav@68
   202
     */
jaroslav@68
   203
    public static final byte TITLECASE_LETTER = 3;
jaroslav@68
   204
jaroslav@68
   205
    /**
jaroslav@68
   206
     * General category "Lm" in the Unicode specification.
jaroslav@68
   207
     * @since   1.1
jaroslav@68
   208
     */
jaroslav@68
   209
    public static final byte MODIFIER_LETTER = 4;
jaroslav@68
   210
jaroslav@68
   211
    /**
jaroslav@68
   212
     * General category "Lo" in the Unicode specification.
jaroslav@68
   213
     * @since   1.1
jaroslav@68
   214
     */
jaroslav@68
   215
    public static final byte OTHER_LETTER = 5;
jaroslav@68
   216
jaroslav@68
   217
    /**
jaroslav@68
   218
     * General category "Mn" in the Unicode specification.
jaroslav@68
   219
     * @since   1.1
jaroslav@68
   220
     */
jaroslav@68
   221
    public static final byte NON_SPACING_MARK = 6;
jaroslav@68
   222
jaroslav@68
   223
    /**
jaroslav@68
   224
     * General category "Me" in the Unicode specification.
jaroslav@68
   225
     * @since   1.1
jaroslav@68
   226
     */
jaroslav@68
   227
    public static final byte ENCLOSING_MARK = 7;
jaroslav@68
   228
jaroslav@68
   229
    /**
jaroslav@68
   230
     * General category "Mc" in the Unicode specification.
jaroslav@68
   231
     * @since   1.1
jaroslav@68
   232
     */
jaroslav@68
   233
    public static final byte COMBINING_SPACING_MARK = 8;
jaroslav@68
   234
jaroslav@68
   235
    /**
jaroslav@68
   236
     * General category "Nd" in the Unicode specification.
jaroslav@68
   237
     * @since   1.1
jaroslav@68
   238
     */
jaroslav@68
   239
    public static final byte DECIMAL_DIGIT_NUMBER        = 9;
jaroslav@68
   240
jaroslav@68
   241
    /**
jaroslav@68
   242
     * General category "Nl" in the Unicode specification.
jaroslav@68
   243
     * @since   1.1
jaroslav@68
   244
     */
jaroslav@68
   245
    public static final byte LETTER_NUMBER = 10;
jaroslav@68
   246
jaroslav@68
   247
    /**
jaroslav@68
   248
     * General category "No" in the Unicode specification.
jaroslav@68
   249
     * @since   1.1
jaroslav@68
   250
     */
jaroslav@68
   251
    public static final byte OTHER_NUMBER = 11;
jaroslav@68
   252
jaroslav@68
   253
    /**
jaroslav@68
   254
     * General category "Zs" in the Unicode specification.
jaroslav@68
   255
     * @since   1.1
jaroslav@68
   256
     */
jaroslav@68
   257
    public static final byte SPACE_SEPARATOR = 12;
jaroslav@68
   258
jaroslav@68
   259
    /**
jaroslav@68
   260
     * General category "Zl" in the Unicode specification.
jaroslav@68
   261
     * @since   1.1
jaroslav@68
   262
     */
jaroslav@68
   263
    public static final byte LINE_SEPARATOR = 13;
jaroslav@68
   264
jaroslav@68
   265
    /**
jaroslav@68
   266
     * General category "Zp" in the Unicode specification.
jaroslav@68
   267
     * @since   1.1
jaroslav@68
   268
     */
jaroslav@68
   269
    public static final byte PARAGRAPH_SEPARATOR = 14;
jaroslav@68
   270
jaroslav@68
   271
    /**
jaroslav@68
   272
     * General category "Cc" in the Unicode specification.
jaroslav@68
   273
     * @since   1.1
jaroslav@68
   274
     */
jaroslav@68
   275
    public static final byte CONTROL = 15;
jaroslav@68
   276
jaroslav@68
   277
    /**
jaroslav@68
   278
     * General category "Cf" in the Unicode specification.
jaroslav@68
   279
     * @since   1.1
jaroslav@68
   280
     */
jaroslav@68
   281
    public static final byte FORMAT = 16;
jaroslav@68
   282
jaroslav@68
   283
    /**
jaroslav@68
   284
     * General category "Co" in the Unicode specification.
jaroslav@68
   285
     * @since   1.1
jaroslav@68
   286
     */
jaroslav@68
   287
    public static final byte PRIVATE_USE = 18;
jaroslav@68
   288
jaroslav@68
   289
    /**
jaroslav@68
   290
     * General category "Cs" in the Unicode specification.
jaroslav@68
   291
     * @since   1.1
jaroslav@68
   292
     */
jaroslav@68
   293
    public static final byte SURROGATE = 19;
jaroslav@68
   294
jaroslav@68
   295
    /**
jaroslav@68
   296
     * General category "Pd" in the Unicode specification.
jaroslav@68
   297
     * @since   1.1
jaroslav@68
   298
     */
jaroslav@68
   299
    public static final byte DASH_PUNCTUATION = 20;
jaroslav@68
   300
jaroslav@68
   301
    /**
jaroslav@68
   302
     * General category "Ps" in the Unicode specification.
jaroslav@68
   303
     * @since   1.1
jaroslav@68
   304
     */
jaroslav@68
   305
    public static final byte START_PUNCTUATION = 21;
jaroslav@68
   306
jaroslav@68
   307
    /**
jaroslav@68
   308
     * General category "Pe" in the Unicode specification.
jaroslav@68
   309
     * @since   1.1
jaroslav@68
   310
     */
jaroslav@68
   311
    public static final byte END_PUNCTUATION = 22;
jaroslav@68
   312
jaroslav@68
   313
    /**
jaroslav@68
   314
     * General category "Pc" in the Unicode specification.
jaroslav@68
   315
     * @since   1.1
jaroslav@68
   316
     */
jaroslav@68
   317
    public static final byte CONNECTOR_PUNCTUATION = 23;
jaroslav@68
   318
jaroslav@68
   319
    /**
jaroslav@68
   320
     * General category "Po" in the Unicode specification.
jaroslav@68
   321
     * @since   1.1
jaroslav@68
   322
     */
jaroslav@68
   323
    public static final byte OTHER_PUNCTUATION = 24;
jaroslav@68
   324
jaroslav@68
   325
    /**
jaroslav@68
   326
     * General category "Sm" in the Unicode specification.
jaroslav@68
   327
     * @since   1.1
jaroslav@68
   328
     */
jaroslav@68
   329
    public static final byte MATH_SYMBOL = 25;
jaroslav@68
   330
jaroslav@68
   331
    /**
jaroslav@68
   332
     * General category "Sc" in the Unicode specification.
jaroslav@68
   333
     * @since   1.1
jaroslav@68
   334
     */
jaroslav@68
   335
    public static final byte CURRENCY_SYMBOL = 26;
jaroslav@68
   336
jaroslav@68
   337
    /**
jaroslav@68
   338
     * General category "Sk" in the Unicode specification.
jaroslav@68
   339
     * @since   1.1
jaroslav@68
   340
     */
jaroslav@68
   341
    public static final byte MODIFIER_SYMBOL = 27;
jaroslav@68
   342
jaroslav@68
   343
    /**
jaroslav@68
   344
     * General category "So" in the Unicode specification.
jaroslav@68
   345
     * @since   1.1
jaroslav@68
   346
     */
jaroslav@68
   347
    public static final byte OTHER_SYMBOL = 28;
jaroslav@68
   348
jaroslav@68
   349
    /**
jaroslav@68
   350
     * General category "Pi" in the Unicode specification.
jaroslav@68
   351
     * @since   1.4
jaroslav@68
   352
     */
jaroslav@68
   353
    public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
jaroslav@68
   354
jaroslav@68
   355
    /**
jaroslav@68
   356
     * General category "Pf" in the Unicode specification.
jaroslav@68
   357
     * @since   1.4
jaroslav@68
   358
     */
jaroslav@68
   359
    public static final byte FINAL_QUOTE_PUNCTUATION = 30;
jaroslav@68
   360
jaroslav@68
   361
    /**
jaroslav@68
   362
     * Error flag. Use int (code point) to avoid confusion with U+FFFF.
jaroslav@68
   363
     */
jaroslav@68
   364
    static final int ERROR = 0xFFFFFFFF;
jaroslav@68
   365
jaroslav@68
   366
jaroslav@68
   367
    /**
jaroslav@68
   368
     * Undefined bidirectional character type. Undefined {@code char}
jaroslav@68
   369
     * values have undefined directionality in the Unicode specification.
jaroslav@68
   370
     * @since 1.4
jaroslav@68
   371
     */
jaroslav@68
   372
    public static final byte DIRECTIONALITY_UNDEFINED = -1;
jaroslav@68
   373
jaroslav@68
   374
    /**
jaroslav@68
   375
     * Strong bidirectional character type "L" in the Unicode specification.
jaroslav@68
   376
     * @since 1.4
jaroslav@68
   377
     */
jaroslav@68
   378
    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
jaroslav@68
   379
jaroslav@68
   380
    /**
jaroslav@68
   381
     * Strong bidirectional character type "R" in the Unicode specification.
jaroslav@68
   382
     * @since 1.4
jaroslav@68
   383
     */
jaroslav@68
   384
    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
jaroslav@68
   385
jaroslav@68
   386
    /**
jaroslav@68
   387
     * Strong bidirectional character type "AL" in the Unicode specification.
jaroslav@68
   388
     * @since 1.4
jaroslav@68
   389
     */
jaroslav@68
   390
    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
jaroslav@68
   391
jaroslav@68
   392
    /**
jaroslav@68
   393
     * Weak bidirectional character type "EN" in the Unicode specification.
jaroslav@68
   394
     * @since 1.4
jaroslav@68
   395
     */
jaroslav@68
   396
    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
jaroslav@68
   397
jaroslav@68
   398
    /**
jaroslav@68
   399
     * Weak bidirectional character type "ES" in the Unicode specification.
jaroslav@68
   400
     * @since 1.4
jaroslav@68
   401
     */
jaroslav@68
   402
    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
jaroslav@68
   403
jaroslav@68
   404
    /**
jaroslav@68
   405
     * Weak bidirectional character type "ET" in the Unicode specification.
jaroslav@68
   406
     * @since 1.4
jaroslav@68
   407
     */
jaroslav@68
   408
    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
jaroslav@68
   409
jaroslav@68
   410
    /**
jaroslav@68
   411
     * Weak bidirectional character type "AN" in the Unicode specification.
jaroslav@68
   412
     * @since 1.4
jaroslav@68
   413
     */
jaroslav@68
   414
    public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
jaroslav@68
   415
jaroslav@68
   416
    /**
jaroslav@68
   417
     * Weak bidirectional character type "CS" in the Unicode specification.
jaroslav@68
   418
     * @since 1.4
jaroslav@68
   419
     */
jaroslav@68
   420
    public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
jaroslav@68
   421
jaroslav@68
   422
    /**
jaroslav@68
   423
     * Weak bidirectional character type "NSM" in the Unicode specification.
jaroslav@68
   424
     * @since 1.4
jaroslav@68
   425
     */
jaroslav@68
   426
    public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
jaroslav@68
   427
jaroslav@68
   428
    /**
jaroslav@68
   429
     * Weak bidirectional character type "BN" in the Unicode specification.
jaroslav@68
   430
     * @since 1.4
jaroslav@68
   431
     */
jaroslav@68
   432
    public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
jaroslav@68
   433
jaroslav@68
   434
    /**
jaroslav@68
   435
     * Neutral bidirectional character type "B" in the Unicode specification.
jaroslav@68
   436
     * @since 1.4
jaroslav@68
   437
     */
jaroslav@68
   438
    public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
jaroslav@68
   439
jaroslav@68
   440
    /**
jaroslav@68
   441
     * Neutral bidirectional character type "S" in the Unicode specification.
jaroslav@68
   442
     * @since 1.4
jaroslav@68
   443
     */
jaroslav@68
   444
    public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
jaroslav@68
   445
jaroslav@68
   446
    /**
jaroslav@68
   447
     * Neutral bidirectional character type "WS" in the Unicode specification.
jaroslav@68
   448
     * @since 1.4
jaroslav@68
   449
     */
jaroslav@68
   450
    public static final byte DIRECTIONALITY_WHITESPACE = 12;
jaroslav@68
   451
jaroslav@68
   452
    /**
jaroslav@68
   453
     * Neutral bidirectional character type "ON" in the Unicode specification.
jaroslav@68
   454
     * @since 1.4
jaroslav@68
   455
     */
jaroslav@68
   456
    public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
jaroslav@68
   457
jaroslav@68
   458
    /**
jaroslav@68
   459
     * Strong bidirectional character type "LRE" in the Unicode specification.
jaroslav@68
   460
     * @since 1.4
jaroslav@68
   461
     */
jaroslav@68
   462
    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
jaroslav@68
   463
jaroslav@68
   464
    /**
jaroslav@68
   465
     * Strong bidirectional character type "LRO" in the Unicode specification.
jaroslav@68
   466
     * @since 1.4
jaroslav@68
   467
     */
jaroslav@68
   468
    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
jaroslav@68
   469
jaroslav@68
   470
    /**
jaroslav@68
   471
     * Strong bidirectional character type "RLE" in the Unicode specification.
jaroslav@68
   472
     * @since 1.4
jaroslav@68
   473
     */
jaroslav@68
   474
    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
jaroslav@68
   475
jaroslav@68
   476
    /**
jaroslav@68
   477
     * Strong bidirectional character type "RLO" in the Unicode specification.
jaroslav@68
   478
     * @since 1.4
jaroslav@68
   479
     */
jaroslav@68
   480
    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
jaroslav@68
   481
jaroslav@68
   482
    /**
jaroslav@68
   483
     * Weak bidirectional character type "PDF" in the Unicode specification.
jaroslav@68
   484
     * @since 1.4
jaroslav@68
   485
     */
jaroslav@68
   486
    public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
jaroslav@68
   487
jaroslav@68
   488
    /**
jaroslav@68
   489
     * The minimum value of a
jaroslav@68
   490
     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
jaroslav@68
   491
     * Unicode high-surrogate code unit</a>
jaroslav@68
   492
     * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
jaroslav@68
   493
     * A high-surrogate is also known as a <i>leading-surrogate</i>.
jaroslav@68
   494
     *
jaroslav@68
   495
     * @since 1.5
jaroslav@68
   496
     */
jaroslav@68
   497
    public static final char MIN_HIGH_SURROGATE = '\uD800';
jaroslav@68
   498
jaroslav@68
   499
    /**
jaroslav@68
   500
     * The maximum value of a
jaroslav@68
   501
     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
jaroslav@68
   502
     * Unicode high-surrogate code unit</a>
jaroslav@68
   503
     * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
jaroslav@68
   504
     * A high-surrogate is also known as a <i>leading-surrogate</i>.
jaroslav@68
   505
     *
jaroslav@68
   506
     * @since 1.5
jaroslav@68
   507
     */
jaroslav@68
   508
    public static final char MAX_HIGH_SURROGATE = '\uDBFF';
jaroslav@68
   509
jaroslav@68
   510
    /**
jaroslav@68
   511
     * The minimum value of a
jaroslav@68
   512
     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
jaroslav@68
   513
     * Unicode low-surrogate code unit</a>
jaroslav@68
   514
     * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
jaroslav@68
   515
     * A low-surrogate is also known as a <i>trailing-surrogate</i>.
jaroslav@68
   516
     *
jaroslav@68
   517
     * @since 1.5
jaroslav@68
   518
     */
jaroslav@68
   519
    public static final char MIN_LOW_SURROGATE  = '\uDC00';
jaroslav@68
   520
jaroslav@68
   521
    /**
jaroslav@68
   522
     * The maximum value of a
jaroslav@68
   523
     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
jaroslav@68
   524
     * Unicode low-surrogate code unit</a>
jaroslav@68
   525
     * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
jaroslav@68
   526
     * A low-surrogate is also known as a <i>trailing-surrogate</i>.
jaroslav@68
   527
     *
jaroslav@68
   528
     * @since 1.5
jaroslav@68
   529
     */
jaroslav@68
   530
    public static final char MAX_LOW_SURROGATE  = '\uDFFF';
jaroslav@68
   531
jaroslav@68
   532
    /**
jaroslav@68
   533
     * The minimum value of a Unicode surrogate code unit in the
jaroslav@68
   534
     * UTF-16 encoding, constant {@code '\u005CuD800'}.
jaroslav@68
   535
     *
jaroslav@68
   536
     * @since 1.5
jaroslav@68
   537
     */
jaroslav@68
   538
    public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
jaroslav@68
   539
jaroslav@68
   540
    /**
jaroslav@68
   541
     * The maximum value of a Unicode surrogate code unit in the
jaroslav@68
   542
     * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
jaroslav@68
   543
     *
jaroslav@68
   544
     * @since 1.5
jaroslav@68
   545
     */
jaroslav@68
   546
    public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
jaroslav@68
   547
jaroslav@68
   548
    /**
jaroslav@68
   549
     * The minimum value of a
jaroslav@68
   550
     * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
jaroslav@68
   551
     * Unicode supplementary code point</a>, constant {@code U+10000}.
jaroslav@68
   552
     *
jaroslav@68
   553
     * @since 1.5
jaroslav@68
   554
     */
jaroslav@68
   555
    public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
jaroslav@68
   556
jaroslav@68
   557
    /**
jaroslav@68
   558
     * The minimum value of a
jaroslav@68
   559
     * <a href="http://www.unicode.org/glossary/#code_point">
jaroslav@68
   560
     * Unicode code point</a>, constant {@code U+0000}.
jaroslav@68
   561
     *
jaroslav@68
   562
     * @since 1.5
jaroslav@68
   563
     */
jaroslav@68
   564
    public static final int MIN_CODE_POINT = 0x000000;
jaroslav@68
   565
jaroslav@68
   566
    /**
jaroslav@68
   567
     * The maximum value of a
jaroslav@68
   568
     * <a href="http://www.unicode.org/glossary/#code_point">
jaroslav@68
   569
     * Unicode code point</a>, constant {@code U+10FFFF}.
jaroslav@68
   570
     *
jaroslav@68
   571
     * @since 1.5
jaroslav@68
   572
     */
jaroslav@68
   573
    public static final int MAX_CODE_POINT = 0X10FFFF;
jaroslav@68
   574
jaroslav@68
   575
jaroslav@68
   576
    /**
jaroslav@68
   577
     * Instances of this class represent particular subsets of the Unicode
jaroslav@68
   578
     * character set.  The only family of subsets defined in the
jaroslav@68
   579
     * {@code Character} class is {@link Character.UnicodeBlock}.
jaroslav@68
   580
     * Other portions of the Java API may define other subsets for their
jaroslav@68
   581
     * own purposes.
jaroslav@68
   582
     *
jaroslav@68
   583
     * @since 1.2
jaroslav@68
   584
     */
jaroslav@68
   585
    public static class Subset  {
jaroslav@68
   586
jaroslav@68
   587
        private String name;
jaroslav@68
   588
jaroslav@68
   589
        /**
jaroslav@68
   590
         * Constructs a new {@code Subset} instance.
jaroslav@68
   591
         *
jaroslav@68
   592
         * @param  name  The name of this subset
jaroslav@68
   593
         * @exception NullPointerException if name is {@code null}
jaroslav@68
   594
         */
jaroslav@68
   595
        protected Subset(String name) {
jaroslav@68
   596
            if (name == null) {
jaroslav@68
   597
                throw new NullPointerException("name");
jaroslav@68
   598
            }
jaroslav@68
   599
            this.name = name;
jaroslav@68
   600
        }
jaroslav@68
   601
jaroslav@68
   602
        /**
jaroslav@68
   603
         * Compares two {@code Subset} objects for equality.
jaroslav@68
   604
         * This method returns {@code true} if and only if
jaroslav@68
   605
         * {@code this} and the argument refer to the same
jaroslav@68
   606
         * object; since this method is {@code final}, this
jaroslav@68
   607
         * guarantee holds for all subclasses.
jaroslav@68
   608
         */
jaroslav@68
   609
        public final boolean equals(Object obj) {
jaroslav@68
   610
            return (this == obj);
jaroslav@68
   611
        }
jaroslav@68
   612
jaroslav@68
   613
        /**
jaroslav@68
   614
         * Returns the standard hash code as defined by the
jaroslav@68
   615
         * {@link Object#hashCode} method.  This method
jaroslav@68
   616
         * is {@code final} in order to ensure that the
jaroslav@68
   617
         * {@code equals} and {@code hashCode} methods will
jaroslav@68
   618
         * be consistent in all subclasses.
jaroslav@68
   619
         */
jaroslav@68
   620
        public final int hashCode() {
jaroslav@68
   621
            return super.hashCode();
jaroslav@68
   622
        }
jaroslav@68
   623
jaroslav@68
   624
        /**
jaroslav@68
   625
         * Returns the name of this subset.
jaroslav@68
   626
         */
jaroslav@68
   627
        public final String toString() {
jaroslav@68
   628
            return name;
jaroslav@68
   629
        }
jaroslav@68
   630
    }
jaroslav@68
   631
jaroslav@68
   632
    // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
jaroslav@68
   633
    // for the latest specification of Unicode Blocks.
jaroslav@68
   634
jaroslav@68
   635
jaroslav@68
   636
    /**
jaroslav@68
   637
     * The value of the {@code Character}.
jaroslav@68
   638
     *
jaroslav@68
   639
     * @serial
jaroslav@68
   640
     */
jaroslav@68
   641
    private final char value;
jaroslav@68
   642
jaroslav@68
   643
    /** use serialVersionUID from JDK 1.0.2 for interoperability */
jaroslav@68
   644
    private static final long serialVersionUID = 3786198910865385080L;
jaroslav@68
   645
jaroslav@68
   646
    /**
jaroslav@68
   647
     * Constructs a newly allocated {@code Character} object that
jaroslav@68
   648
     * represents the specified {@code char} value.
jaroslav@68
   649
     *
jaroslav@68
   650
     * @param  value   the value to be represented by the
jaroslav@68
   651
     *                  {@code Character} object.
jaroslav@68
   652
     */
jaroslav@68
   653
    public Character(char value) {
        // The wrapped primitive is stored once; the field is declared final.
        this.value = value;
    }
jaroslav@68
   656
jaroslav@68
   657
    private static class CharacterCache {
jaroslav@68
   658
        private CharacterCache(){}
jaroslav@68
   659
jaroslav@68
   660
        static final Character cache[] = new Character[127 + 1];
jaroslav@68
   661
jaroslav@68
   662
        static {
jaroslav@68
   663
            for (int i = 0; i < cache.length; i++)
jaroslav@68
   664
                cache[i] = new Character((char)i);
jaroslav@68
   665
        }
jaroslav@68
   666
    }
jaroslav@68
   667
jaroslav@68
   668
    /**
jaroslav@68
   669
     * Returns a <tt>Character</tt> instance representing the specified
jaroslav@68
   670
     * <tt>char</tt> value.
jaroslav@68
   671
     * If a new <tt>Character</tt> instance is not required, this method
jaroslav@68
   672
     * should generally be used in preference to the constructor
jaroslav@68
   673
     * {@link #Character(char)}, as this method is likely to yield
jaroslav@68
   674
     * significantly better space and time performance by caching
jaroslav@68
   675
     * frequently requested values.
jaroslav@68
   676
     *
jaroslav@68
   677
     * This method will always cache values in the range {@code
jaroslav@68
   678
     * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
jaroslav@68
   679
     * cache other values outside of this range.
jaroslav@68
   680
     *
jaroslav@68
   681
     * @param  c a char value.
jaroslav@68
   682
     * @return a <tt>Character</tt> instance representing <tt>c</tt>.
jaroslav@68
   683
     * @since  1.5
jaroslav@68
   684
     */
jaroslav@68
   685
    public static Character valueOf(char c) {
jaroslav@68
   686
        if (c <= 127) { // must cache
jaroslav@68
   687
            return CharacterCache.cache[(int)c];
jaroslav@68
   688
        }
jaroslav@68
   689
        return new Character(c);
jaroslav@68
   690
    }
jaroslav@68
   691
jaroslav@68
   692
    /**
jaroslav@68
   693
     * Returns the value of this {@code Character} object.
jaroslav@68
   694
     * @return  the primitive {@code char} value represented by
jaroslav@68
   695
     *          this object.
jaroslav@68
   696
     */
jaroslav@68
   697
    public char charValue() {
jaroslav@68
   698
        return value;
jaroslav@68
   699
    }
jaroslav@68
   700
jaroslav@68
   701
    /**
jaroslav@68
   702
     * Returns a hash code for this {@code Character}; equal to the result
jaroslav@68
   703
     * of invoking {@code charValue()}.
jaroslav@68
   704
     *
jaroslav@68
   705
     * @return a hash code value for this {@code Character}
jaroslav@68
   706
     */
jaroslav@68
   707
    public int hashCode() {
jaroslav@68
   708
        return (int)value;
jaroslav@68
   709
    }
jaroslav@68
   710
jaroslav@68
   711
    /**
jaroslav@68
   712
     * Compares this object against the specified object.
jaroslav@68
   713
     * The result is {@code true} if and only if the argument is not
jaroslav@68
   714
     * {@code null} and is a {@code Character} object that
jaroslav@68
   715
     * represents the same {@code char} value as this object.
jaroslav@68
   716
     *
jaroslav@68
   717
     * @param   obj   the object to compare with.
jaroslav@68
   718
     * @return  {@code true} if the objects are the same;
jaroslav@68
   719
     *          {@code false} otherwise.
jaroslav@68
   720
     */
jaroslav@68
   721
    public boolean equals(Object obj) {
jaroslav@68
   722
        if (obj instanceof Character) {
jaroslav@68
   723
            return value == ((Character)obj).charValue();
jaroslav@68
   724
        }
jaroslav@68
   725
        return false;
jaroslav@68
   726
    }
jaroslav@68
   727
jaroslav@68
   728
    /**
jaroslav@68
   729
     * Returns a {@code String} object representing this
jaroslav@68
   730
     * {@code Character}'s value.  The result is a string of
jaroslav@68
   731
     * length 1 whose sole component is the primitive
jaroslav@68
   732
     * {@code char} value represented by this
jaroslav@68
   733
     * {@code Character} object.
jaroslav@68
   734
     *
jaroslav@68
   735
     * @return  a string representation of this object.
jaroslav@68
   736
     */
jaroslav@68
   737
    public String toString() {
jaroslav@68
   738
        char buf[] = {value};
jaroslav@68
   739
        return String.valueOf(buf);
jaroslav@68
   740
    }
jaroslav@68
   741
jaroslav@68
   742
    /**
jaroslav@68
   743
     * Returns a {@code String} object representing the
jaroslav@68
   744
     * specified {@code char}.  The result is a string of length
jaroslav@68
   745
     * 1 consisting solely of the specified {@code char}.
jaroslav@68
   746
     *
jaroslav@68
   747
     * @param c the {@code char} to be converted
jaroslav@68
   748
     * @return the string representation of the specified {@code char}
jaroslav@68
   749
     * @since 1.4
jaroslav@68
   750
     */
jaroslav@68
   751
    public static String toString(char c) {
jaroslav@68
   752
        return String.valueOf(c);
jaroslav@68
   753
    }
jaroslav@68
   754
jaroslav@68
   755
    /**
jaroslav@68
   756
     * Determines whether the specified code point is a valid
jaroslav@68
   757
     * <a href="http://www.unicode.org/glossary/#code_point">
jaroslav@68
   758
     * Unicode code point value</a>.
jaroslav@68
   759
     *
jaroslav@68
   760
     * @param  codePoint the Unicode code point to be tested
jaroslav@68
   761
     * @return {@code true} if the specified code point value is between
jaroslav@68
   762
     *         {@link #MIN_CODE_POINT} and
jaroslav@68
   763
     *         {@link #MAX_CODE_POINT} inclusive;
jaroslav@68
   764
     *         {@code false} otherwise.
jaroslav@68
   765
     * @since  1.5
jaroslav@68
   766
     */
jaroslav@68
   767
    public static boolean isValidCodePoint(int codePoint) {
jaroslav@68
   768
        // Optimized form of:
jaroslav@68
   769
        //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
jaroslav@68
   770
        int plane = codePoint >>> 16;
jaroslav@68
   771
        return plane < ((MAX_CODE_POINT + 1) >>> 16);
jaroslav@68
   772
    }
jaroslav@68
   773
jaroslav@68
   774
    /**
jaroslav@68
   775
     * Determines whether the specified character (Unicode code point)
jaroslav@68
   776
     * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
jaroslav@68
   777
     * Such code points can be represented using a single {@code char}.
jaroslav@68
   778
     *
jaroslav@68
   779
     * @param  codePoint the character (Unicode code point) to be tested
jaroslav@68
   780
     * @return {@code true} if the specified code point is between
jaroslav@68
   781
     *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
jaroslav@68
   782
     *         {@code false} otherwise.
jaroslav@68
   783
     * @since  1.7
jaroslav@68
   784
     */
jaroslav@68
   785
    public static boolean isBmpCodePoint(int codePoint) {
jaroslav@68
   786
        return codePoint >>> 16 == 0;
jaroslav@68
   787
        // Optimized form of:
jaroslav@68
   788
        //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
jaroslav@68
   789
        // We consistently use logical shift (>>>) to facilitate
jaroslav@68
   790
        // additional runtime optimizations.
jaroslav@68
   791
    }
jaroslav@68
   792
jaroslav@68
   793
    /**
jaroslav@68
   794
     * Determines whether the specified character (Unicode code point)
jaroslav@68
   795
     * is in the <a href="#supplementary">supplementary character</a> range.
jaroslav@68
   796
     *
jaroslav@68
   797
     * @param  codePoint the character (Unicode code point) to be tested
jaroslav@68
   798
     * @return {@code true} if the specified code point is between
jaroslav@68
   799
     *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
jaroslav@68
   800
     *         {@link #MAX_CODE_POINT} inclusive;
jaroslav@68
   801
     *         {@code false} otherwise.
jaroslav@68
   802
     * @since  1.5
jaroslav@68
   803
     */
jaroslav@68
   804
    public static boolean isSupplementaryCodePoint(int codePoint) {
jaroslav@68
   805
        return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
jaroslav@68
   806
            && codePoint <  MAX_CODE_POINT + 1;
jaroslav@68
   807
    }
jaroslav@68
   808
jaroslav@68
   809
    /**
jaroslav@68
   810
     * Determines if the given {@code char} value is a
jaroslav@68
   811
     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
jaroslav@68
   812
     * Unicode high-surrogate code unit</a>
jaroslav@68
   813
     * (also known as <i>leading-surrogate code unit</i>).
jaroslav@68
   814
     *
jaroslav@68
   815
     * <p>Such values do not represent characters by themselves,
jaroslav@68
   816
     * but are used in the representation of
jaroslav@68
   817
     * <a href="#supplementary">supplementary characters</a>
jaroslav@68
   818
     * in the UTF-16 encoding.
jaroslav@68
   819
     *
jaroslav@68
   820
     * @param  ch the {@code char} value to be tested.
jaroslav@68
   821
     * @return {@code true} if the {@code char} value is between
jaroslav@68
   822
     *         {@link #MIN_HIGH_SURROGATE} and
jaroslav@68
   823
     *         {@link #MAX_HIGH_SURROGATE} inclusive;
jaroslav@68
   824
     *         {@code false} otherwise.
jaroslav@68
   825
     * @see    Character#isLowSurrogate(char)
jaroslav@68
   826
     * @see    Character.UnicodeBlock#of(int)
jaroslav@68
   827
     * @since  1.5
jaroslav@68
   828
     */
jaroslav@68
   829
    public static boolean isHighSurrogate(char ch) {
jaroslav@68
   830
        // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
jaroslav@68
   831
        return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
jaroslav@68
   832
    }
jaroslav@68
   833
jaroslav@68
   834
    /**
jaroslav@68
   835
     * Determines if the given {@code char} value is a
jaroslav@68
   836
     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
jaroslav@68
   837
     * Unicode low-surrogate code unit</a>
jaroslav@68
   838
     * (also known as <i>trailing-surrogate code unit</i>).
jaroslav@68
   839
     *
jaroslav@68
   840
     * <p>Such values do not represent characters by themselves,
jaroslav@68
   841
     * but are used in the representation of
jaroslav@68
   842
     * <a href="#supplementary">supplementary characters</a>
jaroslav@68
   843
     * in the UTF-16 encoding.
jaroslav@68
   844
     *
jaroslav@68
   845
     * @param  ch the {@code char} value to be tested.
jaroslav@68
   846
     * @return {@code true} if the {@code char} value is between
jaroslav@68
   847
     *         {@link #MIN_LOW_SURROGATE} and
jaroslav@68
   848
     *         {@link #MAX_LOW_SURROGATE} inclusive;
jaroslav@68
   849
     *         {@code false} otherwise.
jaroslav@68
   850
     * @see    Character#isHighSurrogate(char)
jaroslav@68
   851
     * @since  1.5
jaroslav@68
   852
     */
jaroslav@68
   853
    public static boolean isLowSurrogate(char ch) {
jaroslav@68
   854
        return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
jaroslav@68
   855
    }
jaroslav@68
   856
jaroslav@68
   857
    /**
jaroslav@68
   858
     * Determines if the given {@code char} value is a Unicode
jaroslav@68
   859
     * <i>surrogate code unit</i>.
jaroslav@68
   860
     *
jaroslav@68
   861
     * <p>Such values do not represent characters by themselves,
jaroslav@68
   862
     * but are used in the representation of
jaroslav@68
   863
     * <a href="#supplementary">supplementary characters</a>
jaroslav@68
   864
     * in the UTF-16 encoding.
jaroslav@68
   865
     *
jaroslav@68
   866
     * <p>A char value is a surrogate code unit if and only if it is either
jaroslav@68
   867
     * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
jaroslav@68
   868
     * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
jaroslav@68
   869
     *
jaroslav@68
   870
     * @param  ch the {@code char} value to be tested.
jaroslav@68
   871
     * @return {@code true} if the {@code char} value is between
jaroslav@68
   872
     *         {@link #MIN_SURROGATE} and
jaroslav@68
   873
     *         {@link #MAX_SURROGATE} inclusive;
jaroslav@68
   874
     *         {@code false} otherwise.
jaroslav@68
   875
     * @since  1.7
jaroslav@68
   876
     */
jaroslav@68
   877
    public static boolean isSurrogate(char ch) {
jaroslav@68
   878
        return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
jaroslav@68
   879
    }
jaroslav@68
   880
jaroslav@68
   881
    /**
jaroslav@68
   882
     * Determines whether the specified pair of {@code char}
jaroslav@68
   883
     * values is a valid
jaroslav@68
   884
     * <a href="http://www.unicode.org/glossary/#surrogate_pair">
jaroslav@68
   885
     * Unicode surrogate pair</a>.
jaroslav@68
   886
jaroslav@68
   887
     * <p>This method is equivalent to the expression:
jaroslav@68
   888
     * <blockquote><pre>
jaroslav@68
   889
     * isHighSurrogate(high) && isLowSurrogate(low)
jaroslav@68
   890
     * </pre></blockquote>
jaroslav@68
   891
     *
jaroslav@68
   892
     * @param  high the high-surrogate code value to be tested
jaroslav@68
   893
     * @param  low the low-surrogate code value to be tested
jaroslav@68
   894
     * @return {@code true} if the specified high and
jaroslav@68
   895
     * low-surrogate code values represent a valid surrogate pair;
jaroslav@68
   896
     * {@code false} otherwise.
jaroslav@68
   897
     * @since  1.5
jaroslav@68
   898
     */
jaroslav@68
   899
    public static boolean isSurrogatePair(char high, char low) {
jaroslav@68
   900
        return isHighSurrogate(high) && isLowSurrogate(low);
jaroslav@68
   901
    }
jaroslav@68
   902
jaroslav@68
   903
    /**
jaroslav@68
   904
     * Determines the number of {@code char} values needed to
jaroslav@68
   905
     * represent the specified character (Unicode code point). If the
jaroslav@68
   906
     * specified character is equal to or greater than 0x10000, then
jaroslav@68
   907
     * the method returns 2. Otherwise, the method returns 1.
jaroslav@68
   908
     *
jaroslav@68
   909
     * <p>This method doesn't validate the specified character to be a
jaroslav@68
   910
     * valid Unicode code point. The caller must validate the
jaroslav@68
   911
     * character value using {@link #isValidCodePoint(int) isValidCodePoint}
jaroslav@68
   912
     * if necessary.
jaroslav@68
   913
     *
jaroslav@68
   914
     * @param   codePoint the character (Unicode code point) to be tested.
jaroslav@68
   915
     * @return  2 if the specified code point is equal to or greater than 0x10000; 1 otherwise.
jaroslav@68
   916
     * @see     Character#isSupplementaryCodePoint(int)
jaroslav@68
   917
     * @since   1.5
jaroslav@68
   918
     */
jaroslav@68
   919
    public static int charCount(int codePoint) {
jaroslav@68
   920
        return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
jaroslav@68
   921
    }
jaroslav@68
   922
jaroslav@68
   923
    /**
jaroslav@68
   924
     * Converts the specified surrogate pair to its supplementary code
jaroslav@68
   925
     * point value. This method does not validate the specified
jaroslav@68
   926
     * surrogate pair. The caller must validate it using {@link
jaroslav@68
   927
     * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
jaroslav@68
   928
     *
jaroslav@68
   929
     * @param  high the high-surrogate code unit
jaroslav@68
   930
     * @param  low the low-surrogate code unit
jaroslav@68
   931
     * @return the supplementary code point composed from the
jaroslav@68
   932
     *         specified surrogate pair.
jaroslav@68
   933
     * @since  1.5
jaroslav@68
   934
     */
jaroslav@68
   935
    public static int toCodePoint(char high, char low) {
jaroslav@68
   936
        // Optimized form of:
jaroslav@68
   937
        // return ((high - MIN_HIGH_SURROGATE) << 10)
jaroslav@68
   938
        //         + (low - MIN_LOW_SURROGATE)
jaroslav@68
   939
        //         + MIN_SUPPLEMENTARY_CODE_POINT;
jaroslav@68
   940
        return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
jaroslav@68
   941
                                       - (MIN_HIGH_SURROGATE << 10)
jaroslav@68
   942
                                       - MIN_LOW_SURROGATE);
jaroslav@68
   943
    }
jaroslav@68
   944
jaroslav@68
   945
    /**
jaroslav@68
   946
     * Returns the code point at the given index of the
jaroslav@68
   947
     * {@code CharSequence}. If the {@code char} value at
jaroslav@68
   948
     * the given index in the {@code CharSequence} is in the
jaroslav@68
   949
     * high-surrogate range, the following index is less than the
jaroslav@68
   950
     * length of the {@code CharSequence}, and the
jaroslav@68
   951
     * {@code char} value at the following index is in the
jaroslav@68
   952
     * low-surrogate range, then the supplementary code point
jaroslav@68
   953
     * corresponding to this surrogate pair is returned. Otherwise,
jaroslav@68
   954
     * the {@code char} value at the given index is returned.
jaroslav@68
   955
     *
jaroslav@68
   956
     * @param seq a sequence of {@code char} values (Unicode code
jaroslav@68
   957
     * units)
jaroslav@68
   958
     * @param index the index to the {@code char} values (Unicode
jaroslav@68
   959
     * code units) in {@code seq} to be converted
jaroslav@68
   960
     * @return the Unicode code point at the given index
jaroslav@68
   961
     * @exception NullPointerException if {@code seq} is null.
jaroslav@68
   962
     * @exception IndexOutOfBoundsException if the value
jaroslav@68
   963
     * {@code index} is negative or not less than
jaroslav@68
   964
     * {@link CharSequence#length() seq.length()}.
jaroslav@68
   965
     * @since  1.5
jaroslav@68
   966
     */
jaroslav@68
   967
    public static int codePointAt(CharSequence seq, int index) {
jaroslav@68
   968
        char c1 = seq.charAt(index++);
jaroslav@68
   969
        if (isHighSurrogate(c1)) {
jaroslav@68
   970
            if (index < seq.length()) {
jaroslav@68
   971
                char c2 = seq.charAt(index);
jaroslav@68
   972
                if (isLowSurrogate(c2)) {
jaroslav@68
   973
                    return toCodePoint(c1, c2);
jaroslav@68
   974
                }
jaroslav@68
   975
            }
jaroslav@68
   976
        }
jaroslav@68
   977
        return c1;
jaroslav@68
   978
    }
jaroslav@68
   979
jaroslav@68
   980
    /**
jaroslav@68
   981
     * Returns the code point at the given index of the
jaroslav@68
   982
     * {@code char} array. If the {@code char} value at
jaroslav@68
   983
     * the given index in the {@code char} array is in the
jaroslav@68
   984
     * high-surrogate range, the following index is less than the
jaroslav@68
   985
     * length of the {@code char} array, and the
jaroslav@68
   986
     * {@code char} value at the following index is in the
jaroslav@68
   987
     * low-surrogate range, then the supplementary code point
jaroslav@68
   988
     * corresponding to this surrogate pair is returned. Otherwise,
jaroslav@68
   989
     * the {@code char} value at the given index is returned.
jaroslav@68
   990
     *
jaroslav@68
   991
     * @param a the {@code char} array
jaroslav@68
   992
     * @param index the index to the {@code char} values (Unicode
jaroslav@68
   993
     * code units) in the {@code char} array to be converted
jaroslav@68
   994
     * @return the Unicode code point at the given index
jaroslav@68
   995
     * @exception NullPointerException if {@code a} is null.
jaroslav@68
   996
     * @exception IndexOutOfBoundsException if the value
jaroslav@68
   997
     * {@code index} is negative or not less than
jaroslav@68
   998
     * the length of the {@code char} array.
jaroslav@68
   999
     * @since  1.5
jaroslav@68
  1000
     */
jaroslav@68
  1001
    public static int codePointAt(char[] a, int index) {
jaroslav@68
  1002
        return codePointAtImpl(a, index, a.length);
jaroslav@68
  1003
    }
jaroslav@68
  1004
jaroslav@68
  1005
    /**
jaroslav@68
  1006
     * Returns the code point at the given index of the
jaroslav@68
  1007
     * {@code char} array, where only array elements with
jaroslav@68
  1008
     * {@code index} less than {@code limit} can be used. If
jaroslav@68
  1009
     * the {@code char} value at the given index in the
jaroslav@68
  1010
     * {@code char} array is in the high-surrogate range, the
jaroslav@68
  1011
     * following index is less than the {@code limit}, and the
jaroslav@68
  1012
     * {@code char} value at the following index is in the
jaroslav@68
  1013
     * low-surrogate range, then the supplementary code point
jaroslav@68
  1014
     * corresponding to this surrogate pair is returned. Otherwise,
jaroslav@68
  1015
     * the {@code char} value at the given index is returned.
jaroslav@68
  1016
     *
jaroslav@68
  1017
     * @param a the {@code char} array
jaroslav@68
  1018
     * @param index the index to the {@code char} values (Unicode
jaroslav@68
  1019
     * code units) in the {@code char} array to be converted
jaroslav@68
  1020
     * @param limit the index after the last array element that
jaroslav@68
  1021
     * can be used in the {@code char} array
jaroslav@68
  1022
     * @return the Unicode code point at the given index
jaroslav@68
  1023
     * @exception NullPointerException if {@code a} is null.
jaroslav@68
  1024
     * @exception IndexOutOfBoundsException if the {@code index}
jaroslav@68
  1025
     * argument is negative or not less than the {@code limit}
jaroslav@68
  1026
     * argument, or if the {@code limit} argument is negative or
jaroslav@68
  1027
     * greater than the length of the {@code char} array.
jaroslav@68
  1028
     * @since  1.5
jaroslav@68
  1029
     */
jaroslav@68
  1030
    public static int codePointAt(char[] a, int index, int limit) {
jaroslav@68
  1031
        if (index >= limit || limit < 0 || limit > a.length) {
jaroslav@68
  1032
            throw new IndexOutOfBoundsException();
jaroslav@68
  1033
        }
jaroslav@68
  1034
        return codePointAtImpl(a, index, limit);
jaroslav@68
  1035
    }
jaroslav@68
  1036
jaroslav@68
  1037
    // throws ArrayIndexOutOfBoundsException if index out of bounds
jaroslav@68
  1038
    static int codePointAtImpl(char[] a, int index, int limit) {
jaroslav@68
  1039
        char c1 = a[index++];
jaroslav@68
  1040
        if (isHighSurrogate(c1)) {
jaroslav@68
  1041
            if (index < limit) {
jaroslav@68
  1042
                char c2 = a[index];
jaroslav@68
  1043
                if (isLowSurrogate(c2)) {
jaroslav@68
  1044
                    return toCodePoint(c1, c2);
jaroslav@68
  1045
                }
jaroslav@68
  1046
            }
jaroslav@68
  1047
        }
jaroslav@68
  1048
        return c1;
jaroslav@68
  1049
    }
jaroslav@68
  1050
jaroslav@68
  1051
    /**
jaroslav@68
  1052
     * Returns the code point preceding the given index of the
jaroslav@68
  1053
     * {@code CharSequence}. If the {@code char} value at
jaroslav@68
  1054
     * {@code (index - 1)} in the {@code CharSequence} is in
jaroslav@68
  1055
     * the low-surrogate range, {@code (index - 2)} is not
jaroslav@68
  1056
     * negative, and the {@code char} value at {@code (index - 2)}
jaroslav@68
  1057
     * in the {@code CharSequence} is in the
jaroslav@68
  1058
     * high-surrogate range, then the supplementary code point
jaroslav@68
  1059
     * corresponding to this surrogate pair is returned. Otherwise,
jaroslav@68
  1060
     * the {@code char} value at {@code (index - 1)} is
jaroslav@68
  1061
     * returned.
jaroslav@68
  1062
     *
jaroslav@68
  1063
     * @param seq the {@code CharSequence} instance
jaroslav@68
  1064
     * @param index the index following the code point that should be returned
jaroslav@68
  1065
     * @return the Unicode code point value before the given index.
jaroslav@68
  1066
     * @exception NullPointerException if {@code seq} is null.
jaroslav@68
  1067
     * @exception IndexOutOfBoundsException if the {@code index}
jaroslav@68
  1068
     * argument is less than 1 or greater than {@link
jaroslav@68
  1069
     * CharSequence#length() seq.length()}.
jaroslav@68
  1070
     * @since  1.5
jaroslav@68
  1071
     */
jaroslav@68
  1072
    public static int codePointBefore(CharSequence seq, int index) {
jaroslav@68
  1073
        char c2 = seq.charAt(--index);
jaroslav@68
  1074
        if (isLowSurrogate(c2)) {
jaroslav@68
  1075
            if (index > 0) {
jaroslav@68
  1076
                char c1 = seq.charAt(--index);
jaroslav@68
  1077
                if (isHighSurrogate(c1)) {
jaroslav@68
  1078
                    return toCodePoint(c1, c2);
jaroslav@68
  1079
                }
jaroslav@68
  1080
            }
jaroslav@68
  1081
        }
jaroslav@68
  1082
        return c2;
jaroslav@68
  1083
    }
jaroslav@68
  1084
jaroslav@68
  1085
    /**
jaroslav@68
  1086
     * Returns the code point preceding the given index of the
jaroslav@68
  1087
     * {@code char} array. If the {@code char} value at
jaroslav@68
  1088
     * {@code (index - 1)} in the {@code char} array is in
jaroslav@68
  1089
     * the low-surrogate range, {@code (index - 2)} is not
jaroslav@68
  1090
     * negative, and the {@code char} value at {@code (index - 2)}
jaroslav@68
  1091
     * in the {@code char} array is in the
jaroslav@68
  1092
     * high-surrogate range, then the supplementary code point
jaroslav@68
  1093
     * corresponding to this surrogate pair is returned. Otherwise,
jaroslav@68
  1094
     * the {@code char} value at {@code (index - 1)} is
jaroslav@68
  1095
     * returned.
jaroslav@68
  1096
     *
jaroslav@68
  1097
     * @param a the {@code char} array
jaroslav@68
  1098
     * @param index the index following the code point that should be returned
jaroslav@68
  1099
     * @return the Unicode code point value before the given index.
jaroslav@68
  1100
     * @exception NullPointerException if {@code a} is null.
jaroslav@68
  1101
     * @exception IndexOutOfBoundsException if the {@code index}
jaroslav@68
  1102
     * argument is less than 1 or greater than the length of the
jaroslav@68
  1103
     * {@code char} array
jaroslav@68
  1104
     * @since  1.5
jaroslav@68
  1105
     */
jaroslav@68
  1106
    public static int codePointBefore(char[] a, int index) {
jaroslav@68
  1107
        return codePointBeforeImpl(a, index, 0);
jaroslav@68
  1108
    }
jaroslav@68
  1109
jaroslav@68
  1110
    /**
jaroslav@68
  1111
     * Returns the code point preceding the given index of the
jaroslav@68
  1112
     * {@code char} array, where only array elements with
jaroslav@68
  1113
     * {@code index} greater than or equal to {@code start}
jaroslav@68
  1114
     * can be used. If the {@code char} value at {@code (index - 1)}
jaroslav@68
  1115
     * in the {@code char} array is in the
jaroslav@68
  1116
     * low-surrogate range, {@code (index - 2)} is not less than
jaroslav@68
  1117
     * {@code start}, and the {@code char} value at
jaroslav@68
  1118
     * {@code (index - 2)} in the {@code char} array is in
jaroslav@68
  1119
     * the high-surrogate range, then the supplementary code point
jaroslav@68
  1120
     * corresponding to this surrogate pair is returned. Otherwise,
jaroslav@68
  1121
     * the {@code char} value at {@code (index - 1)} is
jaroslav@68
  1122
     * returned.
jaroslav@68
  1123
     *
jaroslav@68
  1124
     * @param a the {@code char} array
jaroslav@68
  1125
     * @param index the index following the code point that should be returned
jaroslav@68
  1126
     * @param start the index of the first array element in the
jaroslav@68
  1127
     * {@code char} array
jaroslav@68
  1128
     * @return the Unicode code point value before the given index.
jaroslav@68
  1129
     * @exception NullPointerException if {@code a} is null.
jaroslav@68
  1130
     * @exception IndexOutOfBoundsException if the {@code index}
jaroslav@68
  1131
     * argument is not greater than the {@code start} argument or
jaroslav@68
  1132
     * is greater than the length of the {@code char} array, or
jaroslav@68
  1133
     * if the {@code start} argument is negative or not less than
jaroslav@68
  1134
     * the length of the {@code char} array.
jaroslav@68
  1135
     * @since  1.5
jaroslav@68
  1136
     */
jaroslav@68
  1137
    public static int codePointBefore(char[] a, int index, int start) {
jaroslav@68
  1138
        if (index <= start || start < 0 || start >= a.length) {
jaroslav@68
  1139
            throw new IndexOutOfBoundsException();
jaroslav@68
  1140
        }
jaroslav@68
  1141
        return codePointBeforeImpl(a, index, start);
jaroslav@68
  1142
    }
jaroslav@68
  1143
jaroslav@68
  1144
    // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
jaroslav@68
  1145
    static int codePointBeforeImpl(char[] a, int index, int start) {
jaroslav@68
  1146
        char c2 = a[--index];
jaroslav@68
  1147
        if (isLowSurrogate(c2)) {
jaroslav@68
  1148
            if (index > start) {
jaroslav@68
  1149
                char c1 = a[--index];
jaroslav@68
  1150
                if (isHighSurrogate(c1)) {
jaroslav@68
  1151
                    return toCodePoint(c1, c2);
jaroslav@68
  1152
                }
jaroslav@68
  1153
            }
jaroslav@68
  1154
        }
jaroslav@68
  1155
        return c2;
jaroslav@68
  1156
    }
jaroslav@68
  1157
jaroslav@68
  1158
    /**
jaroslav@68
  1159
     * Returns the leading surrogate (a
jaroslav@68
  1160
     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
jaroslav@68
  1161
     * high surrogate code unit</a>) of the
jaroslav@68
  1162
     * <a href="http://www.unicode.org/glossary/#surrogate_pair">
jaroslav@68
  1163
     * surrogate pair</a>
jaroslav@68
  1164
     * representing the specified supplementary character (Unicode
jaroslav@68
  1165
     * code point) in the UTF-16 encoding.  If the specified character
jaroslav@68
  1166
     * is not a
jaroslav@68
  1167
     * <a href="Character.html#supplementary">supplementary character</a>,
jaroslav@68
  1168
     * an unspecified {@code char} is returned.
jaroslav@68
  1169
     *
jaroslav@68
  1170
     * <p>If
jaroslav@68
  1171
     * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
jaroslav@68
  1172
     * is {@code true}, then
jaroslav@68
  1173
     * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
jaroslav@68
  1174
     * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
jaroslav@68
  1175
     * are also always {@code true}.
jaroslav@68
  1176
     *
jaroslav@68
  1177
     * @param   codePoint a supplementary character (Unicode code point)
jaroslav@68
  1178
     * @return  the leading surrogate code unit used to represent the
jaroslav@68
  1179
     *          character in the UTF-16 encoding
jaroslav@68
  1180
     * @since   1.7
jaroslav@68
  1181
     */
jaroslav@68
  1182
    public static char highSurrogate(int codePoint) {
jaroslav@68
  1183
        return (char) ((codePoint >>> 10)
jaroslav@68
  1184
            + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
jaroslav@68
  1185
    }
jaroslav@68
  1186
jaroslav@68
  1187
    /**
jaroslav@68
  1188
     * Returns the trailing surrogate (a
jaroslav@68
  1189
     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
jaroslav@68
  1190
     * low surrogate code unit</a>) of the
jaroslav@68
  1191
     * <a href="http://www.unicode.org/glossary/#surrogate_pair">
jaroslav@68
  1192
     * surrogate pair</a>
jaroslav@68
  1193
     * representing the specified supplementary character (Unicode
jaroslav@68
  1194
     * code point) in the UTF-16 encoding.  If the specified character
jaroslav@68
  1195
     * is not a
jaroslav@68
  1196
     * <a href="Character.html#supplementary">supplementary character</a>,
jaroslav@68
  1197
     * an unspecified {@code char} is returned.
jaroslav@68
  1198
     *
jaroslav@68
  1199
     * <p>If
jaroslav@68
  1200
     * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
jaroslav@68
  1201
     * is {@code true}, then
jaroslav@68
  1202
     * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
jaroslav@68
  1203
     * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
jaroslav@68
  1204
     * are also always {@code true}.
jaroslav@68
  1205
     *
jaroslav@68
  1206
     * @param   codePoint a supplementary character (Unicode code point)
jaroslav@68
  1207
     * @return  the trailing surrogate code unit used to represent the
jaroslav@68
  1208
     *          character in the UTF-16 encoding
jaroslav@68
  1209
     * @since   1.7
jaroslav@68
  1210
     */
jaroslav@68
  1211
    public static char lowSurrogate(int codePoint) {
jaroslav@68
  1212
        return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
jaroslav@68
  1213
    }
jaroslav@68
  1214
jaroslav@68
  1215
    /**
jaroslav@68
  1216
     * Converts the specified character (Unicode code point) to its
jaroslav@68
  1217
     * UTF-16 representation. If the specified code point is a BMP
jaroslav@68
  1218
     * (Basic Multilingual Plane or Plane 0) value, the same value is
jaroslav@68
  1219
     * stored in {@code dst[dstIndex]}, and 1 is returned. If the
jaroslav@68
  1220
     * specified code point is a supplementary character, its
jaroslav@68
  1221
     * surrogate values are stored in {@code dst[dstIndex]}
jaroslav@68
  1222
     * (high-surrogate) and {@code dst[dstIndex+1]}
jaroslav@68
  1223
     * (low-surrogate), and 2 is returned.
jaroslav@68
  1224
     *
jaroslav@68
  1225
     * @param  codePoint the character (Unicode code point) to be converted.
jaroslav@68
  1226
     * @param  dst an array of {@code char} in which the
jaroslav@68
  1227
     * {@code codePoint}'s UTF-16 value is stored.
jaroslav@68
  1228
     * @param dstIndex the start index into the {@code dst}
jaroslav@68
  1229
     * array where the converted value is stored.
jaroslav@68
  1230
     * @return 1 if the code point is a BMP code point, 2 if the
jaroslav@68
  1231
     * code point is a supplementary code point.
jaroslav@68
  1232
     * @exception IllegalArgumentException if the specified
jaroslav@68
  1233
     * {@code codePoint} is not a valid Unicode code point.
jaroslav@68
  1234
     * @exception NullPointerException if the specified {@code dst} is null.
jaroslav@68
  1235
     * @exception IndexOutOfBoundsException if {@code dstIndex}
jaroslav@68
  1236
     * is negative or not less than {@code dst.length}, or if
jaroslav@68
  1237
     * {@code dst} at {@code dstIndex} doesn't have enough
jaroslav@68
  1238
     * array element(s) to store the resulting {@code char}
jaroslav@68
  1239
     * value(s). (If {@code dstIndex} is equal to
jaroslav@68
  1240
     * {@code dst.length-1} and the specified
jaroslav@68
  1241
     * {@code codePoint} is a supplementary character, the
jaroslav@68
  1242
     * high-surrogate value is not stored in
jaroslav@68
  1243
     * {@code dst[dstIndex]}.)
jaroslav@68
  1244
     * @since  1.5
jaroslav@68
  1245
     */
jaroslav@68
  1246
    public static int toChars(int codePoint, char[] dst, int dstIndex) {
jaroslav@68
  1247
        if (isBmpCodePoint(codePoint)) {
jaroslav@68
  1248
            dst[dstIndex] = (char) codePoint;
jaroslav@68
  1249
            return 1;
jaroslav@68
  1250
        } else if (isValidCodePoint(codePoint)) {
jaroslav@68
  1251
            toSurrogates(codePoint, dst, dstIndex);
jaroslav@68
  1252
            return 2;
jaroslav@68
  1253
        } else {
jaroslav@68
  1254
            throw new IllegalArgumentException();
jaroslav@68
  1255
        }
jaroslav@68
  1256
    }
jaroslav@68
  1257
jaroslav@68
  1258
    /**
jaroslav@68
  1259
     * Converts the specified character (Unicode code point) to its
jaroslav@68
  1260
     * UTF-16 representation stored in a {@code char} array. If
jaroslav@68
  1261
     * the specified code point is a BMP (Basic Multilingual Plane or
jaroslav@68
  1262
     * Plane 0) value, the resulting {@code char} array has
jaroslav@68
  1263
     * the same value as {@code codePoint}. If the specified code
jaroslav@68
  1264
     * point is a supplementary code point, the resulting
jaroslav@68
  1265
     * {@code char} array has the corresponding surrogate pair.
jaroslav@68
  1266
     *
jaroslav@68
  1267
     * @param  codePoint a Unicode code point
jaroslav@68
  1268
     * @return a {@code char} array having
jaroslav@68
  1269
     *         {@code codePoint}'s UTF-16 representation.
jaroslav@68
  1270
     * @exception IllegalArgumentException if the specified
jaroslav@68
  1271
     * {@code codePoint} is not a valid Unicode code point.
jaroslav@68
  1272
     * @since  1.5
jaroslav@68
  1273
     */
jaroslav@68
  1274
    public static char[] toChars(int codePoint) {
jaroslav@68
  1275
        if (isBmpCodePoint(codePoint)) {
jaroslav@68
  1276
            return new char[] { (char) codePoint };
jaroslav@68
  1277
        } else if (isValidCodePoint(codePoint)) {
jaroslav@68
  1278
            char[] result = new char[2];
jaroslav@68
  1279
            toSurrogates(codePoint, result, 0);
jaroslav@68
  1280
            return result;
jaroslav@68
  1281
        } else {
jaroslav@68
  1282
            throw new IllegalArgumentException();
jaroslav@68
  1283
        }
jaroslav@68
  1284
    }
jaroslav@68
  1285
jaroslav@68
  1286
    static void toSurrogates(int codePoint, char[] dst, int index) {
jaroslav@68
  1287
        // We write elements "backwards" to guarantee all-or-nothing
jaroslav@68
  1288
        dst[index+1] = lowSurrogate(codePoint);
jaroslav@68
  1289
        dst[index] = highSurrogate(codePoint);
jaroslav@68
  1290
    }
jaroslav@68
  1291
jaroslav@68
  1292
    /**
jaroslav@68
  1293
     * Returns the number of Unicode code points in the text range of
jaroslav@68
  1294
     * the specified char sequence. The text range begins at the
jaroslav@68
  1295
     * specified {@code beginIndex} and extends to the
jaroslav@68
  1296
     * {@code char} at index {@code endIndex - 1}. Thus the
jaroslav@68
  1297
     * length (in {@code char}s) of the text range is
jaroslav@68
  1298
     * {@code endIndex-beginIndex}. Unpaired surrogates within
jaroslav@68
  1299
     * the text range count as one code point each.
jaroslav@68
  1300
     *
jaroslav@68
  1301
     * @param seq the char sequence
jaroslav@68
  1302
     * @param beginIndex the index to the first {@code char} of
jaroslav@68
  1303
     * the text range.
jaroslav@68
  1304
     * @param endIndex the index after the last {@code char} of
jaroslav@68
  1305
     * the text range.
jaroslav@68
  1306
     * @return the number of Unicode code points in the specified text
jaroslav@68
  1307
     * range
jaroslav@68
  1308
     * @exception NullPointerException if {@code seq} is null.
jaroslav@68
  1309
     * @exception IndexOutOfBoundsException if the
jaroslav@68
  1310
     * {@code beginIndex} is negative, or {@code endIndex}
jaroslav@68
  1311
     * is larger than the length of the given sequence, or
jaroslav@68
  1312
     * {@code beginIndex} is larger than {@code endIndex}.
jaroslav@68
  1313
     * @since  1.5
jaroslav@68
  1314
     */
jaroslav@68
  1315
    public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
jaroslav@68
  1316
        int length = seq.length();
jaroslav@68
  1317
        if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
jaroslav@68
  1318
            throw new IndexOutOfBoundsException();
jaroslav@68
  1319
        }
jaroslav@68
  1320
        int n = endIndex - beginIndex;
jaroslav@68
  1321
        for (int i = beginIndex; i < endIndex; ) {
jaroslav@68
  1322
            if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
jaroslav@68
  1323
                isLowSurrogate(seq.charAt(i))) {
jaroslav@68
  1324
                n--;
jaroslav@68
  1325
                i++;
jaroslav@68
  1326
            }
jaroslav@68
  1327
        }
jaroslav@68
  1328
        return n;
jaroslav@68
  1329
    }
jaroslav@68
  1330
jaroslav@68
  1331
    /**
jaroslav@68
  1332
     * Returns the number of Unicode code points in a subarray of the
jaroslav@68
  1333
     * {@code char} array argument. The {@code offset}
jaroslav@68
  1334
     * argument is the index of the first {@code char} of the
jaroslav@68
  1335
     * subarray and the {@code count} argument specifies the
jaroslav@68
  1336
     * length of the subarray in {@code char}s. Unpaired
jaroslav@68
  1337
     * surrogates within the subarray count as one code point each.
jaroslav@68
  1338
     *
jaroslav@68
  1339
     * @param a the {@code char} array
jaroslav@68
  1340
     * @param offset the index of the first {@code char} in the
jaroslav@68
  1341
     * given {@code char} array
jaroslav@68
  1342
     * @param count the length of the subarray in {@code char}s
jaroslav@68
  1343
     * @return the number of Unicode code points in the specified subarray
jaroslav@68
  1344
     * @exception NullPointerException if {@code a} is null.
jaroslav@68
  1345
     * @exception IndexOutOfBoundsException if {@code offset} or
jaroslav@68
  1346
     * {@code count} is negative, or if {@code offset +
jaroslav@68
  1347
     * count} is larger than the length of the given array.
jaroslav@68
  1348
     * @since  1.5
jaroslav@68
  1349
     */
jaroslav@68
  1350
    public static int codePointCount(char[] a, int offset, int count) {
jaroslav@68
  1351
        if (count > a.length - offset || offset < 0 || count < 0) {
jaroslav@68
  1352
            throw new IndexOutOfBoundsException();
jaroslav@68
  1353
        }
jaroslav@68
  1354
        return codePointCountImpl(a, offset, count);
jaroslav@68
  1355
    }
jaroslav@68
  1356
jaroslav@68
  1357
    static int codePointCountImpl(char[] a, int offset, int count) {
jaroslav@68
  1358
        int endIndex = offset + count;
jaroslav@68
  1359
        int n = count;
jaroslav@68
  1360
        for (int i = offset; i < endIndex; ) {
jaroslav@68
  1361
            if (isHighSurrogate(a[i++]) && i < endIndex &&
jaroslav@68
  1362
                isLowSurrogate(a[i])) {
jaroslav@68
  1363
                n--;
jaroslav@68
  1364
                i++;
jaroslav@68
  1365
            }
jaroslav@68
  1366
        }
jaroslav@68
  1367
        return n;
jaroslav@68
  1368
    }
jaroslav@68
  1369
jaroslav@68
  1370
    /**
jaroslav@68
  1371
     * Returns the index within the given char sequence that is offset
jaroslav@68
  1372
     * from the given {@code index} by {@code codePointOffset}
jaroslav@68
  1373
     * code points. Unpaired surrogates within the text range given by
jaroslav@68
  1374
     * {@code index} and {@code codePointOffset} count as
jaroslav@68
  1375
     * one code point each.
jaroslav@68
  1376
     *
jaroslav@68
  1377
     * @param seq the char sequence
jaroslav@68
  1378
     * @param index the index to be offset
jaroslav@68
  1379
     * @param codePointOffset the offset in code points
jaroslav@68
  1380
     * @return the index within the char sequence
jaroslav@68
  1381
     * @exception NullPointerException if {@code seq} is null.
jaroslav@68
  1382
     * @exception IndexOutOfBoundsException if {@code index}
jaroslav@68
  1383
     *   is negative or larger than the length of the char sequence,
jaroslav@68
  1384
     *   or if {@code codePointOffset} is positive and the
jaroslav@68
  1385
     *   subsequence starting with {@code index} has fewer than
jaroslav@68
  1386
     *   {@code codePointOffset} code points, or if
jaroslav@68
  1387
     *   {@code codePointOffset} is negative and the subsequence
jaroslav@68
  1388
     *   before {@code index} has fewer than the absolute value
jaroslav@68
  1389
     *   of {@code codePointOffset} code points.
jaroslav@68
  1390
     * @since 1.5
jaroslav@68
  1391
     */
jaroslav@68
  1392
    public static int offsetByCodePoints(CharSequence seq, int index,
jaroslav@68
  1393
                                         int codePointOffset) {
jaroslav@68
  1394
        int length = seq.length();
jaroslav@68
  1395
        if (index < 0 || index > length) {
jaroslav@68
  1396
            throw new IndexOutOfBoundsException();
jaroslav@68
  1397
        }
jaroslav@68
  1398
jaroslav@68
  1399
        int x = index;
jaroslav@68
  1400
        if (codePointOffset >= 0) {
jaroslav@68
  1401
            int i;
jaroslav@68
  1402
            for (i = 0; x < length && i < codePointOffset; i++) {
jaroslav@68
  1403
                if (isHighSurrogate(seq.charAt(x++)) && x < length &&
jaroslav@68
  1404
                    isLowSurrogate(seq.charAt(x))) {
jaroslav@68
  1405
                    x++;
jaroslav@68
  1406
                }
jaroslav@68
  1407
            }
jaroslav@68
  1408
            if (i < codePointOffset) {
jaroslav@68
  1409
                throw new IndexOutOfBoundsException();
jaroslav@68
  1410
            }
jaroslav@68
  1411
        } else {
jaroslav@68
  1412
            int i;
jaroslav@68
  1413
            for (i = codePointOffset; x > 0 && i < 0; i++) {
jaroslav@68
  1414
                if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
jaroslav@68
  1415
                    isHighSurrogate(seq.charAt(x-1))) {
jaroslav@68
  1416
                    x--;
jaroslav@68
  1417
                }
jaroslav@68
  1418
            }
jaroslav@68
  1419
            if (i < 0) {
jaroslav@68
  1420
                throw new IndexOutOfBoundsException();
jaroslav@68
  1421
            }
jaroslav@68
  1422
        }
jaroslav@68
  1423
        return x;
jaroslav@68
  1424
    }
jaroslav@68
  1425
jaroslav@68
  1426
    /**
jaroslav@68
  1427
     * Returns the index within the given {@code char} subarray
jaroslav@68
  1428
     * that is offset from the given {@code index} by
jaroslav@68
  1429
     * {@code codePointOffset} code points. The
jaroslav@68
  1430
     * {@code start} and {@code count} arguments specify a
jaroslav@68
  1431
     * subarray of the {@code char} array. Unpaired surrogates
jaroslav@68
  1432
     * within the text range given by {@code index} and
jaroslav@68
  1433
     * {@code codePointOffset} count as one code point each.
jaroslav@68
  1434
     *
jaroslav@68
  1435
     * @param a the {@code char} array
jaroslav@68
  1436
     * @param start the index of the first {@code char} of the
jaroslav@68
  1437
     * subarray
jaroslav@68
  1438
     * @param count the length of the subarray in {@code char}s
jaroslav@68
  1439
     * @param index the index to be offset
jaroslav@68
  1440
     * @param codePointOffset the offset in code points
jaroslav@68
  1441
     * @return the index within the subarray
jaroslav@68
  1442
     * @exception NullPointerException if {@code a} is null.
jaroslav@68
  1443
     * @exception IndexOutOfBoundsException
jaroslav@68
  1444
     *   if {@code start} or {@code count} is negative,
jaroslav@68
  1445
     *   or if {@code start + count} is larger than the length of
jaroslav@68
  1446
     *   the given array,
jaroslav@68
  1447
     *   or if {@code index} is less than {@code start} or
jaroslav@68
  1448
     *   larger than {@code start + count},
jaroslav@68
  1449
     *   or if {@code codePointOffset} is positive and the text range
jaroslav@68
  1450
     *   starting with {@code index} and ending with {@code start + count - 1}
jaroslav@68
  1451
     *   has fewer than {@code codePointOffset} code
jaroslav@68
  1452
     *   points,
jaroslav@68
  1453
     *   or if {@code codePointOffset} is negative and the text range
jaroslav@68
  1454
     *   starting with {@code start} and ending with {@code index - 1}
jaroslav@68
  1455
     *   has fewer than the absolute value of
jaroslav@68
  1456
     *   {@code codePointOffset} code points.
jaroslav@68
  1457
     * @since 1.5
jaroslav@68
  1458
     */
jaroslav@68
  1459
    public static int offsetByCodePoints(char[] a, int start, int count,
jaroslav@68
  1460
                                         int index, int codePointOffset) {
jaroslav@68
  1461
        if (count > a.length-start || start < 0 || count < 0
jaroslav@68
  1462
            || index < start || index > start+count) {
jaroslav@68
  1463
            throw new IndexOutOfBoundsException();
jaroslav@68
  1464
        }
jaroslav@68
  1465
        return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
jaroslav@68
  1466
    }
jaroslav@68
  1467
jaroslav@68
  1468
    static int offsetByCodePointsImpl(char[]a, int start, int count,
jaroslav@68
  1469
                                      int index, int codePointOffset) {
jaroslav@68
  1470
        int x = index;
jaroslav@68
  1471
        if (codePointOffset >= 0) {
jaroslav@68
  1472
            int limit = start + count;
jaroslav@68
  1473
            int i;
jaroslav@68
  1474
            for (i = 0; x < limit && i < codePointOffset; i++) {
jaroslav@68
  1475
                if (isHighSurrogate(a[x++]) && x < limit &&
jaroslav@68
  1476
                    isLowSurrogate(a[x])) {
jaroslav@68
  1477
                    x++;
jaroslav@68
  1478
                }
jaroslav@68
  1479
            }
jaroslav@68
  1480
            if (i < codePointOffset) {
jaroslav@68
  1481
                throw new IndexOutOfBoundsException();
jaroslav@68
  1482
            }
jaroslav@68
  1483
        } else {
jaroslav@68
  1484
            int i;
jaroslav@68
  1485
            for (i = codePointOffset; x > start && i < 0; i++) {
jaroslav@68
  1486
                if (isLowSurrogate(a[--x]) && x > start &&
jaroslav@68
  1487
                    isHighSurrogate(a[x-1])) {
jaroslav@68
  1488
                    x--;
jaroslav@68
  1489
                }
jaroslav@68
  1490
            }
jaroslav@68
  1491
            if (i < 0) {
jaroslav@68
  1492
                throw new IndexOutOfBoundsException();
jaroslav@68
  1493
            }
jaroslav@68
  1494
        }
jaroslav@68
  1495
        return x;
jaroslav@68
  1496
    }
jaroslav@68
  1497
jaroslav@68
  1498
    /**
jaroslav@68
  1499
     * Determines if the specified character is a lowercase character.
jaroslav@68
  1500
     * <p>
jaroslav@68
  1501
     * A character is lowercase if its general category type, provided
jaroslav@68
  1502
     * by {@code Character.getType(ch)}, is
jaroslav@68
  1503
     * {@code LOWERCASE_LETTER}, or it has contributory property
jaroslav@68
  1504
     * Other_Lowercase as defined by the Unicode Standard.
jaroslav@68
  1505
     * <p>
jaroslav@68
  1506
     * The following are examples of lowercase characters:
jaroslav@68
  1507
     * <p><blockquote><pre>
jaroslav@68
  1508
     * a b c d e f g h i j k l m n o p q r s t u v w x y z
jaroslav@68
  1509
     * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
jaroslav@68
  1510
     * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
jaroslav@68
  1511
     * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
jaroslav@68
  1512
     * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
jaroslav@68
  1513
     * </pre></blockquote>
jaroslav@68
  1514
     * <p> Many other Unicode characters are lowercase too.
jaroslav@68
  1515
     *
jaroslav@68
  1516
     * <p><b>Note:</b> This method cannot handle <a
jaroslav@68
  1517
     * href="#supplementary"> supplementary characters</a>. To support
jaroslav@68
  1518
     * all Unicode characters, including supplementary characters, use
jaroslav@68
  1519
     * the {@link #isLowerCase(int)} method.
jaroslav@68
  1520
     *
jaroslav@68
  1521
     * @param   ch   the character to be tested.
jaroslav@68
  1522
     * @return  {@code true} if the character is lowercase;
jaroslav@68
  1523
     *          {@code false} otherwise.
jaroslav@68
  1524
     * @see     Character#isUpperCase(char)
jaroslav@68
  1525
     * @see     Character#isTitleCase(char)
jaroslav@68
  1526
     * @see     Character#toLowerCase(char)
jaroslav@68
  1527
     * @see     Character#getType(char)
jaroslav@68
  1528
     */
jaroslav@68
  1529
    public static boolean isLowerCase(char ch) {
jaroslav@326
  1530
        return ch == toLowerCase(ch);
jaroslav@68
  1531
    }
jaroslav@68
  1532
jaroslav@68
  1533
    /**
jaroslav@68
  1534
     * Determines if the specified character is an uppercase character.
jaroslav@68
  1535
     * <p>
jaroslav@68
  1536
     * A character is uppercase if its general category type, provided by
jaroslav@68
  1537
     * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
jaroslav@68
  1538
     * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
jaroslav@68
  1539
     * <p>
jaroslav@68
  1540
     * The following are examples of uppercase characters:
jaroslav@68
  1541
     * <p><blockquote><pre>
jaroslav@68
  1542
     * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
jaroslav@68
  1543
     * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
jaroslav@68
  1544
     * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
jaroslav@68
  1545
     * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
jaroslav@68
  1546
     * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
jaroslav@68
  1547
     * </pre></blockquote>
jaroslav@68
  1548
     * <p> Many other Unicode characters are uppercase too.<p>
jaroslav@68
  1549
     *
jaroslav@68
  1550
     * <p><b>Note:</b> This method cannot handle <a
jaroslav@68
  1551
     * href="#supplementary"> supplementary characters</a>. To support
jaroslav@68
  1552
     * all Unicode characters, including supplementary characters, use
jaroslav@68
  1553
     * the {@link #isUpperCase(int)} method.
jaroslav@68
  1554
     *
jaroslav@68
  1555
     * @param   ch   the character to be tested.
jaroslav@68
  1556
     * @return  {@code true} if the character is uppercase;
jaroslav@68
  1557
     *          {@code false} otherwise.
jaroslav@68
  1558
     * @see     Character#isLowerCase(char)
jaroslav@68
  1559
     * @see     Character#isTitleCase(char)
jaroslav@68
  1560
     * @see     Character#toUpperCase(char)
jaroslav@68
  1561
     * @see     Character#getType(char)
jaroslav@68
  1562
     * @since   1.0
jaroslav@68
  1563
     */
jaroslav@68
  1564
    public static boolean isUpperCase(char ch) {
jaroslav@326
  1565
        return ch == toUpperCase(ch);
jaroslav@68
  1566
    }
jaroslav@68
  1567
jaroslav@68
  1568
    /**
jaroslav@68
  1569
     * Determines if the specified character is a titlecase character.
jaroslav@68
  1570
     * <p>
jaroslav@68
  1571
     * A character is a titlecase character if its general
jaroslav@68
  1572
     * category type, provided by {@code Character.getType(ch)},
jaroslav@68
  1573
     * is {@code TITLECASE_LETTER}.
jaroslav@68
  1574
     * <p>
jaroslav@68
  1575
     * Some characters look like pairs of Latin letters. For example, there
jaroslav@68
  1576
     * is an uppercase letter that looks like "LJ" and has a corresponding
jaroslav@68
  1577
     * lowercase letter that looks like "lj". A third form, which looks like "Lj",
jaroslav@68
  1578
     * is the appropriate form to use when rendering a word in lowercase
jaroslav@68
  1579
     * with initial capitals, as for a book title.
jaroslav@68
  1580
     * <p>
jaroslav@68
  1581
     * These are some of the Unicode characters for which this method returns
jaroslav@68
  1582
     * {@code true}:
jaroslav@68
  1583
     * <ul>
jaroslav@68
  1584
     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
jaroslav@68
  1585
     * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
jaroslav@68
  1586
     * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
jaroslav@68
  1587
     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
jaroslav@68
  1588
     * </ul>
jaroslav@68
  1589
     * <p> Many other Unicode characters are titlecase too.<p>
jaroslav@68
  1590
     *
jaroslav@68
  1591
     * <p><b>Note:</b> This method cannot handle <a
jaroslav@68
  1592
     * href="#supplementary"> supplementary characters</a>. To support
jaroslav@68
  1593
     * all Unicode characters, including supplementary characters, use
jaroslav@68
  1594
     * the {@link #isTitleCase(int)} method.
jaroslav@68
  1595
     *
jaroslav@68
  1596
     * @param   ch   the character to be tested.
jaroslav@68
  1597
     * @return  {@code true} if the character is titlecase;
jaroslav@68
  1598
     *          {@code false} otherwise.
jaroslav@68
  1599
     * @see     Character#isLowerCase(char)
jaroslav@68
  1600
     * @see     Character#isUpperCase(char)
jaroslav@68
  1601
     * @see     Character#toTitleCase(char)
jaroslav@68
  1602
     * @see     Character#getType(char)
jaroslav@68
  1603
     * @since   1.0.2
jaroslav@68
  1604
     */
jaroslav@68
  1605
    public static boolean isTitleCase(char ch) {
jaroslav@68
  1606
        return isTitleCase((int)ch);
jaroslav@68
  1607
    }
jaroslav@68
  1608
jaroslav@68
  1609
    /**
jaroslav@68
  1610
     * Determines if the specified character (Unicode code point) is a titlecase character.
jaroslav@68
  1611
     * <p>
jaroslav@68
  1612
     * A character is a titlecase character if its general
jaroslav@68
  1613
     * category type, provided by {@link Character#getType(int) getType(codePoint)},
jaroslav@68
  1614
     * is {@code TITLECASE_LETTER}.
jaroslav@68
  1615
     * <p>
jaroslav@68
  1616
     * Some characters look like pairs of Latin letters. For example, there
jaroslav@68
  1617
     * is an uppercase letter that looks like "LJ" and has a corresponding
jaroslav@68
  1618
     * lowercase letter that looks like "lj". A third form, which looks like "Lj",
jaroslav@68
  1619
     * is the appropriate form to use when rendering a word in lowercase
jaroslav@68
  1620
     * with initial capitals, as for a book title.
jaroslav@68
  1621
     * <p>
jaroslav@68
  1622
     * These are some of the Unicode characters for which this method returns
jaroslav@68
  1623
     * {@code true}:
jaroslav@68
  1624
     * <ul>
jaroslav@68
  1625
     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
jaroslav@68
  1626
     * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
jaroslav@68
  1627
     * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
jaroslav@68
  1628
     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
jaroslav@68
  1629
     * </ul>
jaroslav@68
  1630
     * <p> Many other Unicode characters are titlecase too.<p>
jaroslav@68
  1631
     *
jaroslav@68
  1632
     * @param   codePoint the character (Unicode code point) to be tested.
jaroslav@68
  1633
     * @return  {@code true} if the character is titlecase;
jaroslav@68
  1634
     *          {@code false} otherwise.
jaroslav@68
  1635
     * @see     Character#isLowerCase(int)
jaroslav@68
  1636
     * @see     Character#isUpperCase(int)
jaroslav@68
  1637
     * @see     Character#toTitleCase(int)
jaroslav@68
  1638
     * @see     Character#getType(int)
jaroslav@68
  1639
     * @since   1.5
jaroslav@68
  1640
     */
jaroslav@68
  1641
    public static boolean isTitleCase(int codePoint) {
jaroslav@68
  1642
        return getType(codePoint) == Character.TITLECASE_LETTER;
jaroslav@68
  1643
    }
jaroslav@68
  1644
jaroslav@68
  1645
    /**
jaroslav@68
  1646
     * Determines if the specified character is a digit.
jaroslav@68
  1647
     * <p>
jaroslav@68
  1648
     * A character is a digit if its general category type, provided
jaroslav@68
  1649
     * by {@code Character.getType(ch)}, is
jaroslav@68
  1650
     * {@code DECIMAL_DIGIT_NUMBER}.
jaroslav@68
  1651
     * <p>
jaroslav@68
  1652
     * Some Unicode character ranges that contain digits:
jaroslav@68
  1653
     * <ul>
jaroslav@68
  1654
     * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
jaroslav@68
  1655
     *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
jaroslav@68
  1656
     * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
jaroslav@68
  1657
     *     Arabic-Indic digits
jaroslav@68
  1658
     * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
jaroslav@68
  1659
     *     Extended Arabic-Indic digits
jaroslav@68
  1660
     * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
jaroslav@68
  1661
     *     Devanagari digits
jaroslav@68
  1662
     * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
jaroslav@68
  1663
     *     Fullwidth digits
jaroslav@68
  1664
     * </ul>
jaroslav@68
  1665
     *
jaroslav@68
  1666
     * Many other character ranges contain digits as well.
jaroslav@68
  1667
     *
jaroslav@68
  1668
     * <p><b>Note:</b> This method cannot handle <a
jaroslav@68
  1669
     * href="#supplementary"> supplementary characters</a>. To support
jaroslav@68
  1670
     * all Unicode characters, including supplementary characters, use
jaroslav@68
  1671
     * the {@link #isDigit(int)} method.
jaroslav@68
  1672
     *
jaroslav@68
  1673
     * @param   ch   the character to be tested.
jaroslav@68
  1674
     * @return  {@code true} if the character is a digit;
jaroslav@68
  1675
     *          {@code false} otherwise.
jaroslav@68
  1676
     * @see     Character#digit(char, int)
jaroslav@68
  1677
     * @see     Character#forDigit(int, int)
jaroslav@68
  1678
     * @see     Character#getType(char)
jaroslav@68
  1679
     */
jaroslav@68
  1680
    public static boolean isDigit(char ch) {
jaroslav@326
  1681
        return String.valueOf(ch).matches("\\d");
jaroslav@68
  1682
    }
jaroslav@68
  1683
jaroslav@68
  1684
    /**
jaroslav@68
  1685
     * Determines if the specified character (Unicode code point) is a digit.
jaroslav@68
  1686
     * <p>
jaroslav@68
  1687
     * A character is a digit if its general category type, provided
jaroslav@68
  1688
     * by {@link Character#getType(int) getType(codePoint)}, is
jaroslav@68
  1689
     * {@code DECIMAL_DIGIT_NUMBER}.
jaroslav@68
  1690
     * <p>
jaroslav@68
  1691
     * Some Unicode character ranges that contain digits:
jaroslav@68
  1692
     * <ul>
jaroslav@68
  1693
     * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
jaroslav@68
  1694
     *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
jaroslav@68
  1695
     * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
jaroslav@68
  1696
     *     Arabic-Indic digits
jaroslav@68
  1697
     * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
jaroslav@68
  1698
     *     Extended Arabic-Indic digits
jaroslav@68
  1699
     * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
jaroslav@68
  1700
     *     Devanagari digits
jaroslav@68
  1701
     * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
jaroslav@68
  1702
     *     Fullwidth digits
jaroslav@68
  1703
     * </ul>
jaroslav@68
  1704
     *
jaroslav@68
  1705
     * Many other character ranges contain digits as well.
jaroslav@68
  1706
     *
jaroslav@68
  1707
     * @param   codePoint the character (Unicode code point) to be tested.
jaroslav@68
  1708
     * @return  {@code true} if the character is a digit;
jaroslav@68
  1709
     *          {@code false} otherwise.
jaroslav@68
  1710
     * @see     Character#forDigit(int, int)
jaroslav@68
  1711
     * @see     Character#getType(int)
jaroslav@68
  1712
     * @since   1.5
jaroslav@68
  1713
     */
jaroslav@68
  1714
    public static boolean isDigit(int codePoint) {
jaroslav@326
  1715
        return fromCodeChars(codePoint).matches("\\d");
jaroslav@68
  1716
    }
jaroslav@326
  1717
    
jaroslav@326
  1718
    /**
     * Native bridge whose JavaScript body returns
     * {@code String.fromCharCode(c)} for the given value.
     * NOTE(review): presumably String.fromCharCode keeps only the low 16 bits
     * of its argument, so values above 0xFFFF would not round-trip - confirm
     * before passing supplementary code points.
     *
     * @param codePoint the numeric value handed to String.fromCharCode
     * @return the string produced by the JavaScript body
     */
    @JavaScriptBody(args = "c", body = "return String.fromCharCode(c);")
jaroslav@326
  1719
    private native static String fromCodeChars(int codePoint);
jaroslav@68
  1720
jaroslav@68
  1721
    /**
jaroslav@68
  1722
     * Determines if a character is defined in Unicode.
jaroslav@68
  1723
     * <p>
jaroslav@68
  1724
     * A character is defined if at least one of the following is true:
jaroslav@68
  1725
     * <ul>
jaroslav@68
  1726
     * <li>It has an entry in the UnicodeData file.
jaroslav@68
  1727
     * <li>It has a value in a range defined by the UnicodeData file.
jaroslav@68
  1728
     * </ul>
jaroslav@68
  1729
     *
jaroslav@68
  1730
     * <p><b>Note:</b> This method cannot handle <a
jaroslav@68
  1731
     * href="#supplementary"> supplementary characters</a>. To support
jaroslav@68
  1732
     * all Unicode characters, including supplementary characters, use
jaroslav@68
  1733
     * the {@link #isDefined(int)} method.
jaroslav@68
  1734
     *
jaroslav@68
  1735
     * @param   ch   the character to be tested
jaroslav@68
  1736
     * @return  {@code true} if the character has a defined meaning
jaroslav@68
  1737
     *          in Unicode; {@code false} otherwise.
jaroslav@68
  1738
     * @see     Character#isDigit(char)
jaroslav@68
  1739
     * @see     Character#isLetter(char)
jaroslav@68
  1740
     * @see     Character#isLetterOrDigit(char)
jaroslav@68
  1741
     * @see     Character#isLowerCase(char)
jaroslav@68
  1742
     * @see     Character#isTitleCase(char)
jaroslav@68
  1743
     * @see     Character#isUpperCase(char)
jaroslav@68
  1744
     * @since   1.0.2
jaroslav@68
  1745
     */
jaroslav@68
  1746
    public static boolean isDefined(char ch) {
jaroslav@68
  1747
        return isDefined((int)ch);
jaroslav@68
  1748
    }
jaroslav@68
  1749
jaroslav@68
  1750
    /**
jaroslav@68
  1751
     * Determines if a character (Unicode code point) is defined in Unicode.
jaroslav@68
  1752
     * <p>
jaroslav@68
  1753
     * A character is defined if at least one of the following is true:
jaroslav@68
  1754
     * <ul>
jaroslav@68
  1755
     * <li>It has an entry in the UnicodeData file.
jaroslav@68
  1756
     * <li>It has a value in a range defined by the UnicodeData file.
jaroslav@68
  1757
     * </ul>
jaroslav@68
  1758
     *
jaroslav@68
  1759
     * @param   codePoint the character (Unicode code point) to be tested.
jaroslav@68
  1760
     * @return  {@code true} if the character has a defined meaning
jaroslav@68
  1761
     *          in Unicode; {@code false} otherwise.
jaroslav@68
  1762
     * @see     Character#isDigit(int)
jaroslav@68
  1763
     * @see     Character#isLetter(int)
jaroslav@68
  1764
     * @see     Character#isLetterOrDigit(int)
jaroslav@68
  1765
     * @see     Character#isLowerCase(int)
jaroslav@68
  1766
     * @see     Character#isTitleCase(int)
jaroslav@68
  1767
     * @see     Character#isUpperCase(int)
jaroslav@68
  1768
     * @since   1.5
jaroslav@68
  1769
     */
jaroslav@68
  1770
    public static boolean isDefined(int codePoint) {
jaroslav@68
  1771
        return getType(codePoint) != Character.UNASSIGNED;
jaroslav@68
  1772
    }
jaroslav@68
  1773
jaroslav@68
  1774
    /**
jaroslav@68
  1775
     * Determines if the specified character is a letter.
jaroslav@68
  1776
     * <p>
jaroslav@68
  1777
     * A character is considered to be a letter if its general
jaroslav@68
  1778
     * category type, provided by {@code Character.getType(ch)},
jaroslav@68
  1779
     * is any of the following:
jaroslav@68
  1780
     * <ul>
jaroslav@68
  1781
     * <li> {@code UPPERCASE_LETTER}
jaroslav@68
  1782
     * <li> {@code LOWERCASE_LETTER}
jaroslav@68
  1783
     * <li> {@code TITLECASE_LETTER}
jaroslav@68
  1784
     * <li> {@code MODIFIER_LETTER}
jaroslav@68
  1785
     * <li> {@code OTHER_LETTER}
jaroslav@68
  1786
     * </ul>
jaroslav@68
  1787
     *
jaroslav@68
  1788
     * Not all letters have case. Many characters are
jaroslav@68
  1789
     * letters but are neither uppercase nor lowercase nor titlecase.
jaroslav@68
  1790
     *
jaroslav@68
  1791
     * <p><b>Note:</b> This method cannot handle <a
jaroslav@68
  1792
     * href="#supplementary"> supplementary characters</a>. To support
jaroslav@68
  1793
     * all Unicode characters, including supplementary characters, use
jaroslav@68
  1794
     * the {@link #isLetter(int)} method.
jaroslav@68
  1795
     *
jaroslav@68
  1796
     * @param   ch   the character to be tested.
jaroslav@68
  1797
     * @return  {@code true} if the character is a letter;
jaroslav@68
  1798
     *          {@code false} otherwise.
jaroslav@68
  1799
     * @see     Character#isDigit(char)
jaroslav@68
  1800
     * @see     Character#isJavaIdentifierStart(char)
jaroslav@68
  1801
     * @see     Character#isJavaLetter(char)
jaroslav@68
  1802
     * @see     Character#isJavaLetterOrDigit(char)
jaroslav@68
  1803
     * @see     Character#isLetterOrDigit(char)
jaroslav@68
  1804
     * @see     Character#isLowerCase(char)
jaroslav@68
  1805
     * @see     Character#isTitleCase(char)
jaroslav@68
  1806
     * @see     Character#isUnicodeIdentifierStart(char)
jaroslav@68
  1807
     * @see     Character#isUpperCase(char)
jaroslav@68
  1808
     */
jaroslav@68
  1809
    public static boolean isLetter(char ch) {
jaroslav@326
  1810
        return String.valueOf(ch).matches("\\w") && !isDigit(ch);
jaroslav@68
  1811
    }
jaroslav@68
  1812
jaroslav@68
  1813
    /**
jaroslav@68
  1814
     * Determines if the specified character (Unicode code point) is a letter.
jaroslav@68
  1815
     * <p>
jaroslav@68
  1816
     * A character is considered to be a letter if its general
jaroslav@68
  1817
     * category type, provided by {@link Character#getType(int) getType(codePoint)},
jaroslav@68
  1818
     * is any of the following:
jaroslav@68
  1819
     * <ul>
jaroslav@68
  1820
     * <li> {@code UPPERCASE_LETTER}
jaroslav@68
  1821
     * <li> {@code LOWERCASE_LETTER}
jaroslav@68
  1822
     * <li> {@code TITLECASE_LETTER}
jaroslav@68
  1823
     * <li> {@code MODIFIER_LETTER}
jaroslav@68
  1824
     * <li> {@code OTHER_LETTER}
jaroslav@68
  1825
     * </ul>
jaroslav@68
  1826
     *
jaroslav@68
  1827
     * Not all letters have case. Many characters are
jaroslav@68
  1828
     * letters but are neither uppercase nor lowercase nor titlecase.
jaroslav@68
  1829
     *
jaroslav@68
  1830
     * @param   codePoint the character (Unicode code point) to be tested.
jaroslav@68
  1831
     * @return  {@code true} if the character is a letter;
jaroslav@68
  1832
     *          {@code false} otherwise.
jaroslav@68
  1833
     * @see     Character#isDigit(int)
jaroslav@68
  1834
     * @see     Character#isJavaIdentifierStart(int)
jaroslav@68
  1835
     * @see     Character#isLetterOrDigit(int)
jaroslav@68
  1836
     * @see     Character#isLowerCase(int)
jaroslav@68
  1837
     * @see     Character#isTitleCase(int)
jaroslav@68
  1838
     * @see     Character#isUnicodeIdentifierStart(int)
jaroslav@68
  1839
     * @see     Character#isUpperCase(int)
jaroslav@68
  1840
     * @since   1.5
jaroslav@68
  1841
     */
jaroslav@68
  1842
    public static boolean isLetter(int codePoint) {
jaroslav@326
  1843
        return fromCodeChars(codePoint).matches("\\w") && !isDigit(codePoint);
jaroslav@68
  1844
    }
jaroslav@68
  1845
jaroslav@68
  1846
    /**
jaroslav@68
  1847
     * Determines if the specified character is a letter or digit.
jaroslav@68
  1848
     * <p>
jaroslav@68
  1849
     * A character is considered to be a letter or digit if either
jaroslav@68
  1850
     * {@code Character.isLetter(char ch)} or
jaroslav@68
  1851
     * {@code Character.isDigit(char ch)} returns
jaroslav@68
  1852
     * {@code true} for the character.
jaroslav@68
  1853
     *
jaroslav@68
  1854
     * <p><b>Note:</b> This method cannot handle <a
jaroslav@68
  1855
     * href="#supplementary"> supplementary characters</a>. To support
jaroslav@68
  1856
     * all Unicode characters, including supplementary characters, use
jaroslav@68
  1857
     * the {@link #isLetterOrDigit(int)} method.
jaroslav@68
  1858
     *
jaroslav@68
  1859
     * @param   ch   the character to be tested.
jaroslav@68
  1860
     * @return  {@code true} if the character is a letter or digit;
jaroslav@68
  1861
     *          {@code false} otherwise.
jaroslav@68
  1862
     * @see     Character#isDigit(char)
jaroslav@68
  1863
     * @see     Character#isJavaIdentifierPart(char)
jaroslav@68
  1864
     * @see     Character#isJavaLetter(char)
jaroslav@68
  1865
     * @see     Character#isJavaLetterOrDigit(char)
jaroslav@68
  1866
     * @see     Character#isLetter(char)
jaroslav@68
  1867
     * @see     Character#isUnicodeIdentifierPart(char)
jaroslav@68
  1868
     * @since   1.0.2
jaroslav@68
  1869
     */
jaroslav@68
  1870
    public static boolean isLetterOrDigit(char ch) {
jaroslav@326
  1871
        return String.valueOf(ch).matches("\\w");
jaroslav@68
  1872
    }
jaroslav@68
  1873
jaroslav@68
  1874
    /**
jaroslav@68
  1875
     * Determines if the specified character (Unicode code point) is a letter or digit.
jaroslav@68
  1876
     * <p>
jaroslav@68
  1877
     * A character is considered to be a letter or digit if either
jaroslav@68
  1878
     * {@link #isLetter(int) isLetter(codePoint)} or
jaroslav@68
  1879
     * {@link #isDigit(int) isDigit(codePoint)} returns
jaroslav@68
  1880
     * {@code true} for the character.
jaroslav@68
  1881
     *
jaroslav@68
  1882
     * @param   codePoint the character (Unicode code point) to be tested.
jaroslav@68
  1883
     * @return  {@code true} if the character is a letter or digit;
jaroslav@68
  1884
     *          {@code false} otherwise.
jaroslav@68
  1885
     * @see     Character#isDigit(int)
jaroslav@68
  1886
     * @see     Character#isJavaIdentifierPart(int)
jaroslav@68
  1887
     * @see     Character#isLetter(int)
jaroslav@68
  1888
     * @see     Character#isUnicodeIdentifierPart(int)
jaroslav@68
  1889
     * @since   1.5
jaroslav@68
  1890
     */
jaroslav@68
  1891
    public static boolean isLetterOrDigit(int codePoint) {
jaroslav@326
  1892
        return fromCodeChars(codePoint).matches("\\w");
jaroslav@68
  1893
    }
jaroslav@85
  1894
    
jaroslav@85
  1895
    static int getType(int x) {
jaroslav@85
  1896
        throw new UnsupportedOperationException();
jaroslav@68
  1897
    }
jaroslav@563
  1898
 
jaroslav@563
  1899
    /**
jaroslav@563
  1900
     * Determines if the specified character is
jaroslav@563
  1901
     * permissible as the first character in a Java identifier.
jaroslav@563
  1902
     * <p>
jaroslav@563
  1903
     * A character may start a Java identifier if and only if
jaroslav@563
  1904
     * one of the following conditions is true:
jaroslav@563
  1905
     * <ul>
jaroslav@563
  1906
     * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
jaroslav@563
  1907
     * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
jaroslav@563
  1908
     * <li> {@code ch} is a currency symbol (such as {@code '$'})
jaroslav@563
  1909
     * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
jaroslav@563
  1910
     * </ul>
jaroslav@563
  1911
     *
jaroslav@563
  1912
     * <p><b>Note:</b> This method cannot handle <a
jaroslav@563
  1913
     * href="#supplementary"> supplementary characters</a>. To support
jaroslav@563
  1914
     * all Unicode characters, including supplementary characters, use
jaroslav@563
  1915
     * the {@link #isJavaIdentifierStart(int)} method.
jaroslav@563
  1916
     *
jaroslav@563
  1917
     * @param   ch the character to be tested.
jaroslav@563
  1918
     * @return  {@code true} if the character may start a Java identifier;
jaroslav@563
  1919
     *          {@code false} otherwise.
jaroslav@563
  1920
     * @see     Character#isJavaIdentifierPart(char)
jaroslav@563
  1921
     * @see     Character#isLetter(char)
jaroslav@563
  1922
     * @see     Character#isUnicodeIdentifierStart(char)
jaroslav@563
  1923
     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
jaroslav@563
  1924
     * @since   1.1
jaroslav@563
  1925
     */
jaroslav@563
  1926
    public static boolean isJavaIdentifierStart(char ch) {
jaroslav@563
  1927
        return isJavaIdentifierStart((int)ch);
jaroslav@563
  1928
    }
jaroslav@563
  1929
jaroslav@563
  1930
    /**
jaroslav@563
  1931
     * Determines if the character (Unicode code point) is
jaroslav@563
  1932
     * permissible as the first character in a Java identifier.
jaroslav@563
  1933
     * <p>
jaroslav@563
  1934
     * A character may start a Java identifier if and only if
jaroslav@563
  1935
     * one of the following conditions is true:
jaroslav@563
  1936
     * <ul>
jaroslav@563
  1937
     * <li> {@link #isLetter(int) isLetter(codePoint)}
jaroslav@563
  1938
     *      returns {@code true}
jaroslav@563
  1939
     * <li> {@link #getType(int) getType(codePoint)}
jaroslav@563
  1940
     *      returns {@code LETTER_NUMBER}
jaroslav@563
  1941
     * <li> the referenced character is a currency symbol (such as {@code '$'})
jaroslav@563
  1942
     * <li> the referenced character is a connecting punctuation character
jaroslav@563
  1943
     *      (such as {@code '_'}).
jaroslav@563
  1944
     * </ul>
jaroslav@563
  1945
     *
jaroslav@563
  1946
     * @param   codePoint the character (Unicode code point) to be tested.
jaroslav@563
  1947
     * @return  {@code true} if the character may start a Java identifier;
jaroslav@563
  1948
     *          {@code false} otherwise.
jaroslav@563
  1949
     * @see     Character#isJavaIdentifierPart(int)
jaroslav@563
  1950
     * @see     Character#isLetter(int)
jaroslav@563
  1951
     * @see     Character#isUnicodeIdentifierStart(int)
jaroslav@563
  1952
     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
jaroslav@563
  1953
     * @since   1.5
jaroslav@563
  1954
     */
jaroslav@563
  1955
    public static boolean isJavaIdentifierStart(int codePoint) {
jaroslav@563
  1956
        return 
jaroslav@563
  1957
            ('A' <= codePoint && codePoint <= 'Z') ||
jaroslav@563
  1958
            ('a' <= codePoint && codePoint <= 'z');
jaroslav@563
  1959
    }
jaroslav@563
  1960
jaroslav@563
  1961
    /**
jaroslav@563
  1962
     * Determines if the specified character may be part of a Java
jaroslav@563
  1963
     * identifier as other than the first character.
jaroslav@563
  1964
     * <p>
jaroslav@563
  1965
     * A character may be part of a Java identifier if any of the following
jaroslav@563
  1966
     * are true:
jaroslav@563
  1967
     * <ul>
jaroslav@563
  1968
     * <li>  it is a letter
jaroslav@563
  1969
     * <li>  it is a currency symbol (such as {@code '$'})
jaroslav@563
  1970
     * <li>  it is a connecting punctuation character (such as {@code '_'})
jaroslav@563
  1971
     * <li>  it is a digit
jaroslav@563
  1972
     * <li>  it is a numeric letter (such as a Roman numeral character)
jaroslav@563
  1973
     * <li>  it is a combining mark
jaroslav@563
  1974
     * <li>  it is a non-spacing mark
jaroslav@563
  1975
     * <li> {@code isIdentifierIgnorable} returns
jaroslav@563
  1976
     * {@code true} for the character
jaroslav@563
  1977
     * </ul>
jaroslav@563
  1978
     *
jaroslav@563
  1979
     * <p><b>Note:</b> This method cannot handle <a
jaroslav@563
  1980
     * href="#supplementary"> supplementary characters</a>. To support
jaroslav@563
  1981
     * all Unicode characters, including supplementary characters, use
jaroslav@563
  1982
     * the {@link #isJavaIdentifierPart(int)} method.
jaroslav@563
  1983
     *
jaroslav@563
  1984
     * @param   ch      the character to be tested.
jaroslav@563
  1985
     * @return {@code true} if the character may be part of a
jaroslav@563
  1986
     *          Java identifier; {@code false} otherwise.
jaroslav@563
  1987
     * @see     Character#isIdentifierIgnorable(char)
jaroslav@563
  1988
     * @see     Character#isJavaIdentifierStart(char)
jaroslav@563
  1989
     * @see     Character#isLetterOrDigit(char)
jaroslav@563
  1990
     * @see     Character#isUnicodeIdentifierPart(char)
jaroslav@563
  1991
     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
jaroslav@563
  1992
     * @since   1.1
jaroslav@563
  1993
     */
jaroslav@563
  1994
    public static boolean isJavaIdentifierPart(char ch) {
jaroslav@563
  1995
        return isJavaIdentifierPart((int)ch);
jaroslav@563
  1996
    }
jaroslav@563
  1997
jaroslav@563
  1998
    /**
jaroslav@563
  1999
     * Determines if the character (Unicode code point) may be part of a Java
jaroslav@563
  2000
     * identifier as other than the first character.
jaroslav@563
  2001
     * <p>
jaroslav@563
  2002
     * A character may be part of a Java identifier if any of the following
jaroslav@563
  2003
     * are true:
jaroslav@563
  2004
     * <ul>
jaroslav@563
  2005
     * <li>  it is a letter
jaroslav@563
  2006
     * <li>  it is a currency symbol (such as {@code '$'})
jaroslav@563
  2007
     * <li>  it is a connecting punctuation character (such as {@code '_'})
jaroslav@563
  2008
     * <li>  it is a digit
jaroslav@563
  2009
     * <li>  it is a numeric letter (such as a Roman numeral character)
jaroslav@563
  2010
     * <li>  it is a combining mark
jaroslav@563
  2011
     * <li>  it is a non-spacing mark
jaroslav@563
  2012
     * <li> {@link #isIdentifierIgnorable(int)
jaroslav@563
  2013
     * isIdentifierIgnorable(codePoint)} returns {@code true} for
jaroslav@563
  2014
     * the character
jaroslav@563
  2015
     * </ul>
jaroslav@563
  2016
     *
jaroslav@563
  2017
     * @param   codePoint the character (Unicode code point) to be tested.
jaroslav@563
  2018
     * @return {@code true} if the character may be part of a
jaroslav@563
  2019
     *          Java identifier; {@code false} otherwise.
jaroslav@563
  2020
     * @see     Character#isIdentifierIgnorable(int)
jaroslav@563
  2021
     * @see     Character#isJavaIdentifierStart(int)
jaroslav@563
  2022
     * @see     Character#isLetterOrDigit(int)
jaroslav@563
  2023
     * @see     Character#isUnicodeIdentifierPart(int)
jaroslav@563
  2024
     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
jaroslav@563
  2025
     * @since   1.5
jaroslav@563
  2026
     */
jaroslav@563
  2027
    public static boolean isJavaIdentifierPart(int codePoint) {
jaroslav@563
  2028
        return isJavaIdentifierStart(codePoint) ||
jaroslav@590
  2029
            ('0' <= codePoint && codePoint <= '9') || codePoint == '$';
jaroslav@563
  2030
    }
jaroslav@563
  2031
   
jaroslav@68
  2032
    /**
jaroslav@68
  2033
     * Converts the character argument to lowercase using case
jaroslav@68
  2034
     * mapping information from the UnicodeData file.
jaroslav@68
  2035
     * <p>
jaroslav@68
  2036
     * Note that
jaroslav@68
  2037
     * {@code Character.isLowerCase(Character.toLowerCase(ch))}
jaroslav@68
  2038
     * does not always return {@code true} for some ranges of
jaroslav@68
  2039
     * characters, particularly those that are symbols or ideographs.
jaroslav@68
  2040
     *
jaroslav@68
  2041
     * <p>In general, {@link String#toLowerCase()} should be used to map
jaroslav@68
  2042
     * characters to lowercase. {@code String} case mapping methods
jaroslav@68
  2043
     * have several benefits over {@code Character} case mapping methods.
jaroslav@68
  2044
     * {@code String} case mapping methods can perform locale-sensitive
jaroslav@68
  2045
     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
jaroslav@68
  2046
     * the {@code Character} case mapping methods cannot.
jaroslav@68
  2047
     *
jaroslav@68
  2048
     * <p><b>Note:</b> This method cannot handle <a
jaroslav@68
  2049
     * href="#supplementary"> supplementary characters</a>. To support
jaroslav@68
  2050
     * all Unicode characters, including supplementary characters, use
jaroslav@68
  2051
     * the {@link #toLowerCase(int)} method.
jaroslav@68
  2052
     *
jaroslav@68
  2053
     * @param   ch   the character to be converted.
jaroslav@68
  2054
     * @return  the lowercase equivalent of the character, if any;
jaroslav@68
  2055
     *          otherwise, the character itself.
jaroslav@68
  2056
     * @see     Character#isLowerCase(char)
jaroslav@68
  2057
     * @see     String#toLowerCase()
jaroslav@68
  2058
     */
jaroslav@68
  2059
    public static char toLowerCase(char ch) {
jaroslav@326
  2060
        return String.valueOf(ch).toLowerCase().charAt(0);
jaroslav@68
  2061
    }
jaroslav@68
  2062
jaroslav@68
  2063
    /**
jaroslav@68
  2064
     * Converts the character argument to uppercase using case mapping
jaroslav@68
  2065
     * information from the UnicodeData file.
jaroslav@68
  2066
     * <p>
jaroslav@68
  2067
     * Note that
jaroslav@68
  2068
     * {@code Character.isUpperCase(Character.toUpperCase(ch))}
jaroslav@68
  2069
     * does not always return {@code true} for some ranges of
jaroslav@68
  2070
     * characters, particularly those that are symbols or ideographs.
jaroslav@68
  2071
     *
jaroslav@68
  2072
     * <p>In general, {@link String#toUpperCase()} should be used to map
jaroslav@68
  2073
     * characters to uppercase. {@code String} case mapping methods
jaroslav@68
  2074
     * have several benefits over {@code Character} case mapping methods.
jaroslav@68
  2075
     * {@code String} case mapping methods can perform locale-sensitive
jaroslav@68
  2076
     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
jaroslav@68
  2077
     * the {@code Character} case mapping methods cannot.
jaroslav@68
  2078
     *
jaroslav@68
  2079
     * <p><b>Note:</b> This method cannot handle <a
jaroslav@68
  2080
     * href="#supplementary"> supplementary characters</a>. To support
jaroslav@68
  2081
     * all Unicode characters, including supplementary characters, use
jaroslav@68
  2082
     * the {@link #toUpperCase(int)} method.
jaroslav@68
  2083
     *
jaroslav@68
  2084
     * @param   ch   the character to be converted.
jaroslav@68
  2085
     * @return  the uppercase equivalent of the character, if any;
jaroslav@68
  2086
     *          otherwise, the character itself.
jaroslav@68
  2087
     * @see     Character#isUpperCase(char)
jaroslav@68
  2088
     * @see     String#toUpperCase()
jaroslav@68
  2089
     */
jaroslav@68
  2090
    public static char toUpperCase(char ch) {
jaroslav@326
  2091
        return String.valueOf(ch).toUpperCase().charAt(0);
jaroslav@68
  2092
    }
jaroslav@68
  2093
jaroslav@68
  2094
    /**
jaroslav@68
  2095
     * Returns the numeric value of the character {@code ch} in the
jaroslav@68
  2096
     * specified radix.
jaroslav@68
  2097
     * <p>
jaroslav@68
  2098
     * If the radix is not in the range {@code MIN_RADIX} &le;
jaroslav@68
  2099
     * {@code radix} &le; {@code MAX_RADIX} or if the
jaroslav@68
  2100
     * value of {@code ch} is not a valid digit in the specified
jaroslav@68
  2101
     * radix, {@code -1} is returned. A character is a valid digit
jaroslav@68
  2102
     * if at least one of the following is true:
jaroslav@68
  2103
     * <ul>
jaroslav@68
  2104
     * <li>The method {@code isDigit} is {@code true} of the character
jaroslav@68
  2105
     *     and the Unicode decimal digit value of the character (or its
jaroslav@68
  2106
     *     single-character decomposition) is less than the specified radix.
jaroslav@68
  2107
     *     In this case the decimal digit value is returned.
jaroslav@68
  2108
     * <li>The character is one of the uppercase Latin letters
jaroslav@68
  2109
     *     {@code 'A'} through {@code 'Z'} and its code is less than
jaroslav@68
  2110
     *     {@code radix + 'A' - 10}.
jaroslav@68
  2111
     *     In this case, {@code ch - 'A' + 10}
jaroslav@68
  2112
     *     is returned.
jaroslav@68
  2113
     * <li>The character is one of the lowercase Latin letters
jaroslav@68
  2114
     *     {@code 'a'} through {@code 'z'} and its code is less than
jaroslav@68
  2115
     *     {@code radix + 'a' - 10}.
jaroslav@68
  2116
     *     In this case, {@code ch - 'a' + 10}
jaroslav@68
  2117
     *     is returned.
jaroslav@68
  2118
     * <li>The character is one of the fullwidth uppercase Latin letters A
jaroslav@68
  2119
     *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
jaroslav@68
  2120
     *     and its code is less than
jaroslav@68
  2121
     *     {@code radix + '\u005CuFF21' - 10}.
jaroslav@68
  2122
     *     In this case, {@code ch - '\u005CuFF21' + 10}
jaroslav@68
  2123
     *     is returned.
jaroslav@68
  2124
     * <li>The character is one of the fullwidth lowercase Latin letters a
jaroslav@68
  2125
     *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
jaroslav@68
  2126
     *     and its code is less than
jaroslav@68
  2127
     *     {@code radix + '\u005CuFF41' - 10}.
jaroslav@68
  2128
     *     In this case, {@code ch - '\u005CuFF41' + 10}
jaroslav@68
  2129
     *     is returned.
jaroslav@68
  2130
     * </ul>
jaroslav@68
  2131
     *
jaroslav@68
  2132
     * <p><b>Note:</b> This method cannot handle <a
jaroslav@68
  2133
     * href="#supplementary"> supplementary characters</a>. To support
jaroslav@68
  2134
     * all Unicode characters, including supplementary characters, use
jaroslav@68
  2135
     * the {@link #digit(int, int)} method.
jaroslav@68
  2136
     *
jaroslav@68
  2137
     * @param   ch      the character to be converted.
jaroslav@68
  2138
     * @param   radix   the radix.
jaroslav@68
  2139
     * @return  the numeric value represented by the character in the
jaroslav@68
  2140
     *          specified radix.
jaroslav@68
  2141
     * @see     Character#forDigit(int, int)
jaroslav@68
  2142
     * @see     Character#isDigit(char)
jaroslav@68
  2143
     */
jaroslav@68
  2144
    public static int digit(char ch, int radix) {
jaroslav@68
  2145
        return digit((int)ch, radix);
jaroslav@68
  2146
    }
jaroslav@68
  2147
jaroslav@68
  2148
    /**
jaroslav@68
  2149
     * Returns the numeric value of the specified character (Unicode
jaroslav@68
  2150
     * code point) in the specified radix.
jaroslav@68
  2151
     *
jaroslav@68
  2152
     * <p>If the radix is not in the range {@code MIN_RADIX} &le;
jaroslav@68
  2153
     * {@code radix} &le; {@code MAX_RADIX} or if the
jaroslav@68
  2154
     * character is not a valid digit in the specified
jaroslav@68
  2155
     * radix, {@code -1} is returned. A character is a valid digit
jaroslav@68
  2156
     * if at least one of the following is true:
jaroslav@68
  2157
     * <ul>
jaroslav@68
  2158
     * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
jaroslav@68
  2159
     *     and the Unicode decimal digit value of the character (or its
jaroslav@68
  2160
     *     single-character decomposition) is less than the specified radix.
jaroslav@68
  2161
     *     In this case the decimal digit value is returned.
jaroslav@68
  2162
     * <li>The character is one of the uppercase Latin letters
jaroslav@68
  2163
     *     {@code 'A'} through {@code 'Z'} and its code is less than
jaroslav@68
  2164
     *     {@code radix + 'A' - 10}.
jaroslav@68
  2165
     *     In this case, {@code codePoint - 'A' + 10}
jaroslav@68
  2166
     *     is returned.
jaroslav@68
  2167
     * <li>The character is one of the lowercase Latin letters
jaroslav@68
  2168
     *     {@code 'a'} through {@code 'z'} and its code is less than
jaroslav@68
  2169
     *     {@code radix + 'a' - 10}.
jaroslav@68
  2170
     *     In this case, {@code codePoint - 'a' + 10}
jaroslav@68
  2171
     *     is returned.
jaroslav@68
  2172
     * <li>The character is one of the fullwidth uppercase Latin letters A
jaroslav@68
  2173
     *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
jaroslav@68
  2174
     *     and its code is less than
jaroslav@68
  2175
     *     {@code radix + '\u005CuFF21' - 10}.
jaroslav@68
  2176
     *     In this case,
jaroslav@68
  2177
     *     {@code codePoint - '\u005CuFF21' + 10}
jaroslav@68
  2178
     *     is returned.
jaroslav@68
  2179
     * <li>The character is one of the fullwidth lowercase Latin letters a
jaroslav@68
  2180
     *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
jaroslav@68
  2181
     *     and its code is less than
jaroslav@68
  2182
     *     {@code radix + '\u005CuFF41' - 10}.
jaroslav@68
  2183
     *     In this case,
jaroslav@68
  2184
     *     {@code codePoint - '\u005CuFF41' + 10}
jaroslav@68
  2185
     *     is returned.
jaroslav@68
  2186
     * </ul>
jaroslav@68
  2187
     *
jaroslav@68
  2188
     * @param   codePoint the character (Unicode code point) to be converted.
jaroslav@68
  2189
     * @param   radix   the radix.
jaroslav@68
  2190
     * @return  the numeric value represented by the character in the
jaroslav@68
  2191
     *          specified radix.
jaroslav@68
  2192
     * @see     Character#forDigit(int, int)
jaroslav@68
  2193
     * @see     Character#isDigit(int)
jaroslav@68
  2194
     * @since   1.5
jaroslav@68
  2195
     */
jaroslav@68
  2196
    public static int digit(int codePoint, int radix) {
jaroslav@85
  2197
        throw new UnsupportedOperationException();
jaroslav@68
  2198
    }
jaroslav@68
  2199
jaroslav@68
  2200
    /**
jaroslav@68
  2201
     * Returns the {@code int} value that the specified Unicode
jaroslav@68
  2202
     * character represents. For example, the character
jaroslav@68
  2203
     * {@code '\u005Cu216C'} (the roman numeral fifty) will return
jaroslav@68
  2204
     * an int with a value of 50.
jaroslav@68
  2205
     * <p>
jaroslav@68
  2206
     * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
jaroslav@68
  2207
     * {@code '\u005Cu005A'}), lowercase
jaroslav@68
  2208
     * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
jaroslav@68
  2209
     * full width variant ({@code '\u005CuFF21'} through
jaroslav@68
  2210
     * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
jaroslav@68
  2211
     * {@code '\u005CuFF5A'}) forms have numeric values from 10
jaroslav@68
  2212
     * through 35. This is independent of the Unicode specification,
jaroslav@68
  2213
     * which does not assign numeric values to these {@code char}
jaroslav@68
  2214
     * values.
jaroslav@68
  2215
     * <p>
jaroslav@68
  2216
     * If the character does not have a numeric value, then -1 is returned.
jaroslav@68
  2217
     * If the character has a numeric value that cannot be represented as a
jaroslav@68
  2218
     * nonnegative integer (for example, a fractional value), then -2
jaroslav@68
  2219
     * is returned.
jaroslav@68
  2220
     *
jaroslav@68
  2221
     * <p><b>Note:</b> This method cannot handle <a
jaroslav@68
  2222
     * href="#supplementary"> supplementary characters</a>. To support
jaroslav@68
  2223
     * all Unicode characters, including supplementary characters, use
jaroslav@68
  2224
     * the {@link #getNumericValue(int)} method.
jaroslav@68
  2225
     *
jaroslav@68
  2226
     * @param   ch      the character to be converted.
jaroslav@68
  2227
     * @return  the numeric value of the character, as a nonnegative {@code int}
jaroslav@68
  2228
     *           value; -2 if the character has a numeric value that is not a
jaroslav@68
  2229
     *          nonnegative integer; -1 if the character has no numeric value.
jaroslav@68
  2230
     * @see     Character#forDigit(int, int)
jaroslav@68
  2231
     * @see     Character#isDigit(char)
jaroslav@68
  2232
     * @since   1.1
jaroslav@68
  2233
     */
jaroslav@68
  2234
    public static int getNumericValue(char ch) {
jaroslav@68
  2235
        return getNumericValue((int)ch);
jaroslav@68
  2236
    }
jaroslav@68
  2237
jaroslav@68
  2238
    /**
jaroslav@68
  2239
     * Returns the {@code int} value that the specified
jaroslav@68
  2240
     * character (Unicode code point) represents. For example, the character
jaroslav@68
  2241
     * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
jaroslav@68
  2242
     * an {@code int} with a value of 50.
jaroslav@68
  2243
     * <p>
jaroslav@68
  2244
     * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
jaroslav@68
  2245
     * {@code '\u005Cu005A'}), lowercase
jaroslav@68
  2246
     * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
jaroslav@68
  2247
     * full width variant ({@code '\u005CuFF21'} through
jaroslav@68
  2248
     * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
jaroslav@68
  2249
     * {@code '\u005CuFF5A'}) forms have numeric values from 10
jaroslav@68
  2250
     * through 35. This is independent of the Unicode specification,
jaroslav@68
  2251
     * which does not assign numeric values to these {@code char}
jaroslav@68
  2252
     * values.
jaroslav@68
  2253
     * <p>
jaroslav@68
  2254
     * If the character does not have a numeric value, then -1 is returned.
jaroslav@68
  2255
     * If the character has a numeric value that cannot be represented as a
jaroslav@68
  2256
     * nonnegative integer (for example, a fractional value), then -2
jaroslav@68
  2257
     * is returned.
jaroslav@68
  2258
     *
jaroslav@68
  2259
     * @param   codePoint the character (Unicode code point) to be converted.
jaroslav@68
  2260
     * @return  the numeric value of the character, as a nonnegative {@code int}
jaroslav@68
  2261
     *          value; -2 if the character has a numeric value that is not a
jaroslav@68
  2262
     *          nonnegative integer; -1 if the character has no numeric value.
jaroslav@68
  2263
     * @see     Character#forDigit(int, int)
jaroslav@68
  2264
     * @see     Character#isDigit(int)
jaroslav@68
  2265
     * @since   1.5
jaroslav@68
  2266
     */
jaroslav@68
  2267
    public static int getNumericValue(int codePoint) {
jaroslav@85
  2268
        throw new UnsupportedOperationException();
jaroslav@68
  2269
    }
jaroslav@68
  2270
jaroslav@68
  2271
    /**
jaroslav@68
  2272
     * Determines if the specified character is ISO-LATIN-1 white space.
jaroslav@68
  2273
     * This method returns {@code true} for the following five
jaroslav@68
  2274
     * characters only:
jaroslav@68
  2275
     * <table>
jaroslav@68
  2276
     * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>
jaroslav@68
  2277
     *     <td>{@code HORIZONTAL TABULATION}</td></tr>
jaroslav@68
  2278
     * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>
jaroslav@68
  2279
     *     <td>{@code NEW LINE}</td></tr>
jaroslav@68
  2280
     * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>
jaroslav@68
  2281
     *     <td>{@code FORM FEED}</td></tr>
jaroslav@68
  2282
     * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>
jaroslav@68
  2283
     *     <td>{@code CARRIAGE RETURN}</td></tr>
jaroslav@68
  2284
     * <tr><td>{@code '&nbsp;'}</td>  <td>{@code U+0020}</td>
jaroslav@68
  2285
     *     <td>{@code SPACE}</td></tr>
jaroslav@68
  2286
     * </table>
jaroslav@68
  2287
     *
jaroslav@68
  2288
     * @param      ch   the character to be tested.
jaroslav@68
  2289
     * @return     {@code true} if the character is ISO-LATIN-1 white
jaroslav@68
  2290
     *             space; {@code false} otherwise.
jaroslav@68
  2291
     * @see        Character#isSpaceChar(char)
jaroslav@68
  2292
     * @see        Character#isWhitespace(char)
jaroslav@68
  2293
     * @deprecated Replaced by isWhitespace(char).
jaroslav@68
  2294
     */
jaroslav@68
  2295
    @Deprecated
jaroslav@68
  2296
    public static boolean isSpace(char ch) {
jaroslav@68
  2297
        return (ch <= 0x0020) &&
jaroslav@68
  2298
            (((((1L << 0x0009) |
jaroslav@68
  2299
            (1L << 0x000A) |
jaroslav@68
  2300
            (1L << 0x000C) |
jaroslav@68
  2301
            (1L << 0x000D) |
jaroslav@68
  2302
            (1L << 0x0020)) >> ch) & 1L) != 0);
jaroslav@68
  2303
    }
jaroslav@68
  2304
jaroslav@68
  2305
jaroslav@68
  2306
jaroslav@68
  2307
    /**
jaroslav@68
  2308
     * Determines if the specified character is white space according to Java.
jaroslav@68
  2309
     * A character is a Java whitespace character if and only if it satisfies
jaroslav@68
  2310
     * one of the following criteria:
jaroslav@68
  2311
     * <ul>
jaroslav@68
  2312
     * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
jaroslav@68
  2313
     *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
jaroslav@68
  2314
     *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
jaroslav@68
  2315
     *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
jaroslav@68
  2316
     * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
jaroslav@68
  2317
     * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
jaroslav@68
  2318
     * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
jaroslav@68
  2319
     * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
jaroslav@68
  2320
     * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
jaroslav@68
  2321
     * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
jaroslav@68
  2322
     * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
jaroslav@68
  2323
     * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
jaroslav@68
  2324
     * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
jaroslav@68
  2325
     * </ul>
jaroslav@68
  2326
     *
jaroslav@68
  2327
     * <p><b>Note:</b> This method cannot handle <a
jaroslav@68
  2328
     * href="#supplementary"> supplementary characters</a>. To support
jaroslav@68
  2329
     * all Unicode characters, including supplementary characters, use
jaroslav@68
  2330
     * the {@link #isWhitespace(int)} method.
jaroslav@68
  2331
     *
jaroslav@68
  2332
     * @param   ch the character to be tested.
jaroslav@68
  2333
     * @return  {@code true} if the character is a Java whitespace
jaroslav@68
  2334
     *          character; {@code false} otherwise.
jaroslav@68
  2335
     * @see     Character#isSpaceChar(char)
jaroslav@68
  2336
     * @since   1.1
jaroslav@68
  2337
     */
jaroslav@68
  2338
    public static boolean isWhitespace(char ch) {
jaroslav@68
  2339
        return isWhitespace((int)ch);
jaroslav@68
  2340
    }
jaroslav@68
  2341
jaroslav@68
  2342
    /**
jaroslav@68
  2343
     * Determines if the specified character (Unicode code point) is
jaroslav@68
  2344
     * white space according to Java.  A character is a Java
jaroslav@68
  2345
     * whitespace character if and only if it satisfies one of the
jaroslav@68
  2346
     * following criteria:
jaroslav@68
  2347
     * <ul>
jaroslav@68
  2348
     * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
jaroslav@68
  2349
     *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
jaroslav@68
  2350
     *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
jaroslav@68
  2351
     *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
jaroslav@68
  2352
     * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
jaroslav@68
  2353
     * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
jaroslav@68
  2354
     * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
jaroslav@68
  2355
     * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
jaroslav@68
  2356
     * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
jaroslav@68
  2357
     * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
jaroslav@68
  2358
     * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
jaroslav@68
  2359
     * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
jaroslav@68
  2360
     * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
jaroslav@68
  2361
     * </ul>
jaroslav@68
  2362
     * <p>
jaroslav@68
  2363
     *
jaroslav@68
  2364
     * @param   codePoint the character (Unicode code point) to be tested.
jaroslav@68
  2365
     * @return  {@code true} if the character is a Java whitespace
jaroslav@68
  2366
     *          character; {@code false} otherwise.
jaroslav@68
  2367
     * @see     Character#isSpaceChar(int)
jaroslav@68
  2368
     * @since   1.5
jaroslav@68
  2369
     */
jaroslav@68
  2370
    public static boolean isWhitespace(int codePoint) {
jaroslav@85
  2371
        throw new UnsupportedOperationException();
jaroslav@68
  2372
    }
jaroslav@68
  2373
jaroslav@68
  2374
    /**
jaroslav@68
  2375
     * Determines if the specified character is an ISO control
jaroslav@68
  2376
     * character.  A character is considered to be an ISO control
jaroslav@68
  2377
     * character if its code is in the range {@code '\u005Cu0000'}
jaroslav@68
  2378
     * through {@code '\u005Cu001F'} or in the range
jaroslav@68
  2379
     * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
jaroslav@68
  2380
     *
jaroslav@68
  2381
     * <p><b>Note:</b> This method cannot handle <a
jaroslav@68
  2382
     * href="#supplementary"> supplementary characters</a>. To support
jaroslav@68
  2383
     * all Unicode characters, including supplementary characters, use
jaroslav@68
  2384
     * the {@link #isISOControl(int)} method.
jaroslav@68
  2385
     *
jaroslav@68
  2386
     * @param   ch      the character to be tested.
jaroslav@68
  2387
     * @return  {@code true} if the character is an ISO control character;
jaroslav@68
  2388
     *          {@code false} otherwise.
jaroslav@68
  2389
     *
jaroslav@68
  2390
     * @see     Character#isSpaceChar(char)
jaroslav@68
  2391
     * @see     Character#isWhitespace(char)
jaroslav@68
  2392
     * @since   1.1
jaroslav@68
  2393
     */
jaroslav@68
  2394
    public static boolean isISOControl(char ch) {
jaroslav@68
  2395
        return isISOControl((int)ch);
jaroslav@68
  2396
    }
jaroslav@68
  2397
jaroslav@68
  2398
    /**
jaroslav@68
  2399
     * Determines if the referenced character (Unicode code point) is an ISO control
jaroslav@68
  2400
     * character.  A character is considered to be an ISO control
jaroslav@68
  2401
     * character if its code is in the range {@code '\u005Cu0000'}
jaroslav@68
  2402
     * through {@code '\u005Cu001F'} or in the range
jaroslav@68
  2403
     * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
jaroslav@68
  2404
     *
jaroslav@68
  2405
     * @param   codePoint the character (Unicode code point) to be tested.
jaroslav@68
  2406
     * @return  {@code true} if the character is an ISO control character;
jaroslav@68
  2407
     *          {@code false} otherwise.
jaroslav@68
  2408
     * @see     Character#isSpaceChar(int)
jaroslav@68
  2409
     * @see     Character#isWhitespace(int)
jaroslav@68
  2410
     * @since   1.5
jaroslav@68
  2411
     */
jaroslav@68
  2412
    public static boolean isISOControl(int codePoint) {
jaroslav@68
  2413
        // Optimized form of:
jaroslav@68
  2414
        //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
jaroslav@68
  2415
        //     (codePoint >= 0x7F && codePoint <= 0x9F);
jaroslav@68
  2416
        return codePoint <= 0x9F &&
jaroslav@68
  2417
            (codePoint >= 0x7F || (codePoint >>> 5 == 0));
jaroslav@68
  2418
    }
jaroslav@68
  2419
jaroslav@68
  2420
    /**
jaroslav@68
  2421
     * Determines the character representation for a specific digit in
jaroslav@68
  2422
     * the specified radix. If the value of {@code radix} is not a
jaroslav@68
  2423
     * valid radix, or the value of {@code digit} is not a valid
jaroslav@68
  2424
     * digit in the specified radix, the null character
jaroslav@68
  2425
     * ({@code '\u005Cu0000'}) is returned.
jaroslav@68
  2426
     * <p>
jaroslav@68
  2427
     * The {@code radix} argument is valid if it is greater than or
jaroslav@68
  2428
     * equal to {@code MIN_RADIX} and less than or equal to
jaroslav@68
  2429
     * {@code MAX_RADIX}. The {@code digit} argument is valid if
jaroslav@68
  2430
     * {@code 0 <= digit < radix}.
jaroslav@68
  2431
     * <p>
jaroslav@68
  2432
     * If the digit is less than 10, then
jaroslav@68
  2433
     * {@code '0' + digit} is returned. Otherwise, the value
jaroslav@68
  2434
     * {@code 'a' + digit - 10} is returned.
jaroslav@68
  2435
     *
jaroslav@68
  2436
     * @param   digit   the number to convert to a character.
jaroslav@68
  2437
     * @param   radix   the radix.
jaroslav@68
  2438
     * @return  the {@code char} representation of the specified digit
jaroslav@68
  2439
     *          in the specified radix.
jaroslav@68
  2440
     * @see     Character#MIN_RADIX
jaroslav@68
  2441
     * @see     Character#MAX_RADIX
jaroslav@68
  2442
     * @see     Character#digit(char, int)
jaroslav@68
  2443
     */
jaroslav@68
  2444
    public static char forDigit(int digit, int radix) {
jaroslav@68
  2445
        if ((digit >= radix) || (digit < 0)) {
jaroslav@68
  2446
            return '\0';
jaroslav@68
  2447
        }
jaroslav@68
  2448
        if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
jaroslav@68
  2449
            return '\0';
jaroslav@68
  2450
        }
jaroslav@68
  2451
        if (digit < 10) {
jaroslav@68
  2452
            return (char)('0' + digit);
jaroslav@68
  2453
        }
jaroslav@68
  2454
        return (char)('a' - 10 + digit);
jaroslav@68
  2455
    }
jaroslav@68
  2456
jaroslav@68
  2457
    /**
jaroslav@68
  2458
     * Compares two {@code Character} objects numerically.
jaroslav@68
  2459
     *
jaroslav@68
  2460
     * @param   anotherCharacter   the {@code Character} to be compared.
jaroslav@68
  2461
jaroslav@68
  2462
     * @return  the value {@code 0} if the argument {@code Character}
jaroslav@68
  2463
     *          is equal to this {@code Character}; a value less than
jaroslav@68
  2464
     *          {@code 0} if this {@code Character} is numerically less
jaroslav@68
  2465
     *          than the {@code Character} argument; and a value greater than
jaroslav@68
  2466
     *          {@code 0} if this {@code Character} is numerically greater
jaroslav@68
  2467
     *          than the {@code Character} argument (unsigned comparison).
jaroslav@68
  2468
     *          Note that this is strictly a numerical comparison; it is not
jaroslav@68
  2469
     *          locale-dependent.
jaroslav@68
  2470
     * @since   1.2
jaroslav@68
  2471
     */
jaroslav@68
  2472
    public int compareTo(Character anotherCharacter) {
jaroslav@68
  2473
        return compare(this.value, anotherCharacter.value);
jaroslav@68
  2474
    }
jaroslav@68
  2475
jaroslav@68
  2476
    /**
jaroslav@68
  2477
     * Compares two {@code char} values numerically.
jaroslav@68
  2478
     * The value returned is identical to what would be returned by:
jaroslav@68
  2479
     * <pre>
jaroslav@68
  2480
     *    Character.valueOf(x).compareTo(Character.valueOf(y))
jaroslav@68
  2481
     * </pre>
jaroslav@68
  2482
     *
jaroslav@68
  2483
     * @param  x the first {@code char} to compare
jaroslav@68
  2484
     * @param  y the second {@code char} to compare
jaroslav@68
  2485
     * @return the value {@code 0} if {@code x == y};
jaroslav@68
  2486
     *         a value less than {@code 0} if {@code x < y}; and
jaroslav@68
  2487
     *         a value greater than {@code 0} if {@code x > y}
jaroslav@68
  2488
     * @since 1.7
jaroslav@68
  2489
     */
jaroslav@68
  2490
    public static int compare(char x, char y) {
jaroslav@68
  2491
        return x - y;
jaroslav@68
  2492
    }
jaroslav@68
  2493
jaroslav@68
  2494
jaroslav@68
  2495
    /**
jaroslav@68
  2496
     * The number of bits used to represent a <tt>char</tt> value in unsigned
jaroslav@68
  2497
     * binary form, constant {@code 16}.
jaroslav@68
  2498
     *
jaroslav@68
  2499
     * @since 1.5
jaroslav@68
  2500
     */
jaroslav@68
  2501
    public static final int SIZE = 16; // width of a char in bits
jaroslav@68
  2502
jaroslav@68
  2503
    /**
jaroslav@68
  2504
     * Returns the value obtained by reversing the order of the bytes in the
jaroslav@68
  2505
     * specified <tt>char</tt> value.
jaroslav@68
  2506
     *
jaroslav@68
  2507
     * @return the value obtained by reversing (or, equivalently, swapping)
jaroslav@68
  2508
     *     the bytes in the specified <tt>char</tt> value.
jaroslav@68
  2509
     * @since 1.5
jaroslav@68
  2510
     */
jaroslav@68
  2511
    public static char reverseBytes(char ch) {
jaroslav@68
  2512
        return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
jaroslav@68
  2513
    }
jaroslav@68
  2514
jaroslav@68
  2515
}