hg/bck2brwsr: emul/src/main/java/java/lang/Character.java@a2924470187b

     1 /*

     2  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.

     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

     4  *

     5  * This code is free software; you can redistribute it and/or modify it

     6  * under the terms of the GNU General Public License version 2 only, as

     7  * published by the Free Software Foundation.  Oracle designates this

     8  * particular file as subject to the "Classpath" exception as provided

     9  * by Oracle in the LICENSE file that accompanied this code.

    10  *

    11  * This code is distributed in the hope that it will be useful, but WITHOUT

    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

    14  * version 2 for more details (a copy is included in the LICENSE file that

    15  * accompanied this code).

    16  *

    17  * You should have received a copy of the GNU General Public License version

    18  * 2 along with this work; if not, write to the Free Software Foundation,

    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

    20  *

    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA

    22  * or visit www.oracle.com if you need additional information or have any

    23  * questions.

    24  */

    26 package java.lang;

    28 import java.util.Arrays;

    29 import java.util.Map;

    30 import java.util.HashMap;

    31 import java.util.Locale;

    33 /**

    34  * The {@code Character} class wraps a value of the primitive

    35  * type {@code char} in an object. An object of type

    36  * {@code Character} contains a single field whose type is

    37  * {@code char}.

    38  * <p>

    39  * In addition, this class provides several methods for determining

    40  * a character's category (lowercase letter, digit, etc.) and for converting

    41  * characters from uppercase to lowercase and vice versa.

    42  * <p>

    43  * Character information is based on the Unicode Standard, version 6.0.0.

    44  * <p>

    45  * The methods and data of class {@code Character} are defined by

    46  * the information in the <i>UnicodeData</i> file that is part of the

    47  * Unicode Character Database maintained by the Unicode

    48  * Consortium. This file specifies various properties including name

    49  * and general category for every defined Unicode code point or

    50  * character range.

    51  * <p>

    52  * The file and its description are available from the Unicode Consortium at:

    53  * <ul>

    54  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>

    55  * </ul>

    56  *

    57  * <h4><a name="unicode">Unicode Character Representations</a></h4>

    58  *

    59  * <p>The {@code char} data type (and therefore the value that a

    60  * {@code Character} object encapsulates) is based on the

    61  * original Unicode specification, which defined characters as

    62  * fixed-width 16-bit entities. The Unicode Standard has since been

    63  * changed to allow for characters whose representation requires more

    64  * than 16 bits.  The range of legal <em>code point</em>s is now

    65  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.

    66  * (Refer to the <a

    67  * href="http://www.unicode.org/reports/tr27/#notation"><i>

    68  * definition</i></a> of the U+<i>n</i> notation in the Unicode

    69  * Standard.)

    70  *

    71  * <p><a name="BMP">The set of characters from U+0000 to U+FFFF is

    72  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.

    73  * <a name="supplementary">Characters</a> whose code points are greater

    74  * than U+FFFF are called <em>supplementary character</em>s.  The Java

    75  * platform uses the UTF-16 representation in {@code char} arrays and

    76  * in the {@code String} and {@code StringBuffer} classes. In

    77  * this representation, supplementary characters are represented as a pair

    78  * of {@code char} values, the first from the <em>high-surrogates</em>

    79  * range, (&#92;uD800-&#92;uDBFF), the second from the

    80  * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).

    81  *

    82  * <p>A {@code char} value, therefore, represents Basic

    83  * Multilingual Plane (BMP) code points, including the surrogate

    84  * code points, or code units of the UTF-16 encoding. An

    85  * {@code int} value represents all Unicode code points,

    86  * including supplementary code points. The lower (least significant)

    87  * 21 bits of {@code int} are used to represent Unicode code

    88  * points and the upper (most significant) 11 bits must be zero.

    89  * Unless otherwise specified, the behavior with respect to

    90  * supplementary characters and surrogate {@code char} values is

    91  * as follows:

    92  *

    93  * <ul>

    94  * <li>The methods that only accept a {@code char} value cannot support

    95  * supplementary characters. They treat {@code char} values from the

    96  * surrogate ranges as undefined characters. For example,

    97  * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though

    98  * this specific value if followed by any low-surrogate value in a string

    99  * would represent a letter.

   100  *

   101  * <li>The methods that accept an {@code int} value support all

   102  * Unicode characters, including supplementary characters. For

   103  * example, {@code Character.isLetter(0x2F81A)} returns

   104  * {@code true} because the code point value represents a letter

   105  * (a CJK ideograph).

   106  * </ul>

   107  *

   108  * <p>In the Java SE API documentation, <em>Unicode code point</em> is

   109  * used for character values in the range between U+0000 and U+10FFFF,

   110  * and <em>Unicode code unit</em> is used for 16-bit

   111  * {@code char} values that are code units of the <em>UTF-16</em>

   112  * encoding. For more information on Unicode terminology, refer to the

   113  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.

   114  *

   115  * @author  Lee Boynton

   116  * @author  Guy Steele

   117  * @author  Akira Tanaka

   118  * @author  Martin Buchholz

   119  * @author  Ulf Zibis

   120  * @since   1.0

   121  */

   122 public final

   123 class Character implements java.io.Serializable, Comparable<Character> {

   124     /**

   125      * The minimum radix available for conversion to and from strings.

   126      * The constant value of this field is the smallest value permitted

   127      * for the radix argument in radix-conversion methods such as the

   128      * {@code digit} method, the {@code forDigit} method, and the

   129      * {@code toString} method of class {@code Integer}.

   130      *

   131      * @see     Character#digit(char, int)

   132      * @see     Character#forDigit(int, int)

   133      * @see     Integer#toString(int, int)

   134      * @see     Integer#valueOf(String)

   135      */

   136     public static final int MIN_RADIX = 2;

   138     /**

   139      * The maximum radix available for conversion to and from strings.

   140      * The constant value of this field is the largest value permitted

   141      * for the radix argument in radix-conversion methods such as the

   142      * {@code digit} method, the {@code forDigit} method, and the

   143      * {@code toString} method of class {@code Integer}.

   144      *

   145      * @see     Character#digit(char, int)

   146      * @see     Character#forDigit(int, int)

   147      * @see     Integer#toString(int, int)

   148      * @see     Integer#valueOf(String)

   149      */

   150     public static final int MAX_RADIX = 36;

   152     /**

   153      * The constant value of this field is the smallest value of type

   154      * {@code char}, {@code '\u005Cu0000'}.

   155      *

   156      * @since   1.0.2

   157      */

   158     public static final char MIN_VALUE = '\u0000';

   160     /**

   161      * The constant value of this field is the largest value of type

   162      * {@code char}, {@code '\u005CuFFFF'}.

   163      *

   164      * @since   1.0.2

   165      */

   166     public static final char MAX_VALUE = '\uFFFF';

   168     /**

   169      * The {@code Class} instance representing the primitive type

   170      * {@code char}.

   171      *

   172      * @since   1.1

   173      */

   174     @SuppressWarnings("unchecked")

   175     public static final Class<Character> TYPE = Class.getPrimitiveClass("char");

   177     /*

   178      * Normative general types

   179      */

   181     /*

   182      * General character types

   183      */

   185     /**

   186      * General category "Cn" in the Unicode specification.

   187      * @since   1.1

   188      */

   189     public static final byte UNASSIGNED = 0;

   191     /**

   192      * General category "Lu" in the Unicode specification.

   193      * @since   1.1

   194      */

   195     public static final byte UPPERCASE_LETTER = 1;

   197     /**

   198      * General category "Ll" in the Unicode specification.

   199      * @since   1.1

   200      */

   201     public static final byte LOWERCASE_LETTER = 2;

   203     /**

   204      * General category "Lt" in the Unicode specification.

   205      * @since   1.1

   206      */

   207     public static final byte TITLECASE_LETTER = 3;

   209     /**

   210      * General category "Lm" in the Unicode specification.

   211      * @since   1.1

   212      */

   213     public static final byte MODIFIER_LETTER = 4;

   215     /**

   216      * General category "Lo" in the Unicode specification.

   217      * @since   1.1

   218      */

   219     public static final byte OTHER_LETTER = 5;

   221     /**

   222      * General category "Mn" in the Unicode specification.

   223      * @since   1.1

   224      */

   225     public static final byte NON_SPACING_MARK = 6;

   227     /**

   228      * General category "Me" in the Unicode specification.

   229      * @since   1.1

   230      */

   231     public static final byte ENCLOSING_MARK = 7;

   233     /**

   234      * General category "Mc" in the Unicode specification.

   235      * @since   1.1

   236      */

   237     public static final byte COMBINING_SPACING_MARK = 8;

   239     /**

   240      * General category "Nd" in the Unicode specification.

   241      * @since   1.1

   242      */

   243     public static final byte DECIMAL_DIGIT_NUMBER        = 9;

   245     /**

   246      * General category "Nl" in the Unicode specification.

   247      * @since   1.1

   248      */

   249     public static final byte LETTER_NUMBER = 10;

   251     /**

   252      * General category "No" in the Unicode specification.

   253      * @since   1.1

   254      */

   255     public static final byte OTHER_NUMBER = 11;

   257     /**

   258      * General category "Zs" in the Unicode specification.

   259      * @since   1.1

   260      */

   261     public static final byte SPACE_SEPARATOR = 12;

   263     /**

   264      * General category "Zl" in the Unicode specification.

   265      * @since   1.1

   266      */

   267     public static final byte LINE_SEPARATOR = 13;

   269     /**

   270      * General category "Zp" in the Unicode specification.

   271      * @since   1.1

   272      */

   273     public static final byte PARAGRAPH_SEPARATOR = 14;

   275     /**

   276      * General category "Cc" in the Unicode specification.

   277      * @since   1.1

   278      */

   279     public static final byte CONTROL = 15;

   281     /**

   282      * General category "Cf" in the Unicode specification.

   283      * @since   1.1

   284      */

   285     public static final byte FORMAT = 16;

   287     /**

   288      * General category "Co" in the Unicode specification.

   289      * @since   1.1

   290      */

   291     public static final byte PRIVATE_USE = 18;

   293     /**

   294      * General category "Cs" in the Unicode specification.

   295      * @since   1.1

   296      */

   297     public static final byte SURROGATE = 19;

   299     /**

   300      * General category "Pd" in the Unicode specification.

   301      * @since   1.1

   302      */

   303     public static final byte DASH_PUNCTUATION = 20;

   305     /**

   306      * General category "Ps" in the Unicode specification.

   307      * @since   1.1

   308      */

   309     public static final byte START_PUNCTUATION = 21;

   311     /**

   312      * General category "Pe" in the Unicode specification.

   313      * @since   1.1

   314      */

   315     public static final byte END_PUNCTUATION = 22;

   317     /**

   318      * General category "Pc" in the Unicode specification.

   319      * @since   1.1

   320      */

   321     public static final byte CONNECTOR_PUNCTUATION = 23;

   323     /**

   324      * General category "Po" in the Unicode specification.

   325      * @since   1.1

   326      */

   327     public static final byte OTHER_PUNCTUATION = 24;

   329     /**

   330      * General category "Sm" in the Unicode specification.

   331      * @since   1.1

   332      */

   333     public static final byte MATH_SYMBOL = 25;

   335     /**

   336      * General category "Sc" in the Unicode specification.

   337      * @since   1.1

   338      */

   339     public static final byte CURRENCY_SYMBOL = 26;

   341     /**

   342      * General category "Sk" in the Unicode specification.

   343      * @since   1.1

   344      */

   345     public static final byte MODIFIER_SYMBOL = 27;

   347     /**

   348      * General category "So" in the Unicode specification.

   349      * @since   1.1

   350      */

   351     public static final byte OTHER_SYMBOL = 28;

   353     /**

   354      * General category "Pi" in the Unicode specification.

   355      * @since   1.4

   356      */

   357     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;

   359     /**

   360      * General category "Pf" in the Unicode specification.

   361      * @since   1.4

   362      */

   363     public static final byte FINAL_QUOTE_PUNCTUATION = 30;

   365     /**

   366      * Error flag. Use int (code point) to avoid confusion with U+FFFF.

   367      */

   368     static final int ERROR = 0xFFFFFFFF;

   371     /**

   372      * Undefined bidirectional character type. Undefined {@code char}

   373      * values have undefined directionality in the Unicode specification.

   374      * @since 1.4

   375      */

   376     public static final byte DIRECTIONALITY_UNDEFINED = -1;

   378     /**

   379      * Strong bidirectional character type "L" in the Unicode specification.

   380      * @since 1.4

   381      */

   382     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;

   384     /**

   385      * Strong bidirectional character type "R" in the Unicode specification.

   386      * @since 1.4

   387      */

   388     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;

   390     /**

   391      * Strong bidirectional character type "AL" in the Unicode specification.

   392      * @since 1.4

   393      */

   394     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;

   396     /**

   397      * Weak bidirectional character type "EN" in the Unicode specification.

   398      * @since 1.4

   399      */

   400     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;

   402     /**

   403      * Weak bidirectional character type "ES" in the Unicode specification.

   404      * @since 1.4

   405      */

   406     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;

   408     /**

   409      * Weak bidirectional character type "ET" in the Unicode specification.

   410      * @since 1.4

   411      */

   412     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;

   414     /**

   415      * Weak bidirectional character type "AN" in the Unicode specification.

   416      * @since 1.4

   417      */

   418     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;

   420     /**

   421      * Weak bidirectional character type "CS" in the Unicode specification.

   422      * @since 1.4

   423      */

   424     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;

   426     /**

   427      * Weak bidirectional character type "NSM" in the Unicode specification.

   428      * @since 1.4

   429      */

   430     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;

   432     /**

   433      * Weak bidirectional character type "BN" in the Unicode specification.

   434      * @since 1.4

   435      */

   436     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;

   438     /**

   439      * Neutral bidirectional character type "B" in the Unicode specification.

   440      * @since 1.4

   441      */

   442     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;

   444     /**

   445      * Neutral bidirectional character type "S" in the Unicode specification.

   446      * @since 1.4

   447      */

   448     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;

   450     /**

   451      * Neutral bidirectional character type "WS" in the Unicode specification.

   452      * @since 1.4

   453      */

   454     public static final byte DIRECTIONALITY_WHITESPACE = 12;

   456     /**

   457      * Neutral bidirectional character type "ON" in the Unicode specification.

   458      * @since 1.4

   459      */

   460     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;

   462     /**

   463      * Strong bidirectional character type "LRE" in the Unicode specification.

   464      * @since 1.4

   465      */

   466     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;

   468     /**

   469      * Strong bidirectional character type "LRO" in the Unicode specification.

   470      * @since 1.4

   471      */

   472     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;

   474     /**

   475      * Strong bidirectional character type "RLE" in the Unicode specification.

   476      * @since 1.4

   477      */

   478     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;

   480     /**

   481      * Strong bidirectional character type "RLO" in the Unicode specification.

   482      * @since 1.4

   483      */

   484     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;

   486     /**

   487      * Weak bidirectional character type "PDF" in the Unicode specification.

   488      * @since 1.4

   489      */

   490     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;

   492     /**

   493      * The minimum value of a

   494      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">

   495      * Unicode high-surrogate code unit</a>

   496      * in the UTF-16 encoding, constant {@code '\u005CuD800'}.

   497      * A high-surrogate is also known as a <i>leading-surrogate</i>.

   498      *

   499      * @since 1.5

   500      */

   501     public static final char MIN_HIGH_SURROGATE = '\uD800';

   503     /**

   504      * The maximum value of a

   505      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">

   506      * Unicode high-surrogate code unit</a>

   507      * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.

   508      * A high-surrogate is also known as a <i>leading-surrogate</i>.

   509      *

   510      * @since 1.5

   511      */

   512     public static final char MAX_HIGH_SURROGATE = '\uDBFF';

   514     /**

   515      * The minimum value of a

   516      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">

   517      * Unicode low-surrogate code unit</a>

   518      * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.

   519      * A low-surrogate is also known as a <i>trailing-surrogate</i>.

   520      *

   521      * @since 1.5

   522      */

   523     public static final char MIN_LOW_SURROGATE  = '\uDC00';

   525     /**

   526      * The maximum value of a

   527      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">

   528      * Unicode low-surrogate code unit</a>

   529      * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.

   530      * A low-surrogate is also known as a <i>trailing-surrogate</i>.

   531      *

   532      * @since 1.5

   533      */

   534     public static final char MAX_LOW_SURROGATE  = '\uDFFF';

   536     /**

   537      * The minimum value of a Unicode surrogate code unit in the

   538      * UTF-16 encoding, constant {@code '\u005CuD800'}.

   539      *

   540      * @since 1.5

   541      */

   542     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;

   544     /**

   545      * The maximum value of a Unicode surrogate code unit in the

   546      * UTF-16 encoding, constant {@code '\u005CuDFFF'}.

   547      *

   548      * @since 1.5

   549      */

   550     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;

   552     /**

   553      * The minimum value of a

   554      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">

   555      * Unicode supplementary code point</a>, constant {@code U+10000}.

   556      *

   557      * @since 1.5

   558      */

   559     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;

   561     /**

   562      * The minimum value of a

   563      * <a href="http://www.unicode.org/glossary/#code_point">

   564      * Unicode code point</a>, constant {@code U+0000}.

   565      *

   566      * @since 1.5

   567      */

   568     public static final int MIN_CODE_POINT = 0x000000;

   570     /**

   571      * The maximum value of a

   572      * <a href="http://www.unicode.org/glossary/#code_point">

   573      * Unicode code point</a>, constant {@code U+10FFFF}.

   574      *

   575      * @since 1.5

   576      */

   577     public static final int MAX_CODE_POINT = 0X10FFFF;

   580     /**

   581      * Instances of this class represent particular subsets of the Unicode

   582      * character set.  The only family of subsets defined in the

   583      * {@code Character} class is {@link Character.UnicodeBlock}.

   584      * Other portions of the Java API may define other subsets for their

   585      * own purposes.

   586      *

   587      * @since 1.2

   588      */

   589     public static class Subset  {

   591         private String name;

   593         /**

   594          * Constructs a new {@code Subset} instance.

   595          *

   596          * @param  name  The name of this subset

   597          * @exception NullPointerException if name is {@code null}

   598          */

   599         protected Subset(String name) {

   600             if (name == null) {

   601                 throw new NullPointerException("name");

   602             }

   603             this.name = name;

   604         }

   606         /**

   607          * Compares two {@code Subset} objects for equality.

   608          * This method returns {@code true} if and only if

   609          * {@code this} and the argument refer to the same

   610          * object; since this method is {@code final}, this

   611          * guarantee holds for all subclasses.

   612          */

   613         public final boolean equals(Object obj) {

   614             return (this == obj);

   615         }

   617         /**

   618          * Returns the standard hash code as defined by the

   619          * {@link Object#hashCode} method.  This method

   620          * is {@code final} in order to ensure that the

   621          * {@code equals} and {@code hashCode} methods will

   622          * be consistent in all subclasses.

   623          */

   624         public final int hashCode() {

   625             return super.hashCode();

   626         }

   628         /**

   629          * Returns the name of this subset.

   630          */

   631         public final String toString() {

   632             return name;

   633         }

   634     }

   636     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt

   637     // for the latest specification of Unicode Blocks.

   639     /**

   640      * A family of character subsets representing the character blocks in the

   641      * Unicode specification. Character blocks generally define characters

   642      * used for a specific script or purpose. A character is contained by

   643      * at most one Unicode block.

   644      *

   645      * @since 1.2

   646      */

   647     public static final class UnicodeBlock extends Subset {

   649         private static Map<String, UnicodeBlock> map = new HashMap<>(256);

   651         /**

   652          * Creates a UnicodeBlock with the given identifier name.

   653          * This name must be the same as the block identifier.

   654          */

   private UnicodeBlock(String idName) {
       // The Subset constructor stores the name and throws
       // NullPointerException when it is null.
       super(idName);
       // Register this block in the static lookup map under its identifier.
       UnicodeBlock.map.put(idName, this);
   }

   660         /**

   661          * Creates a UnicodeBlock with the given identifier name and

   662          * alias name.

   663          */

   private UnicodeBlock(String idName, String alias) {
       // Delegate to the single-name constructor, which registers idName.
       this(idName);
       // Make the same instance reachable through its alias as well.
       UnicodeBlock.map.put(alias, this);
   }

   669         /**

   670          * Creates a UnicodeBlock with the given identifier name and

   671          * alias names.

   672          */

   private UnicodeBlock(String idName, String... aliases) {
       // Delegate to the single-name constructor, which registers idName.
       this(idName);
       // Every alias maps to this same instance, inserted in argument order.
       for (int i = 0; i < aliases.length; i++) {
           UnicodeBlock.map.put(aliases[i], this);
       }
   }

   679         /**

   680          * Constant for the "Basic Latin" Unicode character block.

   681          * @since 1.2

   682          */

   683         public static final UnicodeBlock  BASIC_LATIN =

   684             new UnicodeBlock("BASIC_LATIN",

   685                              "BASIC LATIN",

   686                              "BASICLATIN");

   688         /**

   689          * Constant for the "Latin-1 Supplement" Unicode character block.

   690          * @since 1.2

   691          */

   692         public static final UnicodeBlock LATIN_1_SUPPLEMENT =

   693             new UnicodeBlock("LATIN_1_SUPPLEMENT",

   694                              "LATIN-1 SUPPLEMENT",

   695                              "LATIN-1SUPPLEMENT");

   697         /**

   698          * Constant for the "Latin Extended-A" Unicode character block.

   699          * @since 1.2

   700          */

   701         public static final UnicodeBlock LATIN_EXTENDED_A =

   702             new UnicodeBlock("LATIN_EXTENDED_A",

   703                              "LATIN EXTENDED-A",

   704                              "LATINEXTENDED-A");

   706         /**

   707          * Constant for the "Latin Extended-B" Unicode character block.

   708          * @since 1.2

   709          */

   710         public static final UnicodeBlock LATIN_EXTENDED_B =

   711             new UnicodeBlock("LATIN_EXTENDED_B",

   712                              "LATIN EXTENDED-B",

   713                              "LATINEXTENDED-B");

   715         /**

   716          * Constant for the "IPA Extensions" Unicode character block.

   717          * @since 1.2

   718          */

   719         public static final UnicodeBlock IPA_EXTENSIONS =

   720             new UnicodeBlock("IPA_EXTENSIONS",

   721                              "IPA EXTENSIONS",

   722                              "IPAEXTENSIONS");

   724         /**

   725          * Constant for the "Spacing Modifier Letters" Unicode character block.

   726          * @since 1.2

   727          */

   728         public static final UnicodeBlock SPACING_MODIFIER_LETTERS =

   729             new UnicodeBlock("SPACING_MODIFIER_LETTERS",

   730                              "SPACING MODIFIER LETTERS",

   731                              "SPACINGMODIFIERLETTERS");

   733         /**

   734          * Constant for the "Combining Diacritical Marks" Unicode character block.

   735          * @since 1.2

   736          */

   737         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =

   738             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",

   739                              "COMBINING DIACRITICAL MARKS",

   740                              "COMBININGDIACRITICALMARKS");

   742         /**

   743          * Constant for the "Greek and Coptic" Unicode character block.

   744          * <p>

   745          * This block was previously known as the "Greek" block.

   746          *

   747          * @since 1.2

   748          */

   749         public static final UnicodeBlock GREEK =

   750             new UnicodeBlock("GREEK",

   751                              "GREEK AND COPTIC",

   752                              "GREEKANDCOPTIC");

   754         /**

   755          * Constant for the "Cyrillic" Unicode character block.

   756          * @since 1.2

   757          */

   758         public static final UnicodeBlock CYRILLIC =

   759             new UnicodeBlock("CYRILLIC");

   761         /**

   762          * Constant for the "Armenian" Unicode character block.

   763          * @since 1.2

   764          */

   765         public static final UnicodeBlock ARMENIAN =

   766             new UnicodeBlock("ARMENIAN");

   768         /**

   769          * Constant for the "Hebrew" Unicode character block.

   770          * @since 1.2

   771          */

   772         public static final UnicodeBlock HEBREW =

   773             new UnicodeBlock("HEBREW");

   775         /**

   776          * Constant for the "Arabic" Unicode character block.

   777          * @since 1.2

   778          */

   779         public static final UnicodeBlock ARABIC =

   780             new UnicodeBlock("ARABIC");

   782         /**

   783          * Constant for the "Devanagari" Unicode character block.

   784          * @since 1.2

   785          */

   786         public static final UnicodeBlock DEVANAGARI =

   787             new UnicodeBlock("DEVANAGARI");

   789         /**

   790          * Constant for the "Bengali" Unicode character block.

   791          * @since 1.2

   792          */

   793         public static final UnicodeBlock BENGALI =

   794             new UnicodeBlock("BENGALI");

   796         /**

   797          * Constant for the "Gurmukhi" Unicode character block.

   798          * @since 1.2

   799          */

   800         public static final UnicodeBlock GURMUKHI =

   801             new UnicodeBlock("GURMUKHI");

   803         /**

   804          * Constant for the "Gujarati" Unicode character block.

   805          * @since 1.2

   806          */

   807         public static final UnicodeBlock GUJARATI =

   808             new UnicodeBlock("GUJARATI");

   810         /**

   811          * Constant for the "Oriya" Unicode character block.

   812          * @since 1.2

   813          */

   814         public static final UnicodeBlock ORIYA =

   815             new UnicodeBlock("ORIYA");

   817         /**

   818          * Constant for the "Tamil" Unicode character block.

   819          * @since 1.2

   820          */

   821         public static final UnicodeBlock TAMIL =

   822             new UnicodeBlock("TAMIL");

   824         /**

   825          * Constant for the "Telugu" Unicode character block.

   826          * @since 1.2

   827          */

   828         public static final UnicodeBlock TELUGU =

   829             new UnicodeBlock("TELUGU");

   831         /**

   832          * Constant for the "Kannada" Unicode character block.

   833          * @since 1.2

   834          */

   835         public static final UnicodeBlock KANNADA =

   836             new UnicodeBlock("KANNADA");

   838         /**

   839          * Constant for the "Malayalam" Unicode character block.

   840          * @since 1.2

   841          */

   842         public static final UnicodeBlock MALAYALAM =

   843             new UnicodeBlock("MALAYALAM");

   845         /**

   846          * Constant for the "Thai" Unicode character block.

   847          * @since 1.2

   848          */

   849         public static final UnicodeBlock THAI =

   850             new UnicodeBlock("THAI");

   852         /**

   853          * Constant for the "Lao" Unicode character block.

   854          * @since 1.2

   855          */

   856         public static final UnicodeBlock LAO =

   857             new UnicodeBlock("LAO");

   859         /**

   860          * Constant for the "Tibetan" Unicode character block.

   861          * @since 1.2

   862          */

   863         public static final UnicodeBlock TIBETAN =

   864             new UnicodeBlock("TIBETAN");

   866         /**

   867          * Constant for the "Georgian" Unicode character block.

   868          * @since 1.2

   869          */

   870         public static final UnicodeBlock GEORGIAN =

   871             new UnicodeBlock("GEORGIAN");

   873         /**

   874          * Constant for the "Hangul Jamo" Unicode character block.

   875          * @since 1.2

   876          */

   877         public static final UnicodeBlock HANGUL_JAMO =

   878             new UnicodeBlock("HANGUL_JAMO",

   879                              "HANGUL JAMO",

   880                              "HANGULJAMO");

   882         /**

   883          * Constant for the "Latin Extended Additional" Unicode character block.

   884          * @since 1.2

   885          */

   886         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =

   887             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",

   888                              "LATIN EXTENDED ADDITIONAL",

   889                              "LATINEXTENDEDADDITIONAL");

   891         /**

   892          * Constant for the "Greek Extended" Unicode character block.

   893          * @since 1.2

   894          */

   895         public static final UnicodeBlock GREEK_EXTENDED =

   896             new UnicodeBlock("GREEK_EXTENDED",

   897                              "GREEK EXTENDED",

   898                              "GREEKEXTENDED");

   900         /**

   901          * Constant for the "General Punctuation" Unicode character block.

   902          * @since 1.2

   903          */

   904         public static final UnicodeBlock GENERAL_PUNCTUATION =

   905             new UnicodeBlock("GENERAL_PUNCTUATION",

   906                              "GENERAL PUNCTUATION",

   907                              "GENERALPUNCTUATION");

   909         /**

   910          * Constant for the "Superscripts and Subscripts" Unicode character

   911          * block.

   912          * @since 1.2

   913          */

   914         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =

   915             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",

   916                              "SUPERSCRIPTS AND SUBSCRIPTS",

   917                              "SUPERSCRIPTSANDSUBSCRIPTS");

   919         /**

   920          * Constant for the "Currency Symbols" Unicode character block.

   921          * @since 1.2

   922          */

   923         public static final UnicodeBlock CURRENCY_SYMBOLS =

   924             new UnicodeBlock("CURRENCY_SYMBOLS",

   925                              "CURRENCY SYMBOLS",

   926                              "CURRENCYSYMBOLS");

   928         /**

   929          * Constant for the "Combining Diacritical Marks for Symbols" Unicode

   930          * character block.

   931          * <p>

   932          * This block was previously known as "Combining Marks for Symbols".

                * The field name still follows that earlier name, while the strings

                * passed to the constructor spell out both names, each once with

                * spaces and once without.

   933          * @since 1.2

   934          */

   935         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =

   936             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",

   937                              "COMBINING DIACRITICAL MARKS FOR SYMBOLS",

   938                              "COMBININGDIACRITICALMARKSFORSYMBOLS",

   939                              "COMBINING MARKS FOR SYMBOLS",

   940                              "COMBININGMARKSFORSYMBOLS");

   942         /**

   943          * Constant for the "Letterlike Symbols" Unicode character block.

   944          * @since 1.2

   945          */

   946         public static final UnicodeBlock LETTERLIKE_SYMBOLS =

   947             new UnicodeBlock("LETTERLIKE_SYMBOLS",

   948                              "LETTERLIKE SYMBOLS",

   949                              "LETTERLIKESYMBOLS");

   951         /**

   952          * Constant for the "Number Forms" Unicode character block.

   953          * @since 1.2

   954          */

   955         public static final UnicodeBlock NUMBER_FORMS =

   956             new UnicodeBlock("NUMBER_FORMS",

   957                              "NUMBER FORMS",

   958                              "NUMBERFORMS");

   960         /**

   961          * Constant for the "Arrows" Unicode character block.

   962          * @since 1.2

   963          */

   964         public static final UnicodeBlock ARROWS =

   965             new UnicodeBlock("ARROWS");

   967         /**

   968          * Constant for the "Mathematical Operators" Unicode character block.

   969          * @since 1.2

   970          */

   971         public static final UnicodeBlock MATHEMATICAL_OPERATORS =

   972             new UnicodeBlock("MATHEMATICAL_OPERATORS",

   973                              "MATHEMATICAL OPERATORS",

   974                              "MATHEMATICALOPERATORS");

   976         /**

   977          * Constant for the "Miscellaneous Technical" Unicode character block.

   978          * @since 1.2

   979          */

   980         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =

   981             new UnicodeBlock("MISCELLANEOUS_TECHNICAL",

   982                              "MISCELLANEOUS TECHNICAL",

   983                              "MISCELLANEOUSTECHNICAL");

   985         /**

   986          * Constant for the "Control Pictures" Unicode character block.

   987          * @since 1.2

   988          */

   989         public static final UnicodeBlock CONTROL_PICTURES =

   990             new UnicodeBlock("CONTROL_PICTURES",

   991                              "CONTROL PICTURES",

   992                              "CONTROLPICTURES");

   994         /**

   995          * Constant for the "Optical Character Recognition" Unicode character block.

   996          * @since 1.2

   997          */

   998         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =

   999             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",

  1000                              "OPTICAL CHARACTER RECOGNITION",

  1001                              "OPTICALCHARACTERRECOGNITION");

  1003         /**

  1004          * Constant for the "Enclosed Alphanumerics" Unicode character block.

  1005          * @since 1.2

  1006          */

  1007         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =

  1008             new UnicodeBlock("ENCLOSED_ALPHANUMERICS",

  1009                              "ENCLOSED ALPHANUMERICS",

  1010                              "ENCLOSEDALPHANUMERICS");

  1012         /**

  1013          * Constant for the "Box Drawing" Unicode character block.

  1014          * @since 1.2

  1015          */

  1016         public static final UnicodeBlock BOX_DRAWING =

  1017             new UnicodeBlock("BOX_DRAWING",

  1018                              "BOX DRAWING",

  1019                              "BOXDRAWING");

  1021         /**

  1022          * Constant for the "Block Elements" Unicode character block.

  1023          * @since 1.2

  1024          */

  1025         public static final UnicodeBlock BLOCK_ELEMENTS =

  1026             new UnicodeBlock("BLOCK_ELEMENTS",

  1027                              "BLOCK ELEMENTS",

  1028                              "BLOCKELEMENTS");

  1030         /**

  1031          * Constant for the "Geometric Shapes" Unicode character block.

  1032          * @since 1.2

  1033          */

  1034         public static final UnicodeBlock GEOMETRIC_SHAPES =

  1035             new UnicodeBlock("GEOMETRIC_SHAPES",

  1036                              "GEOMETRIC SHAPES",

  1037                              "GEOMETRICSHAPES");

  1039         /**

  1040          * Constant for the "Miscellaneous Symbols" Unicode character block.

  1041          * @since 1.2

  1042          */

  1043         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =

  1044             new UnicodeBlock("MISCELLANEOUS_SYMBOLS",

  1045                              "MISCELLANEOUS SYMBOLS",

  1046                              "MISCELLANEOUSSYMBOLS");

  1048         /**

  1049          * Constant for the "Dingbats" Unicode character block.

  1050          * @since 1.2

  1051          */

  1052         public static final UnicodeBlock DINGBATS =

  1053             new UnicodeBlock("DINGBATS");

  1055         /**

  1056          * Constant for the "CJK Symbols and Punctuation" Unicode character block.

  1057          * @since 1.2

  1058          */

  1059         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =

  1060             new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",

  1061                              "CJK SYMBOLS AND PUNCTUATION",

  1062                              "CJKSYMBOLSANDPUNCTUATION");

  1064         /**

  1065          * Constant for the "Hiragana" Unicode character block.

  1066          * @since 1.2

  1067          */

  1068         public static final UnicodeBlock HIRAGANA =

  1069             new UnicodeBlock("HIRAGANA");

  1071         /**

  1072          * Constant for the "Katakana" Unicode character block.

  1073          * @since 1.2

  1074          */

  1075         public static final UnicodeBlock KATAKANA =

  1076             new UnicodeBlock("KATAKANA");

  1078         /**

  1079          * Constant for the "Bopomofo" Unicode character block.

  1080          * @since 1.2

  1081          */

  1082         public static final UnicodeBlock BOPOMOFO =

  1083             new UnicodeBlock("BOPOMOFO");

  1085         /**

  1086          * Constant for the "Hangul Compatibility Jamo" Unicode character block.

  1087          * @since 1.2

  1088          */

  1089         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =

  1090             new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",

  1091                              "HANGUL COMPATIBILITY JAMO",

  1092                              "HANGULCOMPATIBILITYJAMO");

  1094         /**

  1095          * Constant for the "Kanbun" Unicode character block.

  1096          * @since 1.2

  1097          */

  1098         public static final UnicodeBlock KANBUN =

  1099             new UnicodeBlock("KANBUN");

  1101         /**

  1102          * Constant for the "Enclosed CJK Letters and Months" Unicode character block.

  1103          * @since 1.2

  1104          */

  1105         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =

  1106             new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",

  1107                              "ENCLOSED CJK LETTERS AND MONTHS",

  1108                              "ENCLOSEDCJKLETTERSANDMONTHS");

  1110         /**

  1111          * Constant for the "CJK Compatibility" Unicode character block.

  1112          * @since 1.2

  1113          */

  1114         public static final UnicodeBlock CJK_COMPATIBILITY =

  1115             new UnicodeBlock("CJK_COMPATIBILITY",

  1116                              "CJK COMPATIBILITY",

  1117                              "CJKCOMPATIBILITY");

  1119         /**

  1120          * Constant for the "CJK Unified Ideographs" Unicode character block.

  1121          * @since 1.2

  1122          */

  1123         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =

  1124             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",

  1125                              "CJK UNIFIED IDEOGRAPHS",

  1126                              "CJKUNIFIEDIDEOGRAPHS");

  1128         /**

  1129          * Constant for the "Hangul Syllables" Unicode character block.

  1130          * @since 1.2

  1131          */

  1132         public static final UnicodeBlock HANGUL_SYLLABLES =

  1133             new UnicodeBlock("HANGUL_SYLLABLES",

  1134                              "HANGUL SYLLABLES",

  1135                              "HANGULSYLLABLES");

  1137         /**

  1138          * Constant for the "Private Use Area" Unicode character block.

  1139          * @since 1.2

  1140          */

  1141         public static final UnicodeBlock PRIVATE_USE_AREA =

  1142             new UnicodeBlock("PRIVATE_USE_AREA",

  1143                              "PRIVATE USE AREA",

  1144                              "PRIVATEUSEAREA");

  1146         /**

  1147          * Constant for the "CJK Compatibility Ideographs" Unicode character

  1148          * block.

  1149          * @since 1.2

  1150          */

  1151         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =

  1152             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",

  1153                              "CJK COMPATIBILITY IDEOGRAPHS",

  1154                              "CJKCOMPATIBILITYIDEOGRAPHS");

  1156         /**

  1157          * Constant for the "Alphabetic Presentation Forms" Unicode character block.

  1158          * @since 1.2

  1159          */

  1160         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =

  1161             new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",

  1162                              "ALPHABETIC PRESENTATION FORMS",

  1163                              "ALPHABETICPRESENTATIONFORMS");

  1165         /**

  1166          * Constant for the "Arabic Presentation Forms-A" Unicode character

  1167          * block.

                * <p>

                * The two alias strings passed to the constructor keep the hyphen of

                * the block name ("...FORMS-A"); only the first, identifier-style

                * string replaces it with an underscore.

  1168          * @since 1.2

  1169          */

  1170         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =

  1171             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",

  1172                              "ARABIC PRESENTATION FORMS-A",

  1173                              "ARABICPRESENTATIONFORMS-A");

  1175         /**

  1176          * Constant for the "Combining Half Marks" Unicode character block.

  1177          * @since 1.2

  1178          */

  1179         public static final UnicodeBlock COMBINING_HALF_MARKS =

  1180             new UnicodeBlock("COMBINING_HALF_MARKS",

  1181                              "COMBINING HALF MARKS",

  1182                              "COMBININGHALFMARKS");

  1184         /**

  1185          * Constant for the "CJK Compatibility Forms" Unicode character block.

  1186          * @since 1.2

  1187          */

  1188         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =

  1189             new UnicodeBlock("CJK_COMPATIBILITY_FORMS",

  1190                              "CJK COMPATIBILITY FORMS",

  1191                              "CJKCOMPATIBILITYFORMS");

  1193         /**

  1194          * Constant for the "Small Form Variants" Unicode character block.

  1195          * @since 1.2

  1196          */

  1197         public static final UnicodeBlock SMALL_FORM_VARIANTS =

  1198             new UnicodeBlock("SMALL_FORM_VARIANTS",

  1199                              "SMALL FORM VARIANTS",

  1200                              "SMALLFORMVARIANTS");

  1202         /**

  1203          * Constant for the "Arabic Presentation Forms-B" Unicode character

                * block.

                * <p>

                * The two alias strings passed to the constructor keep the hyphen of

                * the block name ("...FORMS-B"); only the first, identifier-style

                * string replaces it with an underscore.

  1204          * @since 1.2

  1205          */

  1206         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =

  1207             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",

  1208                              "ARABIC PRESENTATION FORMS-B",

  1209                              "ARABICPRESENTATIONFORMS-B");

  1211         /**

  1212          * Constant for the "Halfwidth and Fullwidth Forms" Unicode character

  1213          * block.

  1214          * @since 1.2

  1215          */

  1216         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =

  1217             new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",

  1218                              "HALFWIDTH AND FULLWIDTH FORMS",

  1219                              "HALFWIDTHANDFULLWIDTHFORMS");

  1221         /**

  1222          * Constant for the "Specials" Unicode character block.

  1223          * @since 1.2

  1224          */

  1225         public static final UnicodeBlock SPECIALS =

  1226             new UnicodeBlock("SPECIALS");

  1228         /**

                * Constant for the "Surrogates Area" Unicode character block,

                * created with the single identifier "SURROGATES_AREA".

                *

  1229          * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},

  1230          *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and

  1231          *             {@link #LOW_SURROGATES}. These new constants match

  1232          *             the block definitions of the Unicode Standard.

  1233          *             The {@link #of(char)} and {@link #of(int)} methods

  1234          *             return the new constants, not SURROGATES_AREA.

  1235          */

  1236         @Deprecated

  1237         public static final UnicodeBlock SURROGATES_AREA =

  1238             new UnicodeBlock("SURROGATES_AREA");

  1240         /**

  1241          * Constant for the "Syriac" Unicode character block.

  1242          * @since 1.4

  1243          */

  1244         public static final UnicodeBlock SYRIAC =

  1245             new UnicodeBlock("SYRIAC");

  1247         /**

  1248          * Constant for the "Thaana" Unicode character block.

  1249          * @since 1.4

  1250          */

  1251         public static final UnicodeBlock THAANA =

  1252             new UnicodeBlock("THAANA");

  1254         /**

  1255          * Constant for the "Sinhala" Unicode character block.

  1256          * @since 1.4

  1257          */

  1258         public static final UnicodeBlock SINHALA =

  1259             new UnicodeBlock("SINHALA");

  1261         /**

  1262          * Constant for the "Myanmar" Unicode character block.

  1263          * @since 1.4

  1264          */

  1265         public static final UnicodeBlock MYANMAR =

  1266             new UnicodeBlock("MYANMAR");

  1268         /**

  1269          * Constant for the "Ethiopic" Unicode character block.

  1270          * @since 1.4

  1271          */

  1272         public static final UnicodeBlock ETHIOPIC =

  1273             new UnicodeBlock("ETHIOPIC");

  1275         /**

  1276          * Constant for the "Cherokee" Unicode character block.

  1277          * @since 1.4

  1278          */

  1279         public static final UnicodeBlock CHEROKEE =

  1280             new UnicodeBlock("CHEROKEE");

  1282         /**

  1283          * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.

  1284          * @since 1.4

  1285          */

  1286         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =

  1287             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",

  1288                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS",

  1289                              "UNIFIEDCANADIANABORIGINALSYLLABICS");

  1291         /**

  1292          * Constant for the "Ogham" Unicode character block.

  1293          * @since 1.4

  1294          */

  1295         public static final UnicodeBlock OGHAM =

  1296             new UnicodeBlock("OGHAM");

  1298         /**

  1299          * Constant for the "Runic" Unicode character block.

  1300          * @since 1.4

  1301          */

  1302         public static final UnicodeBlock RUNIC =

  1303             new UnicodeBlock("RUNIC");

  1305         /**

  1306          * Constant for the "Khmer" Unicode character block.

  1307          * @since 1.4

  1308          */

  1309         public static final UnicodeBlock KHMER =

  1310             new UnicodeBlock("KHMER");

  1312         /**

  1313          * Constant for the "Mongolian" Unicode character block.

  1314          * @since 1.4

  1315          */

  1316         public static final UnicodeBlock MONGOLIAN =

  1317             new UnicodeBlock("MONGOLIAN");

  1319         /**

  1320          * Constant for the "Braille Patterns" Unicode character block.

  1321          * @since 1.4

  1322          */

  1323         public static final UnicodeBlock BRAILLE_PATTERNS =

  1324             new UnicodeBlock("BRAILLE_PATTERNS",

  1325                              "BRAILLE PATTERNS",

  1326                              "BRAILLEPATTERNS");

  1328         /**

  1329          * Constant for the "CJK Radicals Supplement" Unicode character block.

  1330          * @since 1.4

  1331          */

  1332         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =

  1333             new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",

  1334                              "CJK RADICALS SUPPLEMENT",

  1335                              "CJKRADICALSSUPPLEMENT");

  1337         /**

  1338          * Constant for the "Kangxi Radicals" Unicode character block.

  1339          * @since 1.4

  1340          */

  1341         public static final UnicodeBlock KANGXI_RADICALS =

  1342             new UnicodeBlock("KANGXI_RADICALS",

  1343                              "KANGXI RADICALS",

  1344                              "KANGXIRADICALS");

  1346         /**

  1347          * Constant for the "Ideographic Description Characters" Unicode character block.

  1348          * @since 1.4

  1349          */

  1350         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =

  1351             new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",

  1352                              "IDEOGRAPHIC DESCRIPTION CHARACTERS",

  1353                              "IDEOGRAPHICDESCRIPTIONCHARACTERS");

  1355         /**

  1356          * Constant for the "Bopomofo Extended" Unicode character block.

  1357          * @since 1.4

  1358          */

  1359         public static final UnicodeBlock BOPOMOFO_EXTENDED =

  1360             new UnicodeBlock("BOPOMOFO_EXTENDED",

  1361                              "BOPOMOFO EXTENDED",

  1362                              "BOPOMOFOEXTENDED");

  1364         /**

  1365          * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.

  1366          * @since 1.4

  1367          */

  1368         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =

  1369             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",

  1370                              "CJK UNIFIED IDEOGRAPHS EXTENSION A",

  1371                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");

  1373         /**

  1374          * Constant for the "Yi Syllables" Unicode character block.

  1375          * @since 1.4

  1376          */

  1377         public static final UnicodeBlock YI_SYLLABLES =

  1378             new UnicodeBlock("YI_SYLLABLES",

  1379                              "YI SYLLABLES",

  1380                              "YISYLLABLES");

  1382         /**

  1383          * Constant for the "Yi Radicals" Unicode character block.

  1384          * @since 1.4

  1385          */

  1386         public static final UnicodeBlock YI_RADICALS =

  1387             new UnicodeBlock("YI_RADICALS",

  1388                              "YI RADICALS",

  1389                              "YIRADICALS");

  1391         /**

  1392          * Constant for the "Cyrillic Supplementary" Unicode character block.

                * <p>

                * Besides the "Cyrillic Supplementary" spellings, the constructor is

                * also passed "CYRILLIC SUPPLEMENT" and "CYRILLICSUPPLEMENT", so both

                * forms of the block name are supplied for this constant.

  1393          * @since 1.5

  1394          */

  1395         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =

  1396             new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",

  1397                              "CYRILLIC SUPPLEMENTARY",

  1398                              "CYRILLICSUPPLEMENTARY",

  1399                              "CYRILLIC SUPPLEMENT",

  1400                              "CYRILLICSUPPLEMENT");

  1402         /**

  1403          * Constant for the "Tagalog" Unicode character block.

  1404          * @since 1.5

  1405          */

  1406         public static final UnicodeBlock TAGALOG =

  1407             new UnicodeBlock("TAGALOG");

  1409         /**

  1410          * Constant for the "Hanunoo" Unicode character block.

  1411          * @since 1.5

  1412          */

  1413         public static final UnicodeBlock HANUNOO =

  1414             new UnicodeBlock("HANUNOO");

  1416         /**

  1417          * Constant for the "Buhid" Unicode character block.

  1418          * @since 1.5

  1419          */

  1420         public static final UnicodeBlock BUHID =

  1421             new UnicodeBlock("BUHID");

  1423         /**

  1424          * Constant for the "Tagbanwa" Unicode character block.

  1425          * @since 1.5

  1426          */

  1427         public static final UnicodeBlock TAGBANWA =

  1428             new UnicodeBlock("TAGBANWA");

  1430         /**

  1431          * Constant for the "Limbu" Unicode character block.

  1432          * @since 1.5

  1433          */

  1434         public static final UnicodeBlock LIMBU =

  1435             new UnicodeBlock("LIMBU");

  1437         /**

  1438          * Constant for the "Tai Le" Unicode character block.

  1439          * @since 1.5

  1440          */

  1441         public static final UnicodeBlock TAI_LE =

  1442             new UnicodeBlock("TAI_LE",

  1443                              "TAI LE",

  1444                              "TAILE");

  1446         /**

  1447          * Constant for the "Khmer Symbols" Unicode character block.

  1448          * @since 1.5

  1449          */

  1450         public static final UnicodeBlock KHMER_SYMBOLS =

  1451             new UnicodeBlock("KHMER_SYMBOLS",

  1452                              "KHMER SYMBOLS",

  1453                              "KHMERSYMBOLS");

  1455         /**

  1456          * Constant for the "Phonetic Extensions" Unicode character block.

  1457          * @since 1.5

  1458          */

  1459         public static final UnicodeBlock PHONETIC_EXTENSIONS =

  1460             new UnicodeBlock("PHONETIC_EXTENSIONS",

  1461                              "PHONETIC EXTENSIONS",

  1462                              "PHONETICEXTENSIONS");

  1464         /**

  1465          * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode

                * character block.

                * <p>

                * The two alias strings passed to the constructor keep the hyphen of

                * the block name ("...SYMBOLS-A"); only the first, identifier-style

                * string replaces it with an underscore.

  1466          * @since 1.5

  1467          */

  1468         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =

  1469             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",

  1470                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",

  1471                              "MISCELLANEOUSMATHEMATICALSYMBOLS-A");

  1473         /**

  1474          * Constant for the "Supplemental Arrows-A" Unicode character block.

                * <p>

                * The two alias strings passed to the constructor keep the hyphen of

                * the block name ("...ARROWS-A"); only the first, identifier-style

                * string replaces it with an underscore.

  1475          * @since 1.5

  1476          */

  1477         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =

  1478             new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",

  1479                              "SUPPLEMENTAL ARROWS-A",

  1480                              "SUPPLEMENTALARROWS-A");

  1482         /**

  1483          * Constant for the "Supplemental Arrows-B" Unicode character block.

                * <p>

                * The two alias strings passed to the constructor keep the hyphen of

                * the block name ("...ARROWS-B"); only the first, identifier-style

                * string replaces it with an underscore.

  1484          * @since 1.5

  1485          */

  1486         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =

  1487             new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",

  1488                              "SUPPLEMENTAL ARROWS-B",

  1489                              "SUPPLEMENTALARROWS-B");

  1491         /**

  1492          * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode

  1493          * character block.

                * <p>

                * The two alias strings passed to the constructor keep the hyphen of

                * the block name ("...SYMBOLS-B"); only the first, identifier-style

                * string replaces it with an underscore.

  1494          * @since 1.5

  1495          */

  1496         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =

  1497             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",

  1498                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",

  1499                              "MISCELLANEOUSMATHEMATICALSYMBOLS-B");

  1501         /**

  1502          * Constant for the "Supplemental Mathematical Operators" Unicode

  1503          * character block.

  1504          * @since 1.5

  1505          */

  1506         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =

  1507             new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",

  1508                              "SUPPLEMENTAL MATHEMATICAL OPERATORS",

  1509                              "SUPPLEMENTALMATHEMATICALOPERATORS");

  1511         /**

  1512          * Constant for the "Miscellaneous Symbols and Arrows" Unicode character

  1513          * block.

  1514          * @since 1.5

  1515          */

  1516         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =

  1517             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",

  1518                              "MISCELLANEOUS SYMBOLS AND ARROWS",

  1519                              "MISCELLANEOUSSYMBOLSANDARROWS");

  1521         /**

  1522          * Constant for the "Katakana Phonetic Extensions" Unicode character

  1523          * block.

  1524          * @since 1.5

  1525          */

  1526         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =

  1527             new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",

  1528                              "KATAKANA PHONETIC EXTENSIONS",

  1529                              "KATAKANAPHONETICEXTENSIONS");

  1531         /**

  1532          * Constant for the "Yijing Hexagram Symbols" Unicode character block.

  1533          * @since 1.5

  1534          */

  1535         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =

  1536             new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",

  1537                              "YIJING HEXAGRAM SYMBOLS",

  1538                              "YIJINGHEXAGRAMSYMBOLS");

  1540         /**

  1541          * Constant for the "Variation Selectors" Unicode character block.

  1542          * @since 1.5

  1543          */

  1544         public static final UnicodeBlock VARIATION_SELECTORS =

  1545             new UnicodeBlock("VARIATION_SELECTORS",

  1546                              "VARIATION SELECTORS",

  1547                              "VARIATIONSELECTORS");

  1549         /**

  1550          * Constant for the "Linear B Syllabary" Unicode character block.

  1551          * @since 1.5

  1552          */

  1553         public static final UnicodeBlock LINEAR_B_SYLLABARY =

  1554             new UnicodeBlock("LINEAR_B_SYLLABARY",

  1555                              "LINEAR B SYLLABARY",

  1556                              "LINEARBSYLLABARY");

  1558         /**

  1559          * Constant for the "Linear B Ideograms" Unicode character block.

  1560          * @since 1.5

  1561          */

  1562         public static final UnicodeBlock LINEAR_B_IDEOGRAMS =

  1563             new UnicodeBlock("LINEAR_B_IDEOGRAMS",

  1564                              "LINEAR B IDEOGRAMS",

  1565                              "LINEARBIDEOGRAMS");

  1567         /**

  1568          * Constant for the "Aegean Numbers" Unicode character block.

  1569          * @since 1.5

  1570          */

  1571         public static final UnicodeBlock AEGEAN_NUMBERS =

  1572             new UnicodeBlock("AEGEAN_NUMBERS",

  1573                              "AEGEAN NUMBERS",

  1574                              "AEGEANNUMBERS");

  1576         /**

  1577          * Constant for the "Old Italic" Unicode character block.

  1578          * @since 1.5

  1579          */

  1580         public static final UnicodeBlock OLD_ITALIC =

  1581             new UnicodeBlock("OLD_ITALIC",

  1582                              "OLD ITALIC",

  1583                              "OLDITALIC");

  1585         /**

  1586          * Constant for the "Gothic" Unicode character block.

  1587          * @since 1.5

  1588          */

  1589         public static final UnicodeBlock GOTHIC =

  1590             new UnicodeBlock("GOTHIC");

  1592         /**

  1593          * Constant for the "Ugaritic" Unicode character block.

  1594          * @since 1.5

  1595          */

  1596         public static final UnicodeBlock UGARITIC =

  1597             new UnicodeBlock("UGARITIC");

  1599         /**

  1600          * Constant for the "Deseret" Unicode character block.

  1601          * @since 1.5

  1602          */

  1603         public static final UnicodeBlock DESERET =

  1604             new UnicodeBlock("DESERET");

  1606         /**

  1607          * Constant for the "Shavian" Unicode character block.

  1608          * @since 1.5

  1609          */

  1610         public static final UnicodeBlock SHAVIAN =

  1611             new UnicodeBlock("SHAVIAN");

  1613         /**

  1614          * Constant for the "Osmanya" Unicode character block.

  1615          * @since 1.5

  1616          */

  1617         public static final UnicodeBlock OSMANYA =

  1618             new UnicodeBlock("OSMANYA");

  1620         /**

  1621          * Constant for the "Cypriot Syllabary" Unicode character block.

  1622          * @since 1.5

  1623          */

  1624         public static final UnicodeBlock CYPRIOT_SYLLABARY =

  1625             new UnicodeBlock("CYPRIOT_SYLLABARY",

  1626                              "CYPRIOT SYLLABARY",

  1627                              "CYPRIOTSYLLABARY");

  1629         /**

  1630          * Constant for the "Byzantine Musical Symbols" Unicode character block.

  1631          * @since 1.5

  1632          */

  1633         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =

  1634             new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",

  1635                              "BYZANTINE MUSICAL SYMBOLS",

  1636                              "BYZANTINEMUSICALSYMBOLS");

  1638         /**

  1639          * Constant for the "Musical Symbols" Unicode character block.

  1640          * @since 1.5

  1641          */

  1642         public static final UnicodeBlock MUSICAL_SYMBOLS =

  1643             new UnicodeBlock("MUSICAL_SYMBOLS",

  1644                              "MUSICAL SYMBOLS",

  1645                              "MUSICALSYMBOLS");

  1647         /**

  1648          * Constant for the "Tai Xuan Jing Symbols" Unicode character block.

  1649          * @since 1.5

  1650          */

  1651         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =

  1652             new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",

  1653                              "TAI XUAN JING SYMBOLS",

  1654                              "TAIXUANJINGSYMBOLS");

  1656         /**

  1657          * Constant for the "Mathematical Alphanumeric Symbols" Unicode

  1658          * character block.

  1659          * @since 1.5

  1660          */

  1661         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =

  1662             new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",

  1663                              "MATHEMATICAL ALPHANUMERIC SYMBOLS",

  1664                              "MATHEMATICALALPHANUMERICSYMBOLS");

  1666         /**

  1667          * Constant for the "CJK Unified Ideographs Extension B" Unicode

  1668          * character block.

  1669          * @since 1.5

  1670          */

  1671         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =

  1672             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",

  1673                              "CJK UNIFIED IDEOGRAPHS EXTENSION B",

  1674                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");

  1676         /**

  1677          * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.

  1678          * @since 1.5

  1679          */

  1680         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =

  1681             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",

  1682                              "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",

  1683                              "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");

  1685         /**

  1686          * Constant for the "Tags" Unicode character block.

  1687          * @since 1.5

  1688          */

  1689         public static final UnicodeBlock TAGS =

  1690             new UnicodeBlock("TAGS");

  1692         /**

  1693          * Constant for the "Variation Selectors Supplement" Unicode character

  1694          * block.

  1695          * @since 1.5

  1696          */

  1697         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =

  1698             new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",

  1699                              "VARIATION SELECTORS SUPPLEMENT",

  1700                              "VARIATIONSELECTORSSUPPLEMENT");

  1702         /**

  1703          * Constant for the "Supplementary Private Use Area-A" Unicode character

  1704          * block.

  1705          * @since 1.5

  1706          */

  1707         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =

  1708             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",

  1709                              "SUPPLEMENTARY PRIVATE USE AREA-A",

  1710                              "SUPPLEMENTARYPRIVATEUSEAREA-A");

  1712         /**

  1713          * Constant for the "Supplementary Private Use Area-B" Unicode character

  1714          * block.

  1715          * @since 1.5

  1716          */

  1717         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =

  1718             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",

  1719                              "SUPPLEMENTARY PRIVATE USE AREA-B",

  1720                              "SUPPLEMENTARYPRIVATEUSEAREA-B");

  1722         /**

  1723          * Constant for the "High Surrogates" Unicode character block.

  1724          * This block represents codepoint values in the high surrogate

  1725          * range: U+D800 through U+DB7F

  1726          *

  1727          * @since 1.5

  1728          */

  1729         public static final UnicodeBlock HIGH_SURROGATES =

  1730             new UnicodeBlock("HIGH_SURROGATES",

  1731                              "HIGH SURROGATES",

  1732                              "HIGHSURROGATES");

  1734         /**

  1735          * Constant for the "High Private Use Surrogates" Unicode character

  1736          * block.

  1737          * This block represents codepoint values in the private use high

  1738          * surrogate range: U+DB80 through U+DBFF

  1739          *

  1740          * @since 1.5

  1741          */

  1742         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =

  1743             new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",

  1744                              "HIGH PRIVATE USE SURROGATES",

  1745                              "HIGHPRIVATEUSESURROGATES");

  1747         /**

  1748          * Constant for the "Low Surrogates" Unicode character block.

  1749          * This block represents codepoint values in the low surrogate

  1750          * range: U+DC00 through U+DFFF

  1751          *

  1752          * @since 1.5

  1753          */

  1754         public static final UnicodeBlock LOW_SURROGATES =

  1755             new UnicodeBlock("LOW_SURROGATES",

  1756                              "LOW SURROGATES",

  1757                              "LOWSURROGATES");

  1759         /**

  1760          * Constant for the "Arabic Supplement" Unicode character block.

  1761          * @since 1.7

  1762          */

  1763         public static final UnicodeBlock ARABIC_SUPPLEMENT =

  1764             new UnicodeBlock("ARABIC_SUPPLEMENT",

  1765                              "ARABIC SUPPLEMENT",

  1766                              "ARABICSUPPLEMENT");

  1768         /**

  1769          * Constant for the "NKo" Unicode character block.

  1770          * @since 1.7

  1771          */

  1772         public static final UnicodeBlock NKO =

  1773             new UnicodeBlock("NKO");

  1775         /**

  1776          * Constant for the "Samaritan" Unicode character block.

  1777          * @since 1.7

  1778          */

  1779         public static final UnicodeBlock SAMARITAN =

  1780             new UnicodeBlock("SAMARITAN");

  1782         /**

  1783          * Constant for the "Mandaic" Unicode character block.

  1784          * @since 1.7

  1785          */

  1786         public static final UnicodeBlock MANDAIC =

  1787             new UnicodeBlock("MANDAIC");

  1789         /**

  1790          * Constant for the "Ethiopic Supplement" Unicode character block.

  1791          * @since 1.7

  1792          */

  1793         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =

  1794             new UnicodeBlock("ETHIOPIC_SUPPLEMENT",

  1795                              "ETHIOPIC SUPPLEMENT",

  1796                              "ETHIOPICSUPPLEMENT");

  1798         /**

  1799          * Constant for the "Unified Canadian Aboriginal Syllabics Extended"

  1800          * Unicode character block.

  1801          * @since 1.7

  1802          */

  1803         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =

  1804             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",

  1805                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",

  1806                              "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");

  1808         /**

  1809          * Constant for the "New Tai Lue" Unicode character block.

  1810          * @since 1.7

  1811          */

  1812         public static final UnicodeBlock NEW_TAI_LUE =

  1813             new UnicodeBlock("NEW_TAI_LUE",

  1814                              "NEW TAI LUE",

  1815                              "NEWTAILUE");

  1817         /**

  1818          * Constant for the "Buginese" Unicode character block.

  1819          * @since 1.7

  1820          */

  1821         public static final UnicodeBlock BUGINESE =

  1822             new UnicodeBlock("BUGINESE");

  1824         /**

  1825          * Constant for the "Tai Tham" Unicode character block.

  1826          * @since 1.7

  1827          */

  1828         public static final UnicodeBlock TAI_THAM =

  1829             new UnicodeBlock("TAI_THAM",

  1830                              "TAI THAM",

  1831                              "TAITHAM");

  1833         /**

  1834          * Constant for the "Balinese" Unicode character block.

  1835          * @since 1.7

  1836          */

  1837         public static final UnicodeBlock BALINESE =

  1838             new UnicodeBlock("BALINESE");

  1840         /**

  1841          * Constant for the "Sundanese" Unicode character block.

  1842          * @since 1.7

  1843          */

  1844         public static final UnicodeBlock SUNDANESE =

  1845             new UnicodeBlock("SUNDANESE");

  1847         /**

  1848          * Constant for the "Batak" Unicode character block.

  1849          * @since 1.7

  1850          */

  1851         public static final UnicodeBlock BATAK =

  1852             new UnicodeBlock("BATAK");

  1854         /**

  1855          * Constant for the "Lepcha" Unicode character block.

  1856          * @since 1.7

  1857          */

  1858         public static final UnicodeBlock LEPCHA =

  1859             new UnicodeBlock("LEPCHA");

  1861         /**

  1862          * Constant for the "Ol Chiki" Unicode character block.

  1863          * @since 1.7

  1864          */

  1865         public static final UnicodeBlock OL_CHIKI =

  1866             new UnicodeBlock("OL_CHIKI",

  1867                              "OL CHIKI",

  1868                              "OLCHIKI");

  1870         /**

  1871          * Constant for the "Vedic Extensions" Unicode character block.

  1872          * @since 1.7

  1873          */

  1874         public static final UnicodeBlock VEDIC_EXTENSIONS =

  1875             new UnicodeBlock("VEDIC_EXTENSIONS",

  1876                              "VEDIC EXTENSIONS",

  1877                              "VEDICEXTENSIONS");

  1879         /**

  1880          * Constant for the "Phonetic Extensions Supplement" Unicode character

  1881          * block.

  1882          * @since 1.7

  1883          */

  1884         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =

  1885             new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",

  1886                              "PHONETIC EXTENSIONS SUPPLEMENT",

  1887                              "PHONETICEXTENSIONSSUPPLEMENT");

  1889         /**

  1890          * Constant for the "Combining Diacritical Marks Supplement" Unicode

  1891          * character block.

  1892          * @since 1.7

  1893          */

  1894         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =

  1895             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",

  1896                              "COMBINING DIACRITICAL MARKS SUPPLEMENT",

  1897                              "COMBININGDIACRITICALMARKSSUPPLEMENT");

  1899         /**

  1900          * Constant for the "Glagolitic" Unicode character block.

  1901          * @since 1.7

  1902          */

  1903         public static final UnicodeBlock GLAGOLITIC =

  1904             new UnicodeBlock("GLAGOLITIC");

  1906         /**

  1907          * Constant for the "Latin Extended-C" Unicode character block.

  1908          * @since 1.7

  1909          */

  1910         public static final UnicodeBlock LATIN_EXTENDED_C =

  1911             new UnicodeBlock("LATIN_EXTENDED_C",

  1912                              "LATIN EXTENDED-C",

  1913                              "LATINEXTENDED-C");

  1915         /**

  1916          * Constant for the "Coptic" Unicode character block.

  1917          * @since 1.7

  1918          */

  1919         public static final UnicodeBlock COPTIC =

  1920             new UnicodeBlock("COPTIC");

  1922         /**

  1923          * Constant for the "Georgian Supplement" Unicode character block.

  1924          * @since 1.7

  1925          */

  1926         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =

  1927             new UnicodeBlock("GEORGIAN_SUPPLEMENT",

  1928                              "GEORGIAN SUPPLEMENT",

  1929                              "GEORGIANSUPPLEMENT");

  1931         /**

  1932          * Constant for the "Tifinagh" Unicode character block.

  1933          * @since 1.7

  1934          */

  1935         public static final UnicodeBlock TIFINAGH =

  1936             new UnicodeBlock("TIFINAGH");

  1938         /**

  1939          * Constant for the "Ethiopic Extended" Unicode character block.

  1940          * @since 1.7

  1941          */

  1942         public static final UnicodeBlock ETHIOPIC_EXTENDED =

  1943             new UnicodeBlock("ETHIOPIC_EXTENDED",

  1944                              "ETHIOPIC EXTENDED",

  1945                              "ETHIOPICEXTENDED");

  1947         /**

  1948          * Constant for the "Cyrillic Extended-A" Unicode character block.

  1949          * @since 1.7

  1950          */

  1951         public static final UnicodeBlock CYRILLIC_EXTENDED_A =

  1952             new UnicodeBlock("CYRILLIC_EXTENDED_A",

  1953                              "CYRILLIC EXTENDED-A",

  1954                              "CYRILLICEXTENDED-A");

  1956         /**

  1957          * Constant for the "Supplemental Punctuation" Unicode character block.

  1958          * @since 1.7

  1959          */

  1960         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =

  1961             new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",

  1962                              "SUPPLEMENTAL PUNCTUATION",

  1963                              "SUPPLEMENTALPUNCTUATION");

  1965         /**

  1966          * Constant for the "CJK Strokes" Unicode character block.

  1967          * @since 1.7

  1968          */

  1969         public static final UnicodeBlock CJK_STROKES =

  1970             new UnicodeBlock("CJK_STROKES",

  1971                              "CJK STROKES",

  1972                              "CJKSTROKES");

  1974         /**

  1975          * Constant for the "Lisu" Unicode character block.

  1976          * @since 1.7

  1977          */

  1978         public static final UnicodeBlock LISU =

  1979             new UnicodeBlock("LISU");

  1981         /**

  1982          * Constant for the "Vai" Unicode character block.

  1983          * @since 1.7

  1984          */

  1985         public static final UnicodeBlock VAI =

  1986             new UnicodeBlock("VAI");

  1988         /**

  1989          * Constant for the "Cyrillic Extended-B" Unicode character block.

  1990          * @since 1.7

  1991          */

  1992         public static final UnicodeBlock CYRILLIC_EXTENDED_B =

  1993             new UnicodeBlock("CYRILLIC_EXTENDED_B",

  1994                              "CYRILLIC EXTENDED-B",

  1995                              "CYRILLICEXTENDED-B");

  1997         /**

  1998          * Constant for the "Bamum" Unicode character block.

  1999          * @since 1.7

  2000          */

  2001         public static final UnicodeBlock BAMUM =

  2002             new UnicodeBlock("BAMUM");

  2004         /**

  2005          * Constant for the "Modifier Tone Letters" Unicode character block.

  2006          * @since 1.7

  2007          */

  2008         public static final UnicodeBlock MODIFIER_TONE_LETTERS =

  2009             new UnicodeBlock("MODIFIER_TONE_LETTERS",

  2010                              "MODIFIER TONE LETTERS",

  2011                              "MODIFIERTONELETTERS");

  2013         /**

  2014          * Constant for the "Latin Extended-D" Unicode character block.

  2015          * @since 1.7

  2016          */

  2017         public static final UnicodeBlock LATIN_EXTENDED_D =

  2018             new UnicodeBlock("LATIN_EXTENDED_D",

  2019                              "LATIN EXTENDED-D",

  2020                              "LATINEXTENDED-D");

  2022         /**

  2023          * Constant for the "Syloti Nagri" Unicode character block.

  2024          * @since 1.7

  2025          */

  2026         public static final UnicodeBlock SYLOTI_NAGRI =

  2027             new UnicodeBlock("SYLOTI_NAGRI",

  2028                              "SYLOTI NAGRI",

  2029                              "SYLOTINAGRI");

  2031         /**

  2032          * Constant for the "Common Indic Number Forms" Unicode character block.

  2033          * @since 1.7

  2034          */

  2035         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =

  2036             new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",

  2037                              "COMMON INDIC NUMBER FORMS",

  2038                              "COMMONINDICNUMBERFORMS");

  2040         /**

  2041          * Constant for the "Phags-pa" Unicode character block.

  2042          * @since 1.7

  2043          */

  2044         public static final UnicodeBlock PHAGS_PA =

  2045             new UnicodeBlock("PHAGS_PA",

  2046                              "PHAGS-PA");

  2048         /**

  2049          * Constant for the "Saurashtra" Unicode character block.

  2050          * @since 1.7

  2051          */

  2052         public static final UnicodeBlock SAURASHTRA =

  2053             new UnicodeBlock("SAURASHTRA");

  2055         /**

  2056          * Constant for the "Devanagari Extended" Unicode character block.

  2057          * @since 1.7

  2058          */

  2059         public static final UnicodeBlock DEVANAGARI_EXTENDED =

  2060             new UnicodeBlock("DEVANAGARI_EXTENDED",

  2061                              "DEVANAGARI EXTENDED",

  2062                              "DEVANAGARIEXTENDED");

  2064         /**

  2065          * Constant for the "Kayah Li" Unicode character block.

  2066          * @since 1.7

  2067          */

  2068         public static final UnicodeBlock KAYAH_LI =

  2069             new UnicodeBlock("KAYAH_LI",

  2070                              "KAYAH LI",

  2071                              "KAYAHLI");

  2073         /**

  2074          * Constant for the "Rejang" Unicode character block.

  2075          * @since 1.7

  2076          */

  2077         public static final UnicodeBlock REJANG =

  2078             new UnicodeBlock("REJANG");

  2080         /**

  2081          * Constant for the "Hangul Jamo Extended-A" Unicode character block.

  2082          * @since 1.7

  2083          */

  2084         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =

  2085             new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",

  2086                              "HANGUL JAMO EXTENDED-A",

  2087                              "HANGULJAMOEXTENDED-A");

  2089         /**

  2090          * Constant for the "Javanese" Unicode character block.

  2091          * @since 1.7

  2092          */

  2093         public static final UnicodeBlock JAVANESE =

  2094             new UnicodeBlock("JAVANESE");

  2096         /**

  2097          * Constant for the "Cham" Unicode character block.

  2098          * @since 1.7

  2099          */

  2100         public static final UnicodeBlock CHAM =

  2101             new UnicodeBlock("CHAM");

  2103         /**

  2104          * Constant for the "Myanmar Extended-A" Unicode character block.

  2105          * @since 1.7

  2106          */

  2107         public static final UnicodeBlock MYANMAR_EXTENDED_A =

  2108             new UnicodeBlock("MYANMAR_EXTENDED_A",

  2109                              "MYANMAR EXTENDED-A",

  2110                              "MYANMAREXTENDED-A");

  2112         /**

  2113          * Constant for the "Tai Viet" Unicode character block.

  2114          * @since 1.7

  2115          */

  2116         public static final UnicodeBlock TAI_VIET =

  2117             new UnicodeBlock("TAI_VIET",

  2118                              "TAI VIET",

  2119                              "TAIVIET");

  2121         /**

  2122          * Constant for the "Ethiopic Extended-A" Unicode character block.

  2123          * @since 1.7

  2124          */

  2125         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =

  2126             new UnicodeBlock("ETHIOPIC_EXTENDED_A",

  2127                              "ETHIOPIC EXTENDED-A",

  2128                              "ETHIOPICEXTENDED-A");

  2130         /**

  2131          * Constant for the "Meetei Mayek" Unicode character block.

  2132          * @since 1.7

  2133          */

  2134         public static final UnicodeBlock MEETEI_MAYEK =

  2135             new UnicodeBlock("MEETEI_MAYEK",

  2136                              "MEETEI MAYEK",

  2137                              "MEETEIMAYEK");

  2139         /**

  2140          * Constant for the "Hangul Jamo Extended-B" Unicode character block.

  2141          * @since 1.7

  2142          */

  2143         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =

  2144             new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",

  2145                              "HANGUL JAMO EXTENDED-B",

  2146                              "HANGULJAMOEXTENDED-B");

  2148         /**

  2149          * Constant for the "Vertical Forms" Unicode character block.

  2150          * @since 1.7

  2151          */

  2152         public static final UnicodeBlock VERTICAL_FORMS =

  2153             new UnicodeBlock("VERTICAL_FORMS",

  2154                              "VERTICAL FORMS",

  2155                              "VERTICALFORMS");

  2157         /**

  2158          * Constant for the "Ancient Greek Numbers" Unicode character block.

  2159          * @since 1.7

  2160          */

  2161         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =

  2162             new UnicodeBlock("ANCIENT_GREEK_NUMBERS",

  2163                              "ANCIENT GREEK NUMBERS",

  2164                              "ANCIENTGREEKNUMBERS");

  2166         /**

  2167          * Constant for the "Ancient Symbols" Unicode character block.

  2168          * @since 1.7

  2169          */

  2170         public static final UnicodeBlock ANCIENT_SYMBOLS =

  2171             new UnicodeBlock("ANCIENT_SYMBOLS",

  2172                              "ANCIENT SYMBOLS",

  2173                              "ANCIENTSYMBOLS");

  2175         /**

  2176          * Constant for the "Phaistos Disc" Unicode character block.

  2177          * @since 1.7

  2178          */

  2179         public static final UnicodeBlock PHAISTOS_DISC =

  2180             new UnicodeBlock("PHAISTOS_DISC",

  2181                              "PHAISTOS DISC",

  2182                              "PHAISTOSDISC");

  2184         /**

  2185          * Constant for the "Lycian" Unicode character block.

  2186          * @since 1.7

  2187          */

  2188         public static final UnicodeBlock LYCIAN =

  2189             new UnicodeBlock("LYCIAN");

  2191         /**

  2192          * Constant for the "Carian" Unicode character block.

  2193          * @since 1.7

  2194          */

  2195         public static final UnicodeBlock CARIAN =

  2196             new UnicodeBlock("CARIAN");

  2198         /**

  2199          * Constant for the "Old Persian" Unicode character block.

  2200          * @since 1.7

  2201          */

  2202         public static final UnicodeBlock OLD_PERSIAN =

  2203             new UnicodeBlock("OLD_PERSIAN",

  2204                              "OLD PERSIAN",

  2205                              "OLDPERSIAN");

  2207         /**

  2208          * Constant for the "Imperial Aramaic" Unicode character block.

  2209          * @since 1.7

  2210          */

  2211         public static final UnicodeBlock IMPERIAL_ARAMAIC =

  2212             new UnicodeBlock("IMPERIAL_ARAMAIC",

  2213                              "IMPERIAL ARAMAIC",

  2214                              "IMPERIALARAMAIC");

  2216         /**

  2217          * Constant for the "Phoenician" Unicode character block.

  2218          * @since 1.7

  2219          */

  2220         public static final UnicodeBlock PHOENICIAN =

  2221             new UnicodeBlock("PHOENICIAN");

  2223         /**

  2224          * Constant for the "Lydian" Unicode character block.

  2225          * @since 1.7

  2226          */

  2227         public static final UnicodeBlock LYDIAN =

  2228             new UnicodeBlock("LYDIAN");

  2230         /**

  2231          * Constant for the "Kharoshthi" Unicode character block.

  2232          * @since 1.7

  2233          */

  2234         public static final UnicodeBlock KHAROSHTHI =

  2235             new UnicodeBlock("KHAROSHTHI");

  2237         /**

  2238          * Constant for the "Old South Arabian" Unicode character block.

  2239          * @since 1.7

  2240          */

  2241         public static final UnicodeBlock OLD_SOUTH_ARABIAN =

  2242             new UnicodeBlock("OLD_SOUTH_ARABIAN",

  2243                              "OLD SOUTH ARABIAN",

  2244                              "OLDSOUTHARABIAN");

  2246         /**

  2247          * Constant for the "Avestan" Unicode character block.

  2248          * @since 1.7

  2249          */

  2250         public static final UnicodeBlock AVESTAN =

  2251             new UnicodeBlock("AVESTAN");

  2253         /**

  2254          * Constant for the "Inscriptional Parthian" Unicode character block.

  2255          * @since 1.7

  2256          */

  2257         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =

  2258             new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",

  2259                              "INSCRIPTIONAL PARTHIAN",

  2260                              "INSCRIPTIONALPARTHIAN");

  2262         /**

  2263          * Constant for the "Inscriptional Pahlavi" Unicode character block.

  2264          * @since 1.7

  2265          */

  2266         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =

  2267             new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",

  2268                              "INSCRIPTIONAL PAHLAVI",

  2269                              "INSCRIPTIONALPAHLAVI");

  2271         /**

  2272          * Constant for the "Old Turkic" Unicode character block.

  2273          * @since 1.7

  2274          */

  2275         public static final UnicodeBlock OLD_TURKIC =

  2276             new UnicodeBlock("OLD_TURKIC",

  2277                              "OLD TURKIC",

  2278                              "OLDTURKIC");

  2280         /**

  2281          * Constant for the "Rumi Numeral Symbols" Unicode character block.

  2282          * @since 1.7

  2283          */

  2284         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =

  2285             new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",

  2286                              "RUMI NUMERAL SYMBOLS",

  2287                              "RUMINUMERALSYMBOLS");

  2289         /**

  2290          * Constant for the "Brahmi" Unicode character block.

  2291          * @since 1.7

  2292          */

  2293         public static final UnicodeBlock BRAHMI =

  2294             new UnicodeBlock("BRAHMI");

  2296         /**

  2297          * Constant for the "Kaithi" Unicode character block.

  2298          * @since 1.7

  2299          */

  2300         public static final UnicodeBlock KAITHI =

  2301             new UnicodeBlock("KAITHI");

  2303         /**

  2304          * Constant for the "Cuneiform" Unicode character block.

  2305          * @since 1.7

  2306          */

  2307         public static final UnicodeBlock CUNEIFORM =

  2308             new UnicodeBlock("CUNEIFORM");

  2310         /**

  2311          * Constant for the "Cuneiform Numbers and Punctuation" Unicode

  2312          * character block.

  2313          * @since 1.7

  2314          */

  2315         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =

  2316             new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",

  2317                              "CUNEIFORM NUMBERS AND PUNCTUATION",

  2318                              "CUNEIFORMNUMBERSANDPUNCTUATION");

  2320         /**

  2321          * Constant for the "Egyptian Hieroglyphs" Unicode character block (U+13000..U+1342F).

  2322          * @since 1.7

  2323          */

  2324         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =

  2325             new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",

  2326                              "EGYPTIAN HIEROGLYPHS",

  2327                              "EGYPTIANHIEROGLYPHS");

  2329         /**

  2330          * Constant for the "Bamum Supplement" Unicode character block (U+16800..U+16A3F).

  2331          * @since 1.7

  2332          */

  2333         public static final UnicodeBlock BAMUM_SUPPLEMENT =

  2334             new UnicodeBlock("BAMUM_SUPPLEMENT",

  2335                              "BAMUM SUPPLEMENT",

  2336                              "BAMUMSUPPLEMENT");

  2338         /**

  2339          * Constant for the "Kana Supplement" Unicode character block (U+1B000..U+1B0FF).

  2340          * @since 1.7

  2341          */

  2342         public static final UnicodeBlock KANA_SUPPLEMENT =

  2343             new UnicodeBlock("KANA_SUPPLEMENT",

  2344                              "KANA SUPPLEMENT",

  2345                              "KANASUPPLEMENT");

  2347         /**

  2348          * Constant for the "Ancient Greek Musical Notation" Unicode character

  2349          * block (U+1D200..U+1D24F).

  2350          * @since 1.7

  2351          */

  2352         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =

  2353             new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",

  2354                              "ANCIENT GREEK MUSICAL NOTATION",

  2355                              "ANCIENTGREEKMUSICALNOTATION");

  2357         /**

  2358          * Constant for the "Counting Rod Numerals" Unicode character block (U+1D360..U+1D37F).

  2359          * @since 1.7

  2360          */

  2361         public static final UnicodeBlock COUNTING_ROD_NUMERALS =

  2362             new UnicodeBlock("COUNTING_ROD_NUMERALS",

  2363                              "COUNTING ROD NUMERALS",

  2364                              "COUNTINGRODNUMERALS");

  2366         /**

  2367          * Constant for the "Mahjong Tiles" Unicode character block (U+1F000..U+1F02F).

  2368          * @since 1.7

  2369          */

  2370         public static final UnicodeBlock MAHJONG_TILES =

  2371             new UnicodeBlock("MAHJONG_TILES",

  2372                              "MAHJONG TILES",

  2373                              "MAHJONGTILES");

  2375         /**

  2376          * Constant for the "Domino Tiles" Unicode character block (U+1F030..U+1F09F).

  2377          * @since 1.7

  2378          */

  2379         public static final UnicodeBlock DOMINO_TILES =

  2380             new UnicodeBlock("DOMINO_TILES",

  2381                              "DOMINO TILES",

  2382                              "DOMINOTILES");

  2384         /**

  2385          * Constant for the "Playing Cards" Unicode character block (U+1F0A0..U+1F0FF).

  2386          * @since 1.7

  2387          */

  2388         public static final UnicodeBlock PLAYING_CARDS =

  2389             new UnicodeBlock("PLAYING_CARDS",

  2390                              "PLAYING CARDS",

  2391                              "PLAYINGCARDS");

  2393         /**

  2394          * Constant for the "Enclosed Alphanumeric Supplement" Unicode character

  2395          * block (U+1F100..U+1F1FF).

  2396          * @since 1.7

  2397          */

  2398         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =

  2399             new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",

  2400                              "ENCLOSED ALPHANUMERIC SUPPLEMENT",

  2401                              "ENCLOSEDALPHANUMERICSUPPLEMENT");

  2403         /**

  2404          * Constant for the "Enclosed Ideographic Supplement" Unicode character

  2405          * block (U+1F200..U+1F2FF).

  2406          * @since 1.7

  2407          */

  2408         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =

  2409             new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",

  2410                              "ENCLOSED IDEOGRAPHIC SUPPLEMENT",

  2411                              "ENCLOSEDIDEOGRAPHICSUPPLEMENT");

  2413         /**

  2414          * Constant for the "Miscellaneous Symbols And Pictographs" Unicode

  2415          * character block (U+1F300..U+1F5FF).

  2416          * @since 1.7

  2417          */

  2418         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =

  2419             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",

  2420                              "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",

  2421                              "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");

  2423         /**

  2424          * Constant for the "Emoticons" Unicode character block (U+1F600..U+1F64F).

  2425          * @since 1.7

  2426          */

  2427         public static final UnicodeBlock EMOTICONS =

  2428             new UnicodeBlock("EMOTICONS");

  2430         /**

  2431          * Constant for the "Transport And Map Symbols" Unicode character block (U+1F680..U+1F6FF).

  2432          * @since 1.7

  2433          */

  2434         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =

  2435             new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",

  2436                              "TRANSPORT AND MAP SYMBOLS",

  2437                              "TRANSPORTANDMAPSYMBOLS");

  2439         /**

  2440          * Constant for the "Alchemical Symbols" Unicode character block (U+1F700..U+1F77F).

  2441          * @since 1.7

  2442          */

  2443         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =

  2444             new UnicodeBlock("ALCHEMICAL_SYMBOLS",

  2445                              "ALCHEMICAL SYMBOLS",

  2446                              "ALCHEMICALSYMBOLS");

  2448         /**

  2449          * Constant for the "CJK Unified Ideographs Extension C" Unicode

  2450          * character block (U+2A700..U+2B73F).

  2451          * @since 1.7

  2452          */

  2453         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =

  2454             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",

  2455                              "CJK UNIFIED IDEOGRAPHS EXTENSION C",

  2456                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");

  2458         /**

  2459          * Constant for the "CJK Unified Ideographs Extension D" Unicode

  2460          * character block (U+2B740..U+2B81F).

  2461          * @since 1.7

  2462          */

  2463         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =

  2464             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",

  2465                              "CJK UNIFIED IDEOGRAPHS EXTENSION D",

  2466                              "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");

  2468         private static final int blockStarts[] = {

                   // First code point of every range, in strictly ascending order.
                   // of(int) binary-searches this table, so the ordering must be kept.
                   // Entry i is paired with blocks[i]: a range runs from its own start
                   // up to (but not including) the next entry's start, and the ranges
                   // commented "unassigned" have a null at the same index in blocks.

  2469             0x0000,   // 0000..007F; Basic Latin

  2470             0x0080,   // 0080..00FF; Latin-1 Supplement

  2471             0x0100,   // 0100..017F; Latin Extended-A

  2472             0x0180,   // 0180..024F; Latin Extended-B

  2473             0x0250,   // 0250..02AF; IPA Extensions

  2474             0x02B0,   // 02B0..02FF; Spacing Modifier Letters

  2475             0x0300,   // 0300..036F; Combining Diacritical Marks

  2476             0x0370,   // 0370..03FF; Greek and Coptic

  2477             0x0400,   // 0400..04FF; Cyrillic

  2478             0x0500,   // 0500..052F; Cyrillic Supplement

  2479             0x0530,   // 0530..058F; Armenian

  2480             0x0590,   // 0590..05FF; Hebrew

  2481             0x0600,   // 0600..06FF; Arabic

  2482             0x0700,   // 0700..074F; Syriac

  2483             0x0750,   // 0750..077F; Arabic Supplement

  2484             0x0780,   // 0780..07BF; Thaana

  2485             0x07C0,   // 07C0..07FF; NKo

  2486             0x0800,   // 0800..083F; Samaritan

  2487             0x0840,   // 0840..085F; Mandaic

  2488             0x0860,   //             unassigned

  2489             0x0900,   // 0900..097F; Devanagari

  2490             0x0980,   // 0980..09FF; Bengali

  2491             0x0A00,   // 0A00..0A7F; Gurmukhi

  2492             0x0A80,   // 0A80..0AFF; Gujarati

  2493             0x0B00,   // 0B00..0B7F; Oriya

  2494             0x0B80,   // 0B80..0BFF; Tamil

  2495             0x0C00,   // 0C00..0C7F; Telugu

  2496             0x0C80,   // 0C80..0CFF; Kannada

  2497             0x0D00,   // 0D00..0D7F; Malayalam

  2498             0x0D80,   // 0D80..0DFF; Sinhala

  2499             0x0E00,   // 0E00..0E7F; Thai

  2500             0x0E80,   // 0E80..0EFF; Lao

  2501             0x0F00,   // 0F00..0FFF; Tibetan

  2502             0x1000,   // 1000..109F; Myanmar

  2503             0x10A0,   // 10A0..10FF; Georgian

  2504             0x1100,   // 1100..11FF; Hangul Jamo

  2505             0x1200,   // 1200..137F; Ethiopic

  2506             0x1380,   // 1380..139F; Ethiopic Supplement

  2507             0x13A0,   // 13A0..13FF; Cherokee

  2508             0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics

  2509             0x1680,   // 1680..169F; Ogham

  2510             0x16A0,   // 16A0..16FF; Runic

  2511             0x1700,   // 1700..171F; Tagalog

  2512             0x1720,   // 1720..173F; Hanunoo

  2513             0x1740,   // 1740..175F; Buhid

  2514             0x1760,   // 1760..177F; Tagbanwa

  2515             0x1780,   // 1780..17FF; Khmer

  2516             0x1800,   // 1800..18AF; Mongolian

  2517             0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended

  2518             0x1900,   // 1900..194F; Limbu

  2519             0x1950,   // 1950..197F; Tai Le

  2520             0x1980,   // 1980..19DF; New Tai Lue

  2521             0x19E0,   // 19E0..19FF; Khmer Symbols

  2522             0x1A00,   // 1A00..1A1F; Buginese

  2523             0x1A20,   // 1A20..1AAF; Tai Tham

  2524             0x1AB0,   //             unassigned

  2525             0x1B00,   // 1B00..1B7F; Balinese

  2526             0x1B80,   // 1B80..1BBF; Sundanese

  2527             0x1BC0,   // 1BC0..1BFF; Batak

  2528             0x1C00,   // 1C00..1C4F; Lepcha

  2529             0x1C50,   // 1C50..1C7F; Ol Chiki

  2530             0x1C80,   //             unassigned

  2531             0x1CD0,   // 1CD0..1CFF; Vedic Extensions

  2532             0x1D00,   // 1D00..1D7F; Phonetic Extensions

  2533             0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement

  2534             0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement

  2535             0x1E00,   // 1E00..1EFF; Latin Extended Additional

  2536             0x1F00,   // 1F00..1FFF; Greek Extended

  2537             0x2000,   // 2000..206F; General Punctuation

  2538             0x2070,   // 2070..209F; Superscripts and Subscripts

  2539             0x20A0,   // 20A0..20CF; Currency Symbols

  2540             0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols

  2541             0x2100,   // 2100..214F; Letterlike Symbols

  2542             0x2150,   // 2150..218F; Number Forms

  2543             0x2190,   // 2190..21FF; Arrows

  2544             0x2200,   // 2200..22FF; Mathematical Operators

  2545             0x2300,   // 2300..23FF; Miscellaneous Technical

  2546             0x2400,   // 2400..243F; Control Pictures

  2547             0x2440,   // 2440..245F; Optical Character Recognition

  2548             0x2460,   // 2460..24FF; Enclosed Alphanumerics

  2549             0x2500,   // 2500..257F; Box Drawing

  2550             0x2580,   // 2580..259F; Block Elements

  2551             0x25A0,   // 25A0..25FF; Geometric Shapes

  2552             0x2600,   // 2600..26FF; Miscellaneous Symbols

  2553             0x2700,   // 2700..27BF; Dingbats

  2554             0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A

  2555             0x27F0,   // 27F0..27FF; Supplemental Arrows-A

  2556             0x2800,   // 2800..28FF; Braille Patterns

  2557             0x2900,   // 2900..297F; Supplemental Arrows-B

  2558             0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B

  2559             0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators

  2560             0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows

  2561             0x2C00,   // 2C00..2C5F; Glagolitic

  2562             0x2C60,   // 2C60..2C7F; Latin Extended-C

  2563             0x2C80,   // 2C80..2CFF; Coptic

  2564             0x2D00,   // 2D00..2D2F; Georgian Supplement

  2565             0x2D30,   // 2D30..2D7F; Tifinagh

  2566             0x2D80,   // 2D80..2DDF; Ethiopic Extended

  2567             0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A

  2568             0x2E00,   // 2E00..2E7F; Supplemental Punctuation

  2569             0x2E80,   // 2E80..2EFF; CJK Radicals Supplement

  2570             0x2F00,   // 2F00..2FDF; Kangxi Radicals

  2571             0x2FE0,   //             unassigned

  2572             0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters

  2573             0x3000,   // 3000..303F; CJK Symbols and Punctuation

  2574             0x3040,   // 3040..309F; Hiragana

  2575             0x30A0,   // 30A0..30FF; Katakana

  2576             0x3100,   // 3100..312F; Bopomofo

  2577             0x3130,   // 3130..318F; Hangul Compatibility Jamo

  2578             0x3190,   // 3190..319F; Kanbun

  2579             0x31A0,   // 31A0..31BF; Bopomofo Extended

  2580             0x31C0,   // 31C0..31EF; CJK Strokes

  2581             0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions

  2582             0x3200,   // 3200..32FF; Enclosed CJK Letters and Months

  2583             0x3300,   // 3300..33FF; CJK Compatibility

  2584             0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A

  2585             0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols

  2586             0x4E00,   // 4E00..9FFF; CJK Unified Ideographs

  2587             0xA000,   // A000..A48F; Yi Syllables

  2588             0xA490,   // A490..A4CF; Yi Radicals

  2589             0xA4D0,   // A4D0..A4FF; Lisu

  2590             0xA500,   // A500..A63F; Vai

  2591             0xA640,   // A640..A69F; Cyrillic Extended-B

  2592             0xA6A0,   // A6A0..A6FF; Bamum

  2593             0xA700,   // A700..A71F; Modifier Tone Letters

  2594             0xA720,   // A720..A7FF; Latin Extended-D

  2595             0xA800,   // A800..A82F; Syloti Nagri

  2596             0xA830,   // A830..A83F; Common Indic Number Forms

  2597             0xA840,   // A840..A87F; Phags-pa

  2598             0xA880,   // A880..A8DF; Saurashtra

  2599             0xA8E0,   // A8E0..A8FF; Devanagari Extended

  2600             0xA900,   // A900..A92F; Kayah Li

  2601             0xA930,   // A930..A95F; Rejang

  2602             0xA960,   // A960..A97F; Hangul Jamo Extended-A

  2603             0xA980,   // A980..A9DF; Javanese

  2604             0xA9E0,   //             unassigned

  2605             0xAA00,   // AA00..AA5F; Cham

  2606             0xAA60,   // AA60..AA7F; Myanmar Extended-A

  2607             0xAA80,   // AA80..AADF; Tai Viet

  2608             0xAAE0,   //             unassigned

  2609             0xAB00,   // AB00..AB2F; Ethiopic Extended-A

  2610             0xAB30,   //             unassigned

  2611             0xABC0,   // ABC0..ABFF; Meetei Mayek

  2612             0xAC00,   // AC00..D7AF; Hangul Syllables

  2613             0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B

  2614             0xD800,   // D800..DB7F; High Surrogates

  2615             0xDB80,   // DB80..DBFF; High Private Use Surrogates

  2616             0xDC00,   // DC00..DFFF; Low Surrogates

  2617             0xE000,   // E000..F8FF; Private Use Area

  2618             0xF900,   // F900..FAFF; CJK Compatibility Ideographs

  2619             0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms

  2620             0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A

  2621             0xFE00,   // FE00..FE0F; Variation Selectors

  2622             0xFE10,   // FE10..FE1F; Vertical Forms

  2623             0xFE20,   // FE20..FE2F; Combining Half Marks

  2624             0xFE30,   // FE30..FE4F; CJK Compatibility Forms

  2625             0xFE50,   // FE50..FE6F; Small Form Variants

  2626             0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B

  2627             0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms

  2628             0xFFF0,   // FFF0..FFFF; Specials

  2629             0x10000,  // 10000..1007F; Linear B Syllabary

  2630             0x10080,  // 10080..100FF; Linear B Ideograms

  2631             0x10100,  // 10100..1013F; Aegean Numbers

  2632             0x10140,  // 10140..1018F; Ancient Greek Numbers

  2633             0x10190,  // 10190..101CF; Ancient Symbols

  2634             0x101D0,  // 101D0..101FF; Phaistos Disc

  2635             0x10200,  //               unassigned

  2636             0x10280,  // 10280..1029F; Lycian

  2637             0x102A0,  // 102A0..102DF; Carian

  2638             0x102E0,  //               unassigned

  2639             0x10300,  // 10300..1032F; Old Italic

  2640             0x10330,  // 10330..1034F; Gothic

  2641             0x10350,  //               unassigned

  2642             0x10380,  // 10380..1039F; Ugaritic

  2643             0x103A0,  // 103A0..103DF; Old Persian

  2644             0x103E0,  //               unassigned

  2645             0x10400,  // 10400..1044F; Deseret

  2646             0x10450,  // 10450..1047F; Shavian

  2647             0x10480,  // 10480..104AF; Osmanya

  2648             0x104B0,  //               unassigned

  2649             0x10800,  // 10800..1083F; Cypriot Syllabary

  2650             0x10840,  // 10840..1085F; Imperial Aramaic

  2651             0x10860,  //               unassigned

  2652             0x10900,  // 10900..1091F; Phoenician

  2653             0x10920,  // 10920..1093F; Lydian

  2654             0x10940,  //               unassigned

  2655             0x10A00,  // 10A00..10A5F; Kharoshthi

  2656             0x10A60,  // 10A60..10A7F; Old South Arabian

  2657             0x10A80,  //               unassigned

  2658             0x10B00,  // 10B00..10B3F; Avestan

  2659             0x10B40,  // 10B40..10B5F; Inscriptional Parthian

  2660             0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi

  2661             0x10B80,  //               unassigned

  2662             0x10C00,  // 10C00..10C4F; Old Turkic

  2663             0x10C50,  //               unassigned

  2664             0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols

  2665             0x10E80,  //               unassigned

  2666             0x11000,  // 11000..1107F; Brahmi

  2667             0x11080,  // 11080..110CF; Kaithi

  2668             0x110D0,  //               unassigned

  2669             0x12000,  // 12000..123FF; Cuneiform

  2670             0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation

  2671             0x12480,  //               unassigned

  2672             0x13000,  // 13000..1342F; Egyptian Hieroglyphs

  2673             0x13430,  //               unassigned

  2674             0x16800,  // 16800..16A3F; Bamum Supplement

  2675             0x16A40,  //               unassigned

  2676             0x1B000,  // 1B000..1B0FF; Kana Supplement

  2677             0x1B100,  //               unassigned

  2678             0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols

  2679             0x1D100,  // 1D100..1D1FF; Musical Symbols

  2680             0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation

  2681             0x1D250,  //               unassigned

  2682             0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols

  2683             0x1D360,  // 1D360..1D37F; Counting Rod Numerals

  2684             0x1D380,  //               unassigned

  2685             0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols

  2686             0x1D800,  //               unassigned

  2687             0x1F000,  // 1F000..1F02F; Mahjong Tiles

  2688             0x1F030,  // 1F030..1F09F; Domino Tiles

  2689             0x1F0A0,  // 1F0A0..1F0FF; Playing Cards

  2690             0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement

  2691             0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement

  2692             0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols And Pictographs

  2693             0x1F600,  // 1F600..1F64F; Emoticons

  2694             0x1F650,  //               unassigned

  2695             0x1F680,  // 1F680..1F6FF; Transport And Map Symbols

  2696             0x1F700,  // 1F700..1F77F; Alchemical Symbols

  2697             0x1F780,  //               unassigned

  2698             0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B

  2699             0x2A6E0,  //               unassigned

  2700             0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C

  2701             0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D

  2702             0x2B820,  //               unassigned

  2703             0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement

  2704             0x2FA20,  //               unassigned

  2705             0xE0000,  // E0000..E007F; Tags

  2706             0xE0080,  //               unassigned

  2707             0xE0100,  // E0100..E01EF; Variation Selectors Supplement

  2708             0xE01F0,  //               unassigned

  2709             0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A

  2710             0x100000  // 100000..10FFFF; Supplementary Private Use Area-B

  2711         };

  2713         private static final UnicodeBlock[] blocks = {

                   // Parallel to blockStarts: blocks[i] is the block whose range begins
                   // at blockStarts[i]. A null entry marks a range that belongs to no
                   // block; of(int) returns that null unchanged for code points in it.
                   // Any entry added or removed here needs the matching change in
                   // blockStarts so the two tables keep the same length and order.

  2714             BASIC_LATIN,

  2715             LATIN_1_SUPPLEMENT,

  2716             LATIN_EXTENDED_A,

  2717             LATIN_EXTENDED_B,

  2718             IPA_EXTENSIONS,

  2719             SPACING_MODIFIER_LETTERS,

  2720             COMBINING_DIACRITICAL_MARKS,

  2721             GREEK,

  2722             CYRILLIC,

  2723             CYRILLIC_SUPPLEMENTARY,

  2724             ARMENIAN,

  2725             HEBREW,

  2726             ARABIC,

  2727             SYRIAC,

  2728             ARABIC_SUPPLEMENT,

  2729             THAANA,

  2730             NKO,

  2731             SAMARITAN,

  2732             MANDAIC,

  2733             null,

  2734             DEVANAGARI,

  2735             BENGALI,

  2736             GURMUKHI,

  2737             GUJARATI,

  2738             ORIYA,

  2739             TAMIL,

  2740             TELUGU,

  2741             KANNADA,

  2742             MALAYALAM,

  2743             SINHALA,

  2744             THAI,

  2745             LAO,

  2746             TIBETAN,

  2747             MYANMAR,

  2748             GEORGIAN,

  2749             HANGUL_JAMO,

  2750             ETHIOPIC,

  2751             ETHIOPIC_SUPPLEMENT,

  2752             CHEROKEE,

  2753             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,

  2754             OGHAM,

  2755             RUNIC,

  2756             TAGALOG,

  2757             HANUNOO,

  2758             BUHID,

  2759             TAGBANWA,

  2760             KHMER,

  2761             MONGOLIAN,

  2762             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,

  2763             LIMBU,

  2764             TAI_LE,

  2765             NEW_TAI_LUE,

  2766             KHMER_SYMBOLS,

  2767             BUGINESE,

  2768             TAI_THAM,

  2769             null,

  2770             BALINESE,

  2771             SUNDANESE,

  2772             BATAK,

  2773             LEPCHA,

  2774             OL_CHIKI,

  2775             null,

  2776             VEDIC_EXTENSIONS,

  2777             PHONETIC_EXTENSIONS,

  2778             PHONETIC_EXTENSIONS_SUPPLEMENT,

  2779             COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,

  2780             LATIN_EXTENDED_ADDITIONAL,

  2781             GREEK_EXTENDED,

  2782             GENERAL_PUNCTUATION,

  2783             SUPERSCRIPTS_AND_SUBSCRIPTS,

  2784             CURRENCY_SYMBOLS,

  2785             COMBINING_MARKS_FOR_SYMBOLS,

  2786             LETTERLIKE_SYMBOLS,

  2787             NUMBER_FORMS,

  2788             ARROWS,

  2789             MATHEMATICAL_OPERATORS,

  2790             MISCELLANEOUS_TECHNICAL,

  2791             CONTROL_PICTURES,

  2792             OPTICAL_CHARACTER_RECOGNITION,

  2793             ENCLOSED_ALPHANUMERICS,

  2794             BOX_DRAWING,

  2795             BLOCK_ELEMENTS,

  2796             GEOMETRIC_SHAPES,

  2797             MISCELLANEOUS_SYMBOLS,

  2798             DINGBATS,

  2799             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,

  2800             SUPPLEMENTAL_ARROWS_A,

  2801             BRAILLE_PATTERNS,

  2802             SUPPLEMENTAL_ARROWS_B,

  2803             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,

  2804             SUPPLEMENTAL_MATHEMATICAL_OPERATORS,

  2805             MISCELLANEOUS_SYMBOLS_AND_ARROWS,

  2806             GLAGOLITIC,

  2807             LATIN_EXTENDED_C,

  2808             COPTIC,

  2809             GEORGIAN_SUPPLEMENT,

  2810             TIFINAGH,

  2811             ETHIOPIC_EXTENDED,

  2812             CYRILLIC_EXTENDED_A,

  2813             SUPPLEMENTAL_PUNCTUATION,

  2814             CJK_RADICALS_SUPPLEMENT,

  2815             KANGXI_RADICALS,

  2816             null,

  2817             IDEOGRAPHIC_DESCRIPTION_CHARACTERS,

  2818             CJK_SYMBOLS_AND_PUNCTUATION,

  2819             HIRAGANA,

  2820             KATAKANA,

  2821             BOPOMOFO,

  2822             HANGUL_COMPATIBILITY_JAMO,

  2823             KANBUN,

  2824             BOPOMOFO_EXTENDED,

  2825             CJK_STROKES,

  2826             KATAKANA_PHONETIC_EXTENSIONS,

  2827             ENCLOSED_CJK_LETTERS_AND_MONTHS,

  2828             CJK_COMPATIBILITY,

  2829             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,

  2830             YIJING_HEXAGRAM_SYMBOLS,

  2831             CJK_UNIFIED_IDEOGRAPHS,

  2832             YI_SYLLABLES,

  2833             YI_RADICALS,

  2834             LISU,

  2835             VAI,

  2836             CYRILLIC_EXTENDED_B,

  2837             BAMUM,

  2838             MODIFIER_TONE_LETTERS,

  2839             LATIN_EXTENDED_D,

  2840             SYLOTI_NAGRI,

  2841             COMMON_INDIC_NUMBER_FORMS,

  2842             PHAGS_PA,

  2843             SAURASHTRA,

  2844             DEVANAGARI_EXTENDED,

  2845             KAYAH_LI,

  2846             REJANG,

  2847             HANGUL_JAMO_EXTENDED_A,

  2848             JAVANESE,

  2849             null,

  2850             CHAM,

  2851             MYANMAR_EXTENDED_A,

  2852             TAI_VIET,

  2853             null,

  2854             ETHIOPIC_EXTENDED_A,

  2855             null,

  2856             MEETEI_MAYEK,

  2857             HANGUL_SYLLABLES,

  2858             HANGUL_JAMO_EXTENDED_B,

  2859             HIGH_SURROGATES,

  2860             HIGH_PRIVATE_USE_SURROGATES,

  2861             LOW_SURROGATES,

  2862             PRIVATE_USE_AREA,

  2863             CJK_COMPATIBILITY_IDEOGRAPHS,

  2864             ALPHABETIC_PRESENTATION_FORMS,

  2865             ARABIC_PRESENTATION_FORMS_A,

  2866             VARIATION_SELECTORS,

  2867             VERTICAL_FORMS,

  2868             COMBINING_HALF_MARKS,

  2869             CJK_COMPATIBILITY_FORMS,

  2870             SMALL_FORM_VARIANTS,

  2871             ARABIC_PRESENTATION_FORMS_B,

  2872             HALFWIDTH_AND_FULLWIDTH_FORMS,

  2873             SPECIALS,

  2874             LINEAR_B_SYLLABARY,

  2875             LINEAR_B_IDEOGRAMS,

  2876             AEGEAN_NUMBERS,

  2877             ANCIENT_GREEK_NUMBERS,

  2878             ANCIENT_SYMBOLS,

  2879             PHAISTOS_DISC,

  2880             null,

  2881             LYCIAN,

  2882             CARIAN,

  2883             null,

  2884             OLD_ITALIC,

  2885             GOTHIC,

  2886             null,

  2887             UGARITIC,

  2888             OLD_PERSIAN,

  2889             null,

  2890             DESERET,

  2891             SHAVIAN,

  2892             OSMANYA,

  2893             null,

  2894             CYPRIOT_SYLLABARY,

  2895             IMPERIAL_ARAMAIC,

  2896             null,

  2897             PHOENICIAN,

  2898             LYDIAN,

  2899             null,

  2900             KHAROSHTHI,

  2901             OLD_SOUTH_ARABIAN,

  2902             null,

  2903             AVESTAN,

  2904             INSCRIPTIONAL_PARTHIAN,

  2905             INSCRIPTIONAL_PAHLAVI,

  2906             null,

  2907             OLD_TURKIC,

  2908             null,

  2909             RUMI_NUMERAL_SYMBOLS,

  2910             null,

  2911             BRAHMI,

  2912             KAITHI,

  2913             null,

  2914             CUNEIFORM,

  2915             CUNEIFORM_NUMBERS_AND_PUNCTUATION,

  2916             null,

  2917             EGYPTIAN_HIEROGLYPHS,

  2918             null,

  2919             BAMUM_SUPPLEMENT,

  2920             null,

  2921             KANA_SUPPLEMENT,

  2922             null,

  2923             BYZANTINE_MUSICAL_SYMBOLS,

  2924             MUSICAL_SYMBOLS,

  2925             ANCIENT_GREEK_MUSICAL_NOTATION,

  2926             null,

  2927             TAI_XUAN_JING_SYMBOLS,

  2928             COUNTING_ROD_NUMERALS,

  2929             null,

  2930             MATHEMATICAL_ALPHANUMERIC_SYMBOLS,

  2931             null,

  2932             MAHJONG_TILES,

  2933             DOMINO_TILES,

  2934             PLAYING_CARDS,

  2935             ENCLOSED_ALPHANUMERIC_SUPPLEMENT,

  2936             ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,

  2937             MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,

  2938             EMOTICONS,

  2939             null,

  2940             TRANSPORT_AND_MAP_SYMBOLS,

  2941             ALCHEMICAL_SYMBOLS,

  2942             null,

  2943             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,

  2944             null,

  2945             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,

  2946             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,

  2947             null,

  2948             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,

  2949             null,

  2950             TAGS,

  2951             null,

  2952             VARIATION_SELECTORS_SUPPLEMENT,

  2953             null,

  2954             SUPPLEMENTARY_PRIVATE_USE_AREA_A,

  2955             SUPPLEMENTARY_PRIVATE_USE_AREA_B

  2956         };

  2959         /**

  2960          * Returns the object representing the Unicode block containing the

  2961          * given character, or {@code null} if the character is not a

  2962          * member of a defined block.

  2963          *

  2964          * <p><b>Note:</b> This method cannot handle

  2965          * <a href="Character.html#supplementary"> supplementary

  2966          * characters</a>.  To support all Unicode characters, including

  2967          * supplementary characters, use the {@link #of(int)} method.

  2968          *

  2969          * @param   c  The character in question

  2970          * @return  The {@code UnicodeBlock} instance representing the

  2971          *          Unicode block of which this character is a member, or

  2972          *          {@code null} if the character is not a member of any

  2973          *          Unicode block

  2974          */

  2975         public static UnicodeBlock of(char c) {

  2976             return of((int)c);

  2977         }

  2979         /**

  2980          * Returns the object representing the Unicode block

  2981          * containing the given character (Unicode code point), or

  2982          * {@code null} if the character is not a member of a

  2983          * defined block.

  2984          *

  2985          * @param   codePoint the character (Unicode code point) in question.

  2986          * @return  The {@code UnicodeBlock} instance representing the

  2987          *          Unicode block of which this character is a member, or

  2988          *          {@code null} if the character is not a member of any

  2989          *          Unicode block

  2990          * @exception IllegalArgumentException if the specified

  2991          * {@code codePoint} is an invalid Unicode code point.

  2992          * @see Character#isValidCodePoint(int)

  2993          * @since   1.5

  2994          */

  2995         public static UnicodeBlock of(int codePoint) {

  2996             if (!isValidCodePoint(codePoint)) {

  2997                 throw new IllegalArgumentException();

  2998             }

  3000             int top, bottom, current;

  3001             bottom = 0;

  3002             top = blockStarts.length;

  3003             current = top/2;

  3005             // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]

  3006             while (top - bottom > 1) {

  3007                 if (codePoint >= blockStarts[current]) {

  3008                     bottom = current;

  3009                 } else {

  3010                     top = current;

  3011                 }

  3012                 current = (top + bottom) / 2;

  3013             }

  3014             return blocks[current];

  3015         }

  3017         /**

  3018          * Returns the UnicodeBlock with the given name. Block

  3019          * names are determined by The Unicode Standard. The file

  3020          * Blocks-&lt;version&gt;.txt defines blocks for a particular

  3021          * version of the standard. The {@link Character} class specifies

  3022          * the version of the standard that it supports.

  3023          * <p>

  3024          * This method accepts block names in the following forms:

  3025          * <ol>

  3026          * <li> Canonical block names as defined by the Unicode Standard.

  3027          * For example, the standard defines a "Basic Latin" block. Therefore, this

  3028          * method accepts "Basic Latin" as a valid block name. The documentation of

  3029          * each UnicodeBlock provides the canonical name.

  3030          * <li>Canonical block names with all spaces removed. For example, "BasicLatin"

  3031          * is a valid block name for the "Basic Latin" block.

  3032          * <li>The text representation of each constant UnicodeBlock identifier.

  3033          * For example, this method will return the {@link #BASIC_LATIN} block if

  3034          * provided with the "BASIC_LATIN" name. This form replaces all spaces and

  3035          * hyphens in the canonical name with underscores.

  3036          * </ol>

  3037          * Finally, character case is ignored for all of the valid block name forms.

  3038          * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.

  3039          * The en_US locale's case mapping rules are used to provide case-insensitive

  3040          * string comparisons for block name validation.

  3041          * <p>

  3042          * If the Unicode Standard changes block names, both the previous and

  3043          * current names will be accepted.

  3044          *

  3045          * @param blockName A {@code UnicodeBlock} name.

  3046          * @return The {@code UnicodeBlock} instance identified

  3047          *         by {@code blockName}

  3048          * @throws IllegalArgumentException if {@code blockName} is an

  3049          *         invalid name

  3050          * @throws NullPointerException if {@code blockName} is null

  3051          * @since 1.5

  3052          */

  3053         public static final UnicodeBlock forName(String blockName) {

  3054             UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));

  3055             if (block == null) {

  3056                 throw new IllegalArgumentException();

  3057             }

  3058             return block;

  3059         }

  3060     }

  3063     /**

  3064      * A family of character subsets representing the character scripts

  3065      * defined in the <a href="http://www.unicode.org/reports/tr24/">

  3066      * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode

  3067      * character is assigned to a single Unicode script, either a specific

  3068      * script, such as {@link Character.UnicodeScript#LATIN Latin}, or

  3069      * one of the following three special values,

  3070      * {@link Character.UnicodeScript#INHERITED Inherited},

  3071      * {@link Character.UnicodeScript#COMMON Common} or

  3072      * {@link Character.UnicodeScript#UNKNOWN Unknown}.

  3073      *

  3074      * @since 1.7

  3075      */

  3076     public static enum UnicodeScript {

  3077         /**

  3078          * Unicode script "Common".

  3079          */

  3080         COMMON,

  3082         /**

  3083          * Unicode script "Latin".

  3084          */

  3085         LATIN,

  3087         /**

  3088          * Unicode script "Greek".

  3089          */

  3090         GREEK,

  3092         /**

  3093          * Unicode script "Cyrillic".

  3094          */

  3095         CYRILLIC,

  3097         /**

  3098          * Unicode script "Armenian".

  3099          */

  3100         ARMENIAN,

  3102         /**

  3103          * Unicode script "Hebrew".

  3104          */

  3105         HEBREW,

  3107         /**

  3108          * Unicode script "Arabic".

  3109          */

  3110         ARABIC,

  3112         /**

  3113          * Unicode script "Syriac".

  3114          */

  3115         SYRIAC,

  3117         /**

  3118          * Unicode script "Thaana".

  3119          */

  3120         THAANA,

  3122         /**

  3123          * Unicode script "Devanagari".

  3124          */

  3125         DEVANAGARI,

  3127         /**

  3128          * Unicode script "Bengali".

  3129          */

  3130         BENGALI,

  3132         /**

  3133          * Unicode script "Gurmukhi".

  3134          */

  3135         GURMUKHI,

  3137         /**

  3138          * Unicode script "Gujarati".

  3139          */

  3140         GUJARATI,

  3142         /**

  3143          * Unicode script "Oriya".

  3144          */

  3145         ORIYA,

  3147         /**

  3148          * Unicode script "Tamil".

  3149          */

  3150         TAMIL,

  3152         /**

  3153          * Unicode script "Telugu".

  3154          */

  3155         TELUGU,

  3157         /**

  3158          * Unicode script "Kannada".

  3159          */

  3160         KANNADA,

  3162         /**

  3163          * Unicode script "Malayalam".

  3164          */

  3165         MALAYALAM,

  3167         /**

  3168          * Unicode script "Sinhala".

  3169          */

  3170         SINHALA,

  3172         /**

  3173          * Unicode script "Thai".

  3174          */

  3175         THAI,

  3177         /**

  3178          * Unicode script "Lao".

  3179          */

  3180         LAO,

  3182         /**

  3183          * Unicode script "Tibetan".

  3184          */

  3185         TIBETAN,

  3187         /**

  3188          * Unicode script "Myanmar".

  3189          */

  3190         MYANMAR,

  3192         /**

  3193          * Unicode script "Georgian".

  3194          */

  3195         GEORGIAN,

  3197         /**

  3198          * Unicode script "Hangul".

  3199          */

  3200         HANGUL,

  3202         /**

  3203          * Unicode script "Ethiopic".

  3204          */

  3205         ETHIOPIC,

  3207         /**

  3208          * Unicode script "Cherokee".

  3209          */

  3210         CHEROKEE,

  3212         /**

  3213          * Unicode script "Canadian_Aboriginal".

  3214          */

  3215         CANADIAN_ABORIGINAL,

  3217         /**

  3218          * Unicode script "Ogham".

  3219          */

  3220         OGHAM,

  3222         /**

  3223          * Unicode script "Runic".

  3224          */

  3225         RUNIC,

  3227         /**

  3228          * Unicode script "Khmer".

  3229          */

  3230         KHMER,

  3232         /**

  3233          * Unicode script "Mongolian".

  3234          */

  3235         MONGOLIAN,

  3237         /**

  3238          * Unicode script "Hiragana".

  3239          */

  3240         HIRAGANA,

  3242         /**

  3243          * Unicode script "Katakana".

  3244          */

  3245         KATAKANA,

  3247         /**

  3248          * Unicode script "Bopomofo".

  3249          */

  3250         BOPOMOFO,

  3252         /**

  3253          * Unicode script "Han".

  3254          */

  3255         HAN,

  3257         /**

  3258          * Unicode script "Yi".

  3259          */

  3260         YI,

  3262         /**

  3263          * Unicode script "Old_Italic".

  3264          */

  3265         OLD_ITALIC,

  3267         /**

  3268          * Unicode script "Gothic".

  3269          */

  3270         GOTHIC,

  3272         /**

  3273          * Unicode script "Deseret".

  3274          */

  3275         DESERET,

  3277         /**

  3278          * Unicode script "Inherited".

  3279          */

  3280         INHERITED,

  3282         /**

  3283          * Unicode script "Tagalog".

  3284          */

  3285         TAGALOG,

  3287         /**

  3288          * Unicode script "Hanunoo".

  3289          */

  3290         HANUNOO,

  3292         /**

  3293          * Unicode script "Buhid".

  3294          */

  3295         BUHID,

  3297         /**

  3298          * Unicode script "Tagbanwa".

  3299          */

  3300         TAGBANWA,

  3302         /**

  3303          * Unicode script "Limbu".

  3304          */

  3305         LIMBU,

  3307         /**

  3308          * Unicode script "Tai_Le".

  3309          */

  3310         TAI_LE,

  3312         /**

  3313          * Unicode script "Linear_B".

  3314          */

  3315         LINEAR_B,

  3317         /**

  3318          * Unicode script "Ugaritic".

  3319          */

  3320         UGARITIC,

  3322         /**

  3323          * Unicode script "Shavian".

  3324          */

  3325         SHAVIAN,

  3327         /**

  3328          * Unicode script "Osmanya".

  3329          */

  3330         OSMANYA,

  3332         /**

  3333          * Unicode script "Cypriot".

  3334          */

  3335         CYPRIOT,

  3337         /**

  3338          * Unicode script "Braille".

  3339          */

  3340         BRAILLE,

  3342         /**

  3343          * Unicode script "Buginese".

  3344          */

  3345         BUGINESE,

  3347         /**

  3348          * Unicode script "Coptic".

  3349          */

  3350         COPTIC,

  3352         /**

  3353          * Unicode script "New_Tai_Lue".

  3354          */

  3355         NEW_TAI_LUE,

  3357         /**

  3358          * Unicode script "Glagolitic".

  3359          */

  3360         GLAGOLITIC,

  3362         /**

  3363          * Unicode script "Tifinagh".

  3364          */

  3365         TIFINAGH,

  3367         /**

  3368          * Unicode script "Syloti_Nagri".

  3369          */

  3370         SYLOTI_NAGRI,

  3372         /**

  3373          * Unicode script "Old_Persian".

  3374          */

  3375         OLD_PERSIAN,

  3377         /**

  3378          * Unicode script "Kharoshthi".

  3379          */

  3380         KHAROSHTHI,

  3382         /**

  3383          * Unicode script "Balinese".

  3384          */

  3385         BALINESE,

  3387         /**

  3388          * Unicode script "Cuneiform".

  3389          */

  3390         CUNEIFORM,

  3392         /**

  3393          * Unicode script "Phoenician".

  3394          */

  3395         PHOENICIAN,

  3397         /**

  3398          * Unicode script "Phags_Pa".

  3399          */

  3400         PHAGS_PA,

  3402         /**

  3403          * Unicode script "Nko".

  3404          */

  3405         NKO,

  3407         /**

  3408          * Unicode script "Sundanese".

  3409          */

  3410         SUNDANESE,

  3412         /**

  3413          * Unicode script "Batak".

  3414          */

  3415         BATAK,

  3417         /**

  3418          * Unicode script "Lepcha".

  3419          */

  3420         LEPCHA,

  3422         /**

  3423          * Unicode script "Ol_Chiki".

  3424          */

  3425         OL_CHIKI,

  3427         /**

  3428          * Unicode script "Vai".

  3429          */

  3430         VAI,

  3432         /**

  3433          * Unicode script "Saurashtra".

  3434          */

  3435         SAURASHTRA,

  3437         /**

  3438          * Unicode script "Kayah_Li".

  3439          */

  3440         KAYAH_LI,

  3442         /**

  3443          * Unicode script "Rejang".

  3444          */

  3445         REJANG,

  3447         /**

  3448          * Unicode script "Lycian".

  3449          */

  3450         LYCIAN,

  3452         /**

  3453          * Unicode script "Carian".

  3454          */

  3455         CARIAN,

  3457         /**

  3458          * Unicode script "Lydian".

  3459          */

  3460         LYDIAN,

  3462         /**

  3463          * Unicode script "Cham".

  3464          */

  3465         CHAM,

  3467         /**

  3468          * Unicode script "Tai_Tham".

  3469          */

  3470         TAI_THAM,

  3472         /**

  3473          * Unicode script "Tai_Viet".

  3474          */

  3475         TAI_VIET,

  3477         /**

  3478          * Unicode script "Avestan".

  3479          */

  3480         AVESTAN,

  3482         /**

  3483          * Unicode script "Egyptian_Hieroglyphs".

  3484          */

  3485         EGYPTIAN_HIEROGLYPHS,

  3487         /**

  3488          * Unicode script "Samaritan".

  3489          */

  3490         SAMARITAN,

  3492         /**

  3493          * Unicode script "Mandaic".

  3494          */

  3495         MANDAIC,

  3497         /**

  3498          * Unicode script "Lisu".

  3499          */

  3500         LISU,

  3502         /**

  3503          * Unicode script "Bamum".

  3504          */

  3505         BAMUM,

  3507         /**

  3508          * Unicode script "Javanese".

  3509          */

  3510         JAVANESE,

  3512         /**

  3513          * Unicode script "Meetei_Mayek".

  3514          */

  3515         MEETEI_MAYEK,

  3517         /**

  3518          * Unicode script "Imperial_Aramaic".

  3519          */

  3520         IMPERIAL_ARAMAIC,

  3522         /**

  3523          * Unicode script "Old_South_Arabian".

  3524          */

  3525         OLD_SOUTH_ARABIAN,

  3527         /**

  3528          * Unicode script "Inscriptional_Parthian".

  3529          */

  3530         INSCRIPTIONAL_PARTHIAN,

  3532         /**

  3533          * Unicode script "Inscriptional_Pahlavi".

  3534          */

  3535         INSCRIPTIONAL_PAHLAVI,

  3537         /**

  3538          * Unicode script "Old_Turkic".

  3539          */

  3540         OLD_TURKIC,

  3542         /**

  3543          * Unicode script "Brahmi".

  3544          */

  3545         BRAHMI,

  3547         /**

  3548          * Unicode script "Kaithi".

  3549          */

  3550         KAITHI,

  3552         /**

  3553          * Unicode script "Unknown".

  3554          */

  3555         UNKNOWN;

  3557         private static final int[] scriptStarts = {

  3558             0x0000,   // 0000..0040; COMMON

  3559             0x0041,   // 0041..005A; LATIN

  3560             0x005B,   // 005B..0060; COMMON

  3561             0x0061,   // 0061..007A; LATIN

  3562             0x007B,   // 007B..00A9; COMMON

  3563             0x00AA,   // 00AA..00AA; LATIN

  3564             0x00AB,   // 00AB..00B9; COMMON

  3565             0x00BA,   // 00BA..00BA; LATIN

  3566             0x00BB,   // 00BB..00BF; COMMON

  3567             0x00C0,   // 00C0..00D6; LATIN

  3568             0x00D7,   // 00D7..00D7; COMMON

  3569             0x00D8,   // 00D8..00F6; LATIN

  3570             0x00F7,   // 00F7..00F7; COMMON

  3571             0x00F8,   // 00F8..02B8; LATIN

  3572             0x02B9,   // 02B9..02DF; COMMON

  3573             0x02E0,   // 02E0..02E4; LATIN

  3574             0x02E5,   // 02E5..02E9; COMMON

  3575             0x02EA,   // 02EA..02EB; BOPOMOFO

  3576             0x02EC,   // 02EC..02FF; COMMON

  3577             0x0300,   // 0300..036F; INHERITED

  3578             0x0370,   // 0370..0373; GREEK

  3579             0x0374,   // 0374..0374; COMMON

  3580             0x0375,   // 0375..037D; GREEK

  3581             0x037E,   // 037E..0383; COMMON

  3582             0x0384,   // 0384..0384; GREEK

  3583             0x0385,   // 0385..0385; COMMON

  3584             0x0386,   // 0386..0386; GREEK

  3585             0x0387,   // 0387..0387; COMMON

  3586             0x0388,   // 0388..03E1; GREEK

  3587             0x03E2,   // 03E2..03EF; COPTIC

  3588             0x03F0,   // 03F0..03FF; GREEK

  3589             0x0400,   // 0400..0484; CYRILLIC

  3590             0x0485,   // 0485..0486; INHERITED

  3591             0x0487,   // 0487..0530; CYRILLIC

  3592             0x0531,   // 0531..0588; ARMENIAN

  3593             0x0589,   // 0589..0589; COMMON

  3594             0x058A,   // 058A..0590; ARMENIAN

  3595             0x0591,   // 0591..05FF; HEBREW

  3596             0x0600,   // 0600..060B; ARABIC

  3597             0x060C,   // 060C..060C; COMMON

  3598             0x060D,   // 060D..061A; ARABIC

  3599             0x061B,   // 061B..061D; COMMON

  3600             0x061E,   // 061E..061E; ARABIC

  3601             0x061F,   // 061F..061F; COMMON

  3602             0x0620,   // 0620..063F; ARABIC

  3603             0x0640,   // 0640..0640; COMMON

  3604             0x0641,   // 0641..064A; ARABIC

  3605             0x064B,   // 064B..0655; INHERITED

  3606             0x0656,   // 0656..065E; ARABIC

  3607             0x065F,   // 065F..065F; INHERITED

  3608             0x0660,   // 0660..0669; COMMON

  3609             0x066A,   // 066A..066F; ARABIC

  3610             0x0670,   // 0670..0670; INHERITED

  3611             0x0671,   // 0671..06DC; ARABIC

  3612             0x06DD,   // 06DD..06DD; COMMON

  3613             0x06DE,   // 06DE..06FF; ARABIC

  3614             0x0700,   // 0700..074F; SYRIAC

  3615             0x0750,   // 0750..077F; ARABIC

  3616             0x0780,   // 0780..07BF; THAANA

  3617             0x07C0,   // 07C0..07FF; NKO

  3618             0x0800,   // 0800..083F; SAMARITAN

  3619             0x0840,   // 0840..08FF; MANDAIC

  3620             0x0900,   // 0900..0950; DEVANAGARI

  3621             0x0951,   // 0951..0952; INHERITED

  3622             0x0953,   // 0953..0963; DEVANAGARI

  3623             0x0964,   // 0964..0965; COMMON

  3624             0x0966,   // 0966..096F; DEVANAGARI

  3625             0x0970,   // 0970..0970; COMMON

  3626             0x0971,   // 0971..0980; DEVANAGARI

  3627             0x0981,   // 0981..0A00; BENGALI

  3628             0x0A01,   // 0A01..0A80; GURMUKHI

  3629             0x0A81,   // 0A81..0B00; GUJARATI

  3630             0x0B01,   // 0B01..0B81; ORIYA

  3631             0x0B82,   // 0B82..0C00; TAMIL

  3632             0x0C01,   // 0C01..0C81; TELUGU

  3633             0x0C82,   // 0C82..0CF0; KANNADA

  3634             0x0D02,   // 0D02..0D81; MALAYALAM

  3635             0x0D82,   // 0D82..0E00; SINHALA

  3636             0x0E01,   // 0E01..0E3E; THAI

  3637             0x0E3F,   // 0E3F..0E3F; COMMON

  3638             0x0E40,   // 0E40..0E80; THAI

  3639             0x0E81,   // 0E81..0EFF; LAO

  3640             0x0F00,   // 0F00..0FD4; TIBETAN

  3641             0x0FD5,   // 0FD5..0FD8; COMMON

  3642             0x0FD9,   // 0FD9..0FFF; TIBETAN

  3643             0x1000,   // 1000..109F; MYANMAR

  3644             0x10A0,   // 10A0..10FA; GEORGIAN

  3645             0x10FB,   // 10FB..10FB; COMMON

  3646             0x10FC,   // 10FC..10FF; GEORGIAN

  3647             0x1100,   // 1100..11FF; HANGUL

  3648             0x1200,   // 1200..139F; ETHIOPIC

  3649             0x13A0,   // 13A0..13FF; CHEROKEE

  3650             0x1400,   // 1400..167F; CANADIAN_ABORIGINAL

  3651             0x1680,   // 1680..169F; OGHAM

  3652             0x16A0,   // 16A0..16EA; RUNIC

  3653             0x16EB,   // 16EB..16ED; COMMON

  3654             0x16EE,   // 16EE..16FF; RUNIC

  3655             0x1700,   // 1700..171F; TAGALOG

  3656             0x1720,   // 1720..1734; HANUNOO

  3657             0x1735,   // 1735..173F; COMMON

  3658             0x1740,   // 1740..175F; BUHID

  3659             0x1760,   // 1760..177F; TAGBANWA

  3660             0x1780,   // 1780..17FF; KHMER

  3661             0x1800,   // 1800..1801; MONGOLIAN

  3662             0x1802,   // 1802..1803; COMMON

  3663             0x1804,   // 1804..1804; MONGOLIAN

  3664             0x1805,   // 1805..1805; COMMON

  3665             0x1806,   // 1806..18AF; MONGOLIAN

  3666             0x18B0,   // 18B0..18FF; CANADIAN_ABORIGINAL

  3667             0x1900,   // 1900..194F; LIMBU

  3668             0x1950,   // 1950..197F; TAI_LE

  3669             0x1980,   // 1980..19DF; NEW_TAI_LUE

  3670             0x19E0,   // 19E0..19FF; KHMER

  3671             0x1A00,   // 1A00..1A1F; BUGINESE

  3672             0x1A20,   // 1A20..1AFF; TAI_THAM

  3673             0x1B00,   // 1B00..1B7F; BALINESE

  3674             0x1B80,   // 1B80..1BBF; SUNDANESE

  3675             0x1BC0,   // 1BC0..1BFF; BATAK

  3676             0x1C00,   // 1C00..1C4F; LEPCHA

  3677             0x1C50,   // 1C50..1CCF; OL_CHIKI

  3678             0x1CD0,   // 1CD0..1CD2; INHERITED

  3679             0x1CD3,   // 1CD3..1CD3; COMMON

  3680             0x1CD4,   // 1CD4..1CE0; INHERITED

  3681             0x1CE1,   // 1CE1..1CE1; COMMON

  3682             0x1CE2,   // 1CE2..1CE8; INHERITED

  3683             0x1CE9,   // 1CE9..1CEC; COMMON

  3684             0x1CED,   // 1CED..1CED; INHERITED

  3685             0x1CEE,   // 1CEE..1CFF; COMMON

  3686             0x1D00,   // 1D00..1D25; LATIN

  3687             0x1D26,   // 1D26..1D2A; GREEK

  3688             0x1D2B,   // 1D2B..1D2B; CYRILLIC

  3689             0x1D2C,   // 1D2C..1D5C; LATIN

  3690             0x1D5D,   // 1D5D..1D61; GREEK

  3691             0x1D62,   // 1D62..1D65; LATIN

  3692             0x1D66,   // 1D66..1D6A; GREEK

  3693             0x1D6B,   // 1D6B..1D77; LATIN

  3694             0x1D78,   // 1D78..1D78; CYRILLIC

  3695             0x1D79,   // 1D79..1DBE; LATIN

  3696             0x1DBF,   // 1DBF..1DBF; GREEK

  3697             0x1DC0,   // 1DC0..1DFF; INHERITED

  3698             0x1E00,   // 1E00..1EFF; LATIN

  3699             0x1F00,   // 1F00..1FFF; GREEK

  3700             0x2000,   // 2000..200B; COMMON

  3701             0x200C,   // 200C..200D; INHERITED

  3702             0x200E,   // 200E..2070; COMMON

  3703             0x2071,   // 2071..2073; LATIN

  3704             0x2074,   // 2074..207E; COMMON

  3705             0x207F,   // 207F..207F; LATIN

  3706             0x2080,   // 2080..208F; COMMON

  3707             0x2090,   // 2090..209F; LATIN

  3708             0x20A0,   // 20A0..20CF; COMMON

  3709             0x20D0,   // 20D0..20FF; INHERITED

  3710             0x2100,   // 2100..2125; COMMON

  3711             0x2126,   // 2126..2126; GREEK

  3712             0x2127,   // 2127..2129; COMMON

  3713             0x212A,   // 212A..212B; LATIN

  3714             0x212C,   // 212C..2131; COMMON

  3715             0x2132,   // 2132..2132; LATIN

  3716             0x2133,   // 2133..214D; COMMON

  3717             0x214E,   // 214E..214E; LATIN

  3718             0x214F,   // 214F..215F; COMMON

  3719             0x2160,   // 2160..2188; LATIN

  3720             0x2189,   // 2189..27FF; COMMON

  3721             0x2800,   // 2800..28FF; BRAILLE

  3722             0x2900,   // 2900..2BFF; COMMON

  3723             0x2C00,   // 2C00..2C5F; GLAGOLITIC

  3724             0x2C60,   // 2C60..2C7F; LATIN

  3725             0x2C80,   // 2C80..2CFF; COPTIC

  3726             0x2D00,   // 2D00..2D2F; GEORGIAN

  3727             0x2D30,   // 2D30..2D7F; TIFINAGH

  3728             0x2D80,   // 2D80..2DDF; ETHIOPIC

  3729             0x2DE0,   // 2DE0..2DFF; CYRILLIC

  3730             0x2E00,   // 2E00..2E7F; COMMON

  3731             0x2E80,   // 2E80..2FEF; HAN

  3732             0x2FF0,   // 2FF0..3004; COMMON

  3733             0x3005,   // 3005..3005; HAN

  3734             0x3006,   // 3006..3006; COMMON

  3735             0x3007,   // 3007..3007; HAN

  3736             0x3008,   // 3008..3020; COMMON

  3737             0x3021,   // 3021..3029; HAN

  3738             0x302A,   // 302A..302D; INHERITED

  3739             0x302E,   // 302E..302F; HANGUL

  3740             0x3030,   // 3030..3037; COMMON

  3741             0x3038,   // 3038..303B; HAN

  3742             0x303C,   // 303C..3040; COMMON

  3743             0x3041,   // 3041..3098; HIRAGANA

  3744             0x3099,   // 3099..309A; INHERITED

  3745             0x309B,   // 309B..309C; COMMON

  3746             0x309D,   // 309D..309F; HIRAGANA

  3747             0x30A0,   // 30A0..30A0; COMMON

  3748             0x30A1,   // 30A1..30FA; KATAKANA

  3749             0x30FB,   // 30FB..30FC; COMMON

  3750             0x30FD,   // 30FD..3104; KATAKANA

  3751             0x3105,   // 3105..3130; BOPOMOFO

  3752             0x3131,   // 3131..318F; HANGUL

  3753             0x3190,   // 3190..319F; COMMON

  3754             0x31A0,   // 31A0..31BF; BOPOMOFO

  3755             0x31C0,   // 31C0..31EF; COMMON

  3756             0x31F0,   // 31F0..31FF; KATAKANA

  3757             0x3200,   // 3200..321F; HANGUL

  3758             0x3220,   // 3220..325F; COMMON

  3759             0x3260,   // 3260..327E; HANGUL

  3760             0x327F,   // 327F..32CF; COMMON

  3761             0x32D0,   // 32D0..3357; KATAKANA

  3762             0x3358,   // 3358..33FF; COMMON

  3763             0x3400,   // 3400..4DBF; HAN

  3764             0x4DC0,   // 4DC0..4DFF; COMMON

  3765             0x4E00,   // 4E00..9FFF; HAN

  3766             0xA000,   // A000..A4CF; YI

  3767             0xA4D0,   // A4D0..A4FF; LISU

  3768             0xA500,   // A500..A63F; VAI

  3769             0xA640,   // A640..A69F; CYRILLIC

  3770             0xA6A0,   // A6A0..A6FF; BAMUM

  3771             0xA700,   // A700..A721; COMMON

  3772             0xA722,   // A722..A787; LATIN

  3773             0xA788,   // A788..A78A; COMMON

  3774             0xA78B,   // A78B..A7FF; LATIN

  3775             0xA800,   // A800..A82F; SYLOTI_NAGRI

  3776             0xA830,   // A830..A83F; COMMON

  3777             0xA840,   // A840..A87F; PHAGS_PA

  3778             0xA880,   // A880..A8DF; SAURASHTRA

  3779             0xA8E0,   // A8E0..A8FF; DEVANAGARI

  3780             0xA900,   // A900..A92F; KAYAH_LI

  3781             0xA930,   // A930..A95F; REJANG

  3782             0xA960,   // A960..A97F; HANGUL

  3783             0xA980,   // A980..A9FF; JAVANESE

  3784             0xAA00,   // AA00..AA5F; CHAM

  3785             0xAA60,   // AA60..AA7F; MYANMAR

  3786             0xAA80,   // AA80..AB00; TAI_VIET

  3787             0xAB01,   // AB01..ABBF; ETHIOPIC

  3788             0xABC0,   // ABC0..ABFF; MEETEI_MAYEK

  3789             0xAC00,   // AC00..D7FB; HANGUL

  3790             0xD7FC,   // D7FC..F8FF; UNKNOWN

  3791             0xF900,   // F900..FAFF; HAN

  3792             0xFB00,   // FB00..FB12; LATIN

  3793             0xFB13,   // FB13..FB1C; ARMENIAN

  3794             0xFB1D,   // FB1D..FB4F; HEBREW

  3795             0xFB50,   // FB50..FD3D; ARABIC

  3796             0xFD3E,   // FD3E..FD4F; COMMON

  3797             0xFD50,   // FD50..FDFC; ARABIC

  3798             0xFDFD,   // FDFD..FDFF; COMMON

  3799             0xFE00,   // FE00..FE0F; INHERITED

  3800             0xFE10,   // FE10..FE1F; COMMON

  3801             0xFE20,   // FE20..FE2F; INHERITED

  3802             0xFE30,   // FE30..FE6F; COMMON

  3803             0xFE70,   // FE70..FEFE; ARABIC

  3804             0xFEFF,   // FEFF..FF20; COMMON

  3805             0xFF21,   // FF21..FF3A; LATIN

  3806             0xFF3B,   // FF3B..FF40; COMMON

  3807             0xFF41,   // FF41..FF5A; LATIN

  3808             0xFF5B,   // FF5B..FF65; COMMON

  3809             0xFF66,   // FF66..FF6F; KATAKANA

  3810             0xFF70,   // FF70..FF70; COMMON

  3811             0xFF71,   // FF71..FF9D; KATAKANA

  3812             0xFF9E,   // FF9E..FF9F; COMMON

  3813             0xFFA0,   // FFA0..FFDF; HANGUL

  3814             0xFFE0,   // FFE0..FFFF; COMMON

  3815             0x10000,  // 10000..100FF; LINEAR_B

  3816             0x10100,  // 10100..1013F; COMMON

  3817             0x10140,  // 10140..1018F; GREEK

  3818             0x10190,  // 10190..101FC; COMMON

  3819             0x101FD,  // 101FD..1027F; INHERITED

  3820             0x10280,  // 10280..1029F; LYCIAN

  3821             0x102A0,  // 102A0..102FF; CARIAN

  3822             0x10300,  // 10300..1032F; OLD_ITALIC

  3823             0x10330,  // 10330..1037F; GOTHIC

  3824             0x10380,  // 10380..1039F; UGARITIC

  3825             0x103A0,  // 103A0..103FF; OLD_PERSIAN

  3826             0x10400,  // 10400..1044F; DESERET

  3827             0x10450,  // 10450..1047F; SHAVIAN

  3828             0x10480,  // 10480..107FF; OSMANYA

  3829             0x10800,  // 10800..1083F; CYPRIOT

  3830             0x10840,  // 10840..108FF; IMPERIAL_ARAMAIC

  3831             0x10900,  // 10900..1091F; PHOENICIAN

  3832             0x10920,  // 10920..109FF; LYDIAN

  3833             0x10A00,  // 10A00..10A5F; KHAROSHTHI

  3834             0x10A60,  // 10A60..10AFF; OLD_SOUTH_ARABIAN

  3835             0x10B00,  // 10B00..10B3F; AVESTAN

  3836             0x10B40,  // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN

  3837             0x10B60,  // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI

  3838             0x10C00,  // 10C00..10E5F; OLD_TURKIC

  3839             0x10E60,  // 10E60..10FFF; ARABIC

  3840             0x11000,  // 11000..1107F; BRAHMI

  3841             0x11080,  // 11080..11FFF; KAITHI

  3842             0x12000,  // 12000..12FFF; CUNEIFORM

  3843             0x13000,  // 13000..167FF; EGYPTIAN_HIEROGLYPHS

  3844             0x16800,  // 16800..16A38; BAMUM

  3845             0x1B000,  // 1B000..1B000; KATAKANA

  3846             0x1B001,  // 1B001..1CFFF; HIRAGANA

  3847             0x1D000,  // 1D000..1D166; COMMON

  3848             0x1D167,  // 1D167..1D169; INHERITED

  3849             0x1D16A,  // 1D16A..1D17A; COMMON

  3850             0x1D17B,  // 1D17B..1D182; INHERITED

  3851             0x1D183,  // 1D183..1D184; COMMON

  3852             0x1D185,  // 1D185..1D18B; INHERITED

  3853             0x1D18C,  // 1D18C..1D1A9; COMMON

  3854             0x1D1AA,  // 1D1AA..1D1AD; INHERITED

  3855             0x1D1AE,  // 1D1AE..1D1FF; COMMON

  3856             0x1D200,  // 1D200..1D2FF; GREEK

  3857             0x1D300,  // 1D300..1F1FF; COMMON

  3858             0x1F200,  // 1F200..1F200; HIRAGANA

  3859             0x1F201,  // 1F210..1FFFF; COMMON

  3860             0x20000,  // 20000..E0000; HAN

  3861             0xE0001,  // E0001..E00FF; COMMON

  3862             0xE0100,  // E0100..E01EF; INHERITED

  3863             0xE01F0   // E01F0..10FFFF; UNKNOWN

  3865         };

  3867         private static final UnicodeScript[] scripts = {

                   // Parallel to scriptStarts: scripts[i] is the script of the
                   // code point range that begins at scriptStarts[i]. of(int)
                   // binary-searches scriptStarts and uses the resulting index
                   // here, so both tables must keep the same length and order.

  3868             COMMON,

  3869             LATIN,

  3870             COMMON,

  3871             LATIN,

  3872             COMMON,

  3873             LATIN,

  3874             COMMON,

  3875             LATIN,

  3876             COMMON,

  3877             LATIN,

  3878             COMMON,

  3879             LATIN,

  3880             COMMON,

  3881             LATIN,

  3882             COMMON,

  3883             LATIN,

  3884             COMMON,

  3885             BOPOMOFO,

  3886             COMMON,

  3887             INHERITED,

  3888             GREEK,

  3889             COMMON,

  3890             GREEK,

  3891             COMMON,

  3892             GREEK,

  3893             COMMON,

  3894             GREEK,

  3895             COMMON,

  3896             GREEK,

  3897             COPTIC,

  3898             GREEK,

  3899             CYRILLIC,

  3900             INHERITED,

  3901             CYRILLIC,

  3902             ARMENIAN,

  3903             COMMON,

  3904             ARMENIAN,

  3905             HEBREW,

  3906             ARABIC,

  3907             COMMON,

  3908             ARABIC,

  3909             COMMON,

  3910             ARABIC,

  3911             COMMON,

  3912             ARABIC,

  3913             COMMON,

  3914             ARABIC,

  3915             INHERITED,

  3916             ARABIC,

  3917             INHERITED,

  3918             COMMON,

  3919             ARABIC,

  3920             INHERITED,

  3921             ARABIC,

  3922             COMMON,

  3923             ARABIC,

  3924             SYRIAC,

  3925             ARABIC,

  3926             THAANA,

  3927             NKO,

  3928             SAMARITAN,

  3929             MANDAIC,

  3930             DEVANAGARI,

  3931             INHERITED,

  3932             DEVANAGARI,

  3933             COMMON,

  3934             DEVANAGARI,

  3935             COMMON,

  3936             DEVANAGARI,

  3937             BENGALI,

  3938             GURMUKHI,

  3939             GUJARATI,

  3940             ORIYA,

  3941             TAMIL,

  3942             TELUGU,

  3943             KANNADA,

  3944             MALAYALAM,

  3945             SINHALA,

  3946             THAI,

  3947             COMMON,

  3948             THAI,

  3949             LAO,

  3950             TIBETAN,

  3951             COMMON,

  3952             TIBETAN,

  3953             MYANMAR,

  3954             GEORGIAN,

  3955             COMMON,

  3956             GEORGIAN,

  3957             HANGUL,

  3958             ETHIOPIC,

  3959             CHEROKEE,

  3960             CANADIAN_ABORIGINAL,

  3961             OGHAM,

  3962             RUNIC,

  3963             COMMON,

  3964             RUNIC,

  3965             TAGALOG,

  3966             HANUNOO,

  3967             COMMON,

  3968             BUHID,

  3969             TAGBANWA,

  3970             KHMER,

  3971             MONGOLIAN,

  3972             COMMON,

  3973             MONGOLIAN,

  3974             COMMON,

  3975             MONGOLIAN,

  3976             CANADIAN_ABORIGINAL,

  3977             LIMBU,

  3978             TAI_LE,

  3979             NEW_TAI_LUE,

  3980             KHMER,

  3981             BUGINESE,

  3982             TAI_THAM,

  3983             BALINESE,

  3984             SUNDANESE,

  3985             BATAK,

  3986             LEPCHA,

  3987             OL_CHIKI,

  3988             INHERITED,

  3989             COMMON,

  3990             INHERITED,

  3991             COMMON,

  3992             INHERITED,

  3993             COMMON,

  3994             INHERITED,

  3995             COMMON,

  3996             LATIN,

  3997             GREEK,

  3998             CYRILLIC,

  3999             LATIN,

  4000             GREEK,

  4001             LATIN,

  4002             GREEK,

  4003             LATIN,

  4004             CYRILLIC,

  4005             LATIN,

  4006             GREEK,

  4007             INHERITED,

  4008             LATIN,

  4009             GREEK,

  4010             COMMON,

  4011             INHERITED,

  4012             COMMON,

  4013             LATIN,

  4014             COMMON,

  4015             LATIN,

  4016             COMMON,

  4017             LATIN,

  4018             COMMON,

  4019             INHERITED,

  4020             COMMON,

  4021             GREEK,

  4022             COMMON,

  4023             LATIN,

  4024             COMMON,

  4025             LATIN,

  4026             COMMON,

  4027             LATIN,

  4028             COMMON,

  4029             LATIN,

  4030             COMMON,

  4031             BRAILLE,

  4032             COMMON,

  4033             GLAGOLITIC,

  4034             LATIN,

  4035             COPTIC,

  4036             GEORGIAN,

  4037             TIFINAGH,

  4038             ETHIOPIC,

  4039             CYRILLIC,

  4040             COMMON,

  4041             HAN,

  4042             COMMON,

  4043             HAN,

  4044             COMMON,

  4045             HAN,

  4046             COMMON,

  4047             HAN,

  4048             INHERITED,

  4049             HANGUL,

  4050             COMMON,

  4051             HAN,

  4052             COMMON,

  4053             HIRAGANA,

  4054             INHERITED,

  4055             COMMON,

  4056             HIRAGANA,

  4057             COMMON,

  4058             KATAKANA,

  4059             COMMON,

  4060             KATAKANA,

  4061             BOPOMOFO,

  4062             HANGUL,

  4063             COMMON,

  4064             BOPOMOFO,

  4065             COMMON,

  4066             KATAKANA,

  4067             HANGUL,

  4068             COMMON,

  4069             HANGUL,

  4070             COMMON,

  4071             KATAKANA,

  4072             COMMON,

  4073             HAN,

  4074             COMMON,

  4075             HAN,

  4076             YI,

  4077             LISU,

  4078             VAI,

  4079             CYRILLIC,

  4080             BAMUM,

  4081             COMMON,

  4082             LATIN,

  4083             COMMON,

  4084             LATIN,

  4085             SYLOTI_NAGRI,

  4086             COMMON,

  4087             PHAGS_PA,

  4088             SAURASHTRA,

  4089             DEVANAGARI,

  4090             KAYAH_LI,

  4091             REJANG,

  4092             HANGUL,

  4093             JAVANESE,

  4094             CHAM,

  4095             MYANMAR,

  4096             TAI_VIET,

  4097             ETHIOPIC,

  4098             MEETEI_MAYEK,

  4099             HANGUL,

  4100             UNKNOWN,

  4101             HAN,

  4102             LATIN,

  4103             ARMENIAN,

  4104             HEBREW,

  4105             ARABIC,

  4106             COMMON,

  4107             ARABIC,

  4108             COMMON,

  4109             INHERITED,

  4110             COMMON,

  4111             INHERITED,

  4112             COMMON,

  4113             ARABIC,

  4114             COMMON,

  4115             LATIN,

  4116             COMMON,

  4117             LATIN,

  4118             COMMON,

  4119             KATAKANA,

  4120             COMMON,

  4121             KATAKANA,

  4122             COMMON,

  4123             HANGUL,

  4124             COMMON,

  4125             LINEAR_B,

  4126             COMMON,

  4127             GREEK,

  4128             COMMON,

  4129             INHERITED,

  4130             LYCIAN,

  4131             CARIAN,

  4132             OLD_ITALIC,

  4133             GOTHIC,

  4134             UGARITIC,

  4135             OLD_PERSIAN,

  4136             DESERET,

  4137             SHAVIAN,

  4138             OSMANYA,

  4139             CYPRIOT,

  4140             IMPERIAL_ARAMAIC,

  4141             PHOENICIAN,

  4142             LYDIAN,

  4143             KHAROSHTHI,

  4144             OLD_SOUTH_ARABIAN,

  4145             AVESTAN,

  4146             INSCRIPTIONAL_PARTHIAN,

  4147             INSCRIPTIONAL_PAHLAVI,

  4148             OLD_TURKIC,

  4149             ARABIC,

  4150             BRAHMI,

  4151             KAITHI,

  4152             CUNEIFORM,

  4153             EGYPTIAN_HIEROGLYPHS,

  4154             BAMUM,

  4155             KATAKANA,

  4156             HIRAGANA,

  4157             COMMON,

  4158             INHERITED,

  4159             COMMON,

  4160             INHERITED,

  4161             COMMON,

  4162             INHERITED,

  4163             COMMON,

  4164             INHERITED,

  4165             COMMON,

  4166             GREEK,

  4167             COMMON,

  4168             HIRAGANA,

  4169             COMMON,

  4170             HAN,

  4171             COMMON,

  4172             INHERITED,

  4173             UNKNOWN

  4174         };

  4176         private static HashMap<String, Character.UnicodeScript> aliases;

  4177         static {

  4178             aliases = new HashMap<>(128);

  4179             aliases.put("ARAB", ARABIC);

  4180             aliases.put("ARMI", IMPERIAL_ARAMAIC);

  4181             aliases.put("ARMN", ARMENIAN);

  4182             aliases.put("AVST", AVESTAN);

  4183             aliases.put("BALI", BALINESE);

  4184             aliases.put("BAMU", BAMUM);

  4185             aliases.put("BATK", BATAK);

  4186             aliases.put("BENG", BENGALI);

  4187             aliases.put("BOPO", BOPOMOFO);

  4188             aliases.put("BRAI", BRAILLE);

  4189             aliases.put("BRAH", BRAHMI);

  4190             aliases.put("BUGI", BUGINESE);

  4191             aliases.put("BUHD", BUHID);

  4192             aliases.put("CANS", CANADIAN_ABORIGINAL);

  4193             aliases.put("CARI", CARIAN);

  4194             aliases.put("CHAM", CHAM);

  4195             aliases.put("CHER", CHEROKEE);

  4196             aliases.put("COPT", COPTIC);

  4197             aliases.put("CPRT", CYPRIOT);

  4198             aliases.put("CYRL", CYRILLIC);

  4199             aliases.put("DEVA", DEVANAGARI);

  4200             aliases.put("DSRT", DESERET);

  4201             aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);

  4202             aliases.put("ETHI", ETHIOPIC);

  4203             aliases.put("GEOR", GEORGIAN);

  4204             aliases.put("GLAG", GLAGOLITIC);

  4205             aliases.put("GOTH", GOTHIC);

  4206             aliases.put("GREK", GREEK);

  4207             aliases.put("GUJR", GUJARATI);

  4208             aliases.put("GURU", GURMUKHI);

  4209             aliases.put("HANG", HANGUL);

  4210             aliases.put("HANI", HAN);

  4211             aliases.put("HANO", HANUNOO);

  4212             aliases.put("HEBR", HEBREW);

  4213             aliases.put("HIRA", HIRAGANA);

  4214             // it appears we don't have the KATAKANA_OR_HIRAGANA

  4215             //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);

  4216             aliases.put("ITAL", OLD_ITALIC);

  4217             aliases.put("JAVA", JAVANESE);

  4218             aliases.put("KALI", KAYAH_LI);

  4219             aliases.put("KANA", KATAKANA);

  4220             aliases.put("KHAR", KHAROSHTHI);

  4221             aliases.put("KHMR", KHMER);

  4222             aliases.put("KNDA", KANNADA);

  4223             aliases.put("KTHI", KAITHI);

  4224             aliases.put("LANA", TAI_THAM);

  4225             aliases.put("LAOO", LAO);

  4226             aliases.put("LATN", LATIN);

  4227             aliases.put("LEPC", LEPCHA);

  4228             aliases.put("LIMB", LIMBU);

  4229             aliases.put("LINB", LINEAR_B);

  4230             aliases.put("LISU", LISU);

  4231             aliases.put("LYCI", LYCIAN);

  4232             aliases.put("LYDI", LYDIAN);

  4233             aliases.put("MAND", MANDAIC);

  4234             aliases.put("MLYM", MALAYALAM);

  4235             aliases.put("MONG", MONGOLIAN);

  4236             aliases.put("MTEI", MEETEI_MAYEK);

  4237             aliases.put("MYMR", MYANMAR);

  4238             aliases.put("NKOO", NKO);

  4239             aliases.put("OGAM", OGHAM);

  4240             aliases.put("OLCK", OL_CHIKI);

  4241             aliases.put("ORKH", OLD_TURKIC);

  4242             aliases.put("ORYA", ORIYA);

  4243             aliases.put("OSMA", OSMANYA);

  4244             aliases.put("PHAG", PHAGS_PA);

  4245             aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);

  4246             aliases.put("PHNX", PHOENICIAN);

  4247             aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);

  4248             aliases.put("RJNG", REJANG);

  4249             aliases.put("RUNR", RUNIC);

  4250             aliases.put("SAMR", SAMARITAN);

  4251             aliases.put("SARB", OLD_SOUTH_ARABIAN);

  4252             aliases.put("SAUR", SAURASHTRA);

  4253             aliases.put("SHAW", SHAVIAN);

  4254             aliases.put("SINH", SINHALA);

  4255             aliases.put("SUND", SUNDANESE);

  4256             aliases.put("SYLO", SYLOTI_NAGRI);

  4257             aliases.put("SYRC", SYRIAC);

  4258             aliases.put("TAGB", TAGBANWA);

  4259             aliases.put("TALE", TAI_LE);

  4260             aliases.put("TALU", NEW_TAI_LUE);

  4261             aliases.put("TAML", TAMIL);

  4262             aliases.put("TAVT", TAI_VIET);

  4263             aliases.put("TELU", TELUGU);

  4264             aliases.put("TFNG", TIFINAGH);

  4265             aliases.put("TGLG", TAGALOG);

  4266             aliases.put("THAA", THAANA);

  4267             aliases.put("THAI", THAI);

  4268             aliases.put("TIBT", TIBETAN);

  4269             aliases.put("UGAR", UGARITIC);

  4270             aliases.put("VAII", VAI);

  4271             aliases.put("XPEO", OLD_PERSIAN);

  4272             aliases.put("XSUX", CUNEIFORM);

  4273             aliases.put("YIII", YI);

  4274             aliases.put("ZINH", INHERITED);

  4275             aliases.put("ZYYY", COMMON);

  4276             aliases.put("ZZZZ", UNKNOWN);

  4277         }

  4279         /**

  4280          * Returns the enum constant representing the Unicode script of which

  4281          * the given character (Unicode code point) is assigned to.

  4282          *

  4283          * @param   codePoint the character (Unicode code point) in question.

  4284          * @return  The {@code UnicodeScript} constant representing the

  4285          *          Unicode script of which this character is assigned to.

  4286          *

  4287          * @exception IllegalArgumentException if the specified

  4288          * {@code codePoint} is an invalid Unicode code point.

  4289          * @see Character#isValidCodePoint(int)

  4290          *

  4291          */

  4292         public static UnicodeScript of(int codePoint) {

  4293             if (!isValidCodePoint(codePoint))

  4294                 throw new IllegalArgumentException();

  4295             int type = getType(codePoint);

  4296             // leave SURROGATE and PRIVATE_USE for table lookup

  4297             if (type == UNASSIGNED)

  4298                 return UNKNOWN;

  4299             int index = Arrays.binarySearch(scriptStarts, codePoint);

  4300             if (index < 0)

  4301                 index = -index - 2;

  4302             return scripts[index];

  4303         }

  4305         /**

  4306          * Returns the UnicodeScript constant with the given Unicode script

  4307          * name or the script name alias. Script names and their aliases are

  4308          * determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt

  4309          * and PropertyValueAliases&lt;version&gt;.txt define script names

  4310          * and the script name aliases for a particular version of the

  4311          * standard. The {@link Character} class specifies the version of

  4312          * the standard that it supports.

  4313          * <p>

  4314          * Character case is ignored for all of the valid script names.

  4315          * The en_US locale's case mapping rules are used to provide

  4316          * case-insensitive string comparisons for script name validation.

  4317          * <p>

  4318          *

  4319          * @param scriptName A {@code UnicodeScript} name.

  4320          * @return The {@code UnicodeScript} constant identified

  4321          *         by {@code scriptName}

  4322          * @throws IllegalArgumentException if {@code scriptName} is an

  4323          *         invalid name

  4324          * @throws NullPointerException if {@code scriptName} is null

  4325          */

  4326         public static final UnicodeScript forName(String scriptName) {

  4327             scriptName = scriptName.toUpperCase(Locale.ENGLISH);

  4328                                  //.replace(' ', '_'));

  4329             UnicodeScript sc = aliases.get(scriptName);

  4330             if (sc != null)

  4331                 return sc;

  4332             return valueOf(scriptName);

  4333         }

  4334     }

  4336     /**

  4337      * The value of the {@code Character}.

            * Declared {@code final} and assigned only by the constructor.

  4338      *

  4339      * @serial

  4340      */

  4341     private final char value;

  4343     /** use serialVersionUID from JDK 1.0.2 for interoperability */

           // Written out explicitly rather than left to the compiler; since it
           // exists for interoperability, the value should not be changed.

  4344     private static final long serialVersionUID = 3786198910865385080L;

  /**
   * Creates a new {@code Character} object wrapping the given
   * {@code char} value.
   *
   * @param  value   the {@code char} that the new
   *                  {@code Character} object represents.
   */
  public Character(char value) {
      this.value = value;
  }

  4357     private static class CharacterCache {

  4358         private CharacterCache(){}

  4360         static final Character cache[] = new Character[127 + 1];

  4362         static {

  4363             for (int i = 0; i < cache.length; i++)

  4364                 cache[i] = new Character((char)i);

  4365         }

  4366     }

  4368     /**

  4369      * Returns a <tt>Character</tt> instance representing the specified

  4370      * <tt>char</tt> value.

  4371      * If a new <tt>Character</tt> instance is not required, this method

  4372      * should generally be used in preference to the constructor

  4373      * {@link #Character(char)}, as this method is likely to yield

  4374      * significantly better space and time performance by caching

  4375      * frequently requested values.

  4376      *

  4377      * This method will always cache values in the range {@code

  4378      * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may

  4379      * cache other values outside of this range.

  4380      *

  4381      * @param  c a char value.

  4382      * @return a <tt>Character</tt> instance representing <tt>c</tt>.

  4383      * @since  1.5

  4384      */

  4385     public static Character valueOf(char c) {

  4386         if (c <= 127) { // must cache

  4387             return CharacterCache.cache[(int)c];

  4388         }

  4389         return new Character(c);

  4390     }

  4392     /**

  4393      * Returns the value of this {@code Character} object.

  4394      * @return  the primitive {@code char} value represented by

  4395      *          this object.

  4396      */

  4397     public char charValue() {

  4398         return value;

  4399     }

  4401     /**

  4402      * Returns a hash code for this {@code Character}; equal to the result

  4403      * of invoking {@code charValue()}.

  4404      *

  4405      * @return a hash code value for this {@code Character}

  4406      */

  4407     public int hashCode() {

  4408         return (int)value;

  4409     }

  4411     /**

  4412      * Compares this object against the specified object.

  4413      * The result is {@code true} if and only if the argument is not

  4414      * {@code null} and is a {@code Character} object that

  4415      * represents the same {@code char} value as this object.

  4416      *

  4417      * @param   obj   the object to compare with.

  4418      * @return  {@code true} if the objects are the same;

  4419      *          {@code false} otherwise.

  4420      */

  4421     public boolean equals(Object obj) {

  4422         if (obj instanceof Character) {

  4423             return value == ((Character)obj).charValue();

  4424         }

  4425         return false;

  4426     }

  4428     /**

  4429      * Returns a {@code String} object representing this

  4430      * {@code Character}'s value.  The result is a string of

  4431      * length 1 whose sole component is the primitive

  4432      * {@code char} value represented by this

  4433      * {@code Character} object.

  4434      *

  4435      * @return  a string representation of this object.

  4436      */

  4437     public String toString() {

  4438         char buf[] = {value};

  4439         return String.valueOf(buf);

  4440     }

  4442     /**

  4443      * Returns a {@code String} object representing the

  4444      * specified {@code char}.  The result is a string of length

  4445      * 1 consisting solely of the specified {@code char}.

  4446      *

  4447      * @param c the {@code char} to be converted

  4448      * @return the string representation of the specified {@code char}

  4449      * @since 1.4

  4450      */

  4451     public static String toString(char c) {

  4452         return String.valueOf(c);

  4453     }

  4455     /**

  4456      * Determines whether the specified code point is a valid

  4457      * <a href="http://www.unicode.org/glossary/#code_point">

  4458      * Unicode code point value</a>.

  4459      *

  4460      * @param  codePoint the Unicode code point to be tested

  4461      * @return {@code true} if the specified code point value is between

  4462      *         {@link #MIN_CODE_POINT} and

  4463      *         {@link #MAX_CODE_POINT} inclusive;

  4464      *         {@code false} otherwise.

  4465      * @since  1.5

  4466      */

  4467     public static boolean isValidCodePoint(int codePoint) {

  4468         // Optimized form of:

  4469         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT

  4470         int plane = codePoint >>> 16;

  4471         return plane < ((MAX_CODE_POINT + 1) >>> 16);

  4472     }

  4474     /**

  4475      * Determines whether the specified character (Unicode code point)

  4476      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.

  4477      * Such code points can be represented using a single {@code char}.

  4478      *

  4479      * @param  codePoint the character (Unicode code point) to be tested

  4480      * @return {@code true} if the specified code point is between

  4481      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;

  4482      *         {@code false} otherwise.

  4483      * @since  1.7

  4484      */

  4485     public static boolean isBmpCodePoint(int codePoint) {

  4486         return codePoint >>> 16 == 0;

  4487         // Optimized form of:

  4488         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE

  4489         // We consistently use logical shift (>>>) to facilitate

  4490         // additional runtime optimizations.

  4491     }

  4493     /**

  4494      * Determines whether the specified character (Unicode code point)

  4495      * is in the <a href="#supplementary">supplementary character</a> range.

  4496      *

  4497      * @param  codePoint the character (Unicode code point) to be tested

  4498      * @return {@code true} if the specified code point is between

  4499      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and

  4500      *         {@link #MAX_CODE_POINT} inclusive;

  4501      *         {@code false} otherwise.

  4502      * @since  1.5

  4503      */

  4504     public static boolean isSupplementaryCodePoint(int codePoint) {

  4505         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT

  4506             && codePoint <  MAX_CODE_POINT + 1;

  4507     }

  4509     /**

  4510      * Determines if the given {@code char} value is a

  4511      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">

  4512      * Unicode high-surrogate code unit</a>

  4513      * (also known as <i>leading-surrogate code unit</i>).

  4514      *

  4515      * <p>Such values do not represent characters by themselves,

  4516      * but are used in the representation of

  4517      * <a href="#supplementary">supplementary characters</a>

  4518      * in the UTF-16 encoding.

  4519      *

  4520      * @param  ch the {@code char} value to be tested.

  4521      * @return {@code true} if the {@code char} value is between

  4522      *         {@link #MIN_HIGH_SURROGATE} and

  4523      *         {@link #MAX_HIGH_SURROGATE} inclusive;

  4524      *         {@code false} otherwise.

  4525      * @see    Character#isLowSurrogate(char)

  4526      * @see    Character.UnicodeBlock#of(int)

  4527      * @since  1.5

  4528      */

  4529     public static boolean isHighSurrogate(char ch) {

  4530         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE

  4531         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);

  4532     }

  4534     /**

  4535      * Determines if the given {@code char} value is a

  4536      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">

  4537      * Unicode low-surrogate code unit</a>

  4538      * (also known as <i>trailing-surrogate code unit</i>).

  4539      *

  4540      * <p>Such values do not represent characters by themselves,

  4541      * but are used in the representation of

  4542      * <a href="#supplementary">supplementary characters</a>

  4543      * in the UTF-16 encoding.

  4544      *

  4545      * @param  ch the {@code char} value to be tested.

  4546      * @return {@code true} if the {@code char} value is between

  4547      *         {@link #MIN_LOW_SURROGATE} and

  4548      *         {@link #MAX_LOW_SURROGATE} inclusive;

  4549      *         {@code false} otherwise.

  4550      * @see    Character#isHighSurrogate(char)

  4551      * @since  1.5

  4552      */

  4553     public static boolean isLowSurrogate(char ch) {

  4554         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);

  4555     }

  4557     /**

  4558      * Determines if the given {@code char} value is a Unicode

  4559      * <i>surrogate code unit</i>.

  4560      *

  4561      * <p>Such values do not represent characters by themselves,

  4562      * but are used in the representation of

  4563      * <a href="#supplementary">supplementary characters</a>

  4564      * in the UTF-16 encoding.

  4565      *

  4566      * <p>A char value is a surrogate code unit if and only if it is either

  4567      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or

  4568      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.

  4569      *

  4570      * @param  ch the {@code char} value to be tested.

  4571      * @return {@code true} if the {@code char} value is between

  4572      *         {@link #MIN_SURROGATE} and

  4573      *         {@link #MAX_SURROGATE} inclusive;

  4574      *         {@code false} otherwise.

  4575      * @since  1.7

  4576      */

  4577     public static boolean isSurrogate(char ch) {

  4578         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);

  4579     }

  4581     /**

  4582      * Determines whether the specified pair of {@code char}

  4583      * values is a valid

  4584      * <a href="http://www.unicode.org/glossary/#surrogate_pair">

  4585      * Unicode surrogate pair</a>.

  4587      * <p>This method is equivalent to the expression:

  4588      * <blockquote><pre>

  4589      * isHighSurrogate(high) && isLowSurrogate(low)

  4590      * </pre></blockquote>

  4591      *

  4592      * @param  high the high-surrogate code value to be tested

  4593      * @param  low the low-surrogate code value to be tested

  4594      * @return {@code true} if the specified high and

  4595      * low-surrogate code values represent a valid surrogate pair;

  4596      * {@code false} otherwise.

  4597      * @since  1.5

  4598      */

  4599     public static boolean isSurrogatePair(char high, char low) {

  4600         return isHighSurrogate(high) && isLowSurrogate(low);

  4601     }

  4603     /**

  4604      * Determines the number of {@code char} values needed to

  4605      * represent the specified character (Unicode code point). If the

  4606      * specified character is equal to or greater than 0x10000, then

  4607      * the method returns 2. Otherwise, the method returns 1.

  4608      *

  4609      * <p>This method doesn't validate the specified character to be a

  4610      * valid Unicode code point. The caller must validate the

  4611      * character value using {@link #isValidCodePoint(int) isValidCodePoint}

  4612      * if necessary.

  4613      *

  4614      * @param   codePoint the character (Unicode code point) to be tested.

  4615      * @return  2 if the character is a valid supplementary character; 1 otherwise.

  4616      * @see     Character#isSupplementaryCodePoint(int)

  4617      * @since   1.5

  4618      */

  4619     public static int charCount(int codePoint) {

  4620         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;

  4621     }

  4623     /**

  4624      * Converts the specified surrogate pair to its supplementary code

  4625      * point value. This method does not validate the specified

  4626      * surrogate pair. The caller must validate it using {@link

  4627      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.

  4628      *

  4629      * @param  high the high-surrogate code unit

  4630      * @param  low the low-surrogate code unit

  4631      * @return the supplementary code point composed from the

  4632      *         specified surrogate pair.

  4633      * @since  1.5

  4634      */

  4635     public static int toCodePoint(char high, char low) {

  4636         // Optimized form of:

  4637         // return ((high - MIN_HIGH_SURROGATE) << 10)

  4638         //         + (low - MIN_LOW_SURROGATE)

  4639         //         + MIN_SUPPLEMENTARY_CODE_POINT;

  4640         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT

  4641                                        - (MIN_HIGH_SURROGATE << 10)

  4642                                        - MIN_LOW_SURROGATE);

  4643     }

  4645     /**

  4646      * Returns the code point at the given index of the

  4647      * {@code CharSequence}. If the {@code char} value at

  4648      * the given index in the {@code CharSequence} is in the

  4649      * high-surrogate range, the following index is less than the

  4650      * length of the {@code CharSequence}, and the

  4651      * {@code char} value at the following index is in the

  4652      * low-surrogate range, then the supplementary code point

  4653      * corresponding to this surrogate pair is returned. Otherwise,

  4654      * the {@code char} value at the given index is returned.

  4655      *

  4656      * @param seq a sequence of {@code char} values (Unicode code

  4657      * units)

  4658      * @param index the index to the {@code char} values (Unicode

  4659      * code units) in {@code seq} to be converted

  4660      * @return the Unicode code point at the given index

  4661      * @exception NullPointerException if {@code seq} is null.

  4662      * @exception IndexOutOfBoundsException if the value

  4663      * {@code index} is negative or not less than

  4664      * {@link CharSequence#length() seq.length()}.

  4665      * @since  1.5

  4666      */

  4667     public static int codePointAt(CharSequence seq, int index) {

  4668         char c1 = seq.charAt(index++);

  4669         if (isHighSurrogate(c1)) {

  4670             if (index < seq.length()) {

  4671                 char c2 = seq.charAt(index);

  4672                 if (isLowSurrogate(c2)) {

  4673                     return toCodePoint(c1, c2);

  4674                 }

  4675             }

  4676         }

  4677         return c1;

  4678     }

  4680     /**

  4681      * Returns the code point at the given index of the

  4682      * {@code char} array. If the {@code char} value at

  4683      * the given index in the {@code char} array is in the

  4684      * high-surrogate range, the following index is less than the

  4685      * length of the {@code char} array, and the

  4686      * {@code char} value at the following index is in the

  4687      * low-surrogate range, then the supplementary code point

  4688      * corresponding to this surrogate pair is returned. Otherwise,

  4689      * the {@code char} value at the given index is returned.

  4690      *

  4691      * @param a the {@code char} array

  4692      * @param index the index to the {@code char} values (Unicode

  4693      * code units) in the {@code char} array to be converted

  4694      * @return the Unicode code point at the given index

  4695      * @exception NullPointerException if {@code a} is null.

  4696      * @exception IndexOutOfBoundsException if the value

  4697      * {@code index} is negative or not less than

  4698      * the length of the {@code char} array.

  4699      * @since  1.5

  4700      */

  4701     public static int codePointAt(char[] a, int index) {

  4702         return codePointAtImpl(a, index, a.length);

  4703     }

  4705     /**

  4706      * Returns the code point at the given index of the

  4707      * {@code char} array, where only array elements with

  4708      * {@code index} less than {@code limit} can be used. If

  4709      * the {@code char} value at the given index in the

  4710      * {@code char} array is in the high-surrogate range, the

  4711      * following index is less than the {@code limit}, and the

  4712      * {@code char} value at the following index is in the

  4713      * low-surrogate range, then the supplementary code point

  4714      * corresponding to this surrogate pair is returned. Otherwise,

  4715      * the {@code char} value at the given index is returned.

  4716      *

  4717      * @param a the {@code char} array

  4718      * @param index the index to the {@code char} values (Unicode

  4719      * code units) in the {@code char} array to be converted

  4720      * @param limit the index after the last array element that

  4721      * can be used in the {@code char} array

  4722      * @return the Unicode code point at the given index

  4723      * @exception NullPointerException if {@code a} is null.

  4724      * @exception IndexOutOfBoundsException if the {@code index}

  4725      * argument is negative or not less than the {@code limit}

  4726      * argument, or if the {@code limit} argument is negative or

  4727      * greater than the length of the {@code char} array.

  4728      * @since  1.5

  4729      */

  4730     public static int codePointAt(char[] a, int index, int limit) {

  4731         if (index >= limit || limit < 0 || limit > a.length) {

  4732             throw new IndexOutOfBoundsException();

  4733         }

  4734         return codePointAtImpl(a, index, limit);

  4735     }

  4737     // throws ArrayIndexOutOfBoundsException if index is out of bounds

  4738     static int codePointAtImpl(char[] a, int index, int limit) {

  4739         char c1 = a[index++];

  4740         if (isHighSurrogate(c1)) {

  4741             if (index < limit) {

  4742                 char c2 = a[index];

  4743                 if (isLowSurrogate(c2)) {

  4744                     return toCodePoint(c1, c2);

  4745                 }

  4746             }

  4747         }

  4748         return c1;

  4749     }

  4751     /**

  4752      * Returns the code point preceding the given index of the

  4753      * {@code CharSequence}. If the {@code char} value at

  4754      * {@code (index - 1)} in the {@code CharSequence} is in

  4755      * the low-surrogate range, {@code (index - 2)} is not

  4756      * negative, and the {@code char} value at {@code (index - 2)}

  4757      * in the {@code CharSequence} is in the

  4758      * high-surrogate range, then the supplementary code point

  4759      * corresponding to this surrogate pair is returned. Otherwise,

  4760      * the {@code char} value at {@code (index - 1)} is

  4761      * returned.

  4762      *

  4763      * @param seq the {@code CharSequence} instance

  4764      * @param index the index following the code point that should be returned

  4765      * @return the Unicode code point value before the given index.

  4766      * @exception NullPointerException if {@code seq} is null.

  4767      * @exception IndexOutOfBoundsException if the {@code index}

  4768      * argument is less than 1 or greater than {@link

  4769      * CharSequence#length() seq.length()}.

  4770      * @since  1.5

  4771      */

  4772     public static int codePointBefore(CharSequence seq, int index) {

  4773         char c2 = seq.charAt(--index);

  4774         if (isLowSurrogate(c2)) {

  4775             if (index > 0) {

  4776                 char c1 = seq.charAt(--index);

  4777                 if (isHighSurrogate(c1)) {

  4778                     return toCodePoint(c1, c2);

  4779                 }

  4780             }

  4781         }

  4782         return c2;

  4783     }

  4785     /**

  4786      * Returns the code point preceding the given index of the

  4787      * {@code char} array. If the {@code char} value at

  4788      * {@code (index - 1)} in the {@code char} array is in

  4789      * the low-surrogate range, {@code (index - 2)} is not

  4790      * negative, and the {@code char} value at {@code (index - 2)}

  4791      * in the {@code char} array is in the

  4792      * high-surrogate range, then the supplementary code point

  4793      * corresponding to this surrogate pair is returned. Otherwise,

  4794      * the {@code char} value at {@code (index - 1)} is

  4795      * returned.

  4796      *

  4797      * @param a the {@code char} array

  4798      * @param index the index following the code point that should be returned

  4799      * @return the Unicode code point value before the given index.

  4800      * @exception NullPointerException if {@code a} is null.

  4801      * @exception IndexOutOfBoundsException if the {@code index}

  4802      * argument is less than 1 or greater than the length of the

  4803      * {@code char} array

  4804      * @since  1.5

  4805      */

  4806     public static int codePointBefore(char[] a, int index) {

  4807         return codePointBeforeImpl(a, index, 0);

  4808     }

  4810     /**

  4811      * Returns the code point preceding the given index of the

  4812      * {@code char} array, where only array elements with

  4813      * {@code index} greater than or equal to {@code start}

  4814      * can be used. If the {@code char} value at {@code (index - 1)}

  4815      * in the {@code char} array is in the

  4816      * low-surrogate range, {@code (index - 2)} is not less than

  4817      * {@code start}, and the {@code char} value at

  4818      * {@code (index - 2)} in the {@code char} array is in

  4819      * the high-surrogate range, then the supplementary code point

  4820      * corresponding to this surrogate pair is returned. Otherwise,

  4821      * the {@code char} value at {@code (index - 1)} is

  4822      * returned.

  4823      *

  4824      * @param a the {@code char} array

  4825      * @param index the index following the code point that should be returned

  4826      * @param start the index of the first array element in the

  4827      * {@code char} array

  4828      * @return the Unicode code point value before the given index.

  4829      * @exception NullPointerException if {@code a} is null.

  4830      * @exception IndexOutOfBoundsException if the {@code index}

  4831      * argument is not greater than the {@code start} argument or

  4832      * is greater than the length of the {@code char} array, or

  4833      * if the {@code start} argument is negative or not less than

  4834      * the length of the {@code char} array.

  4835      * @since  1.5

  4836      */

  4837     public static int codePointBefore(char[] a, int index, int start) {

  4838         if (index <= start || start < 0 || start >= a.length) {

  4839             throw new IndexOutOfBoundsException();

  4840         }

  4841         return codePointBeforeImpl(a, index, start);

  4842     }

  4844     // throws ArrayIndexOutOfBoundsException if index-1 is out of bounds

  4845     static int codePointBeforeImpl(char[] a, int index, int start) {

  4846         char c2 = a[--index];

  4847         if (isLowSurrogate(c2)) {

  4848             if (index > start) {

  4849                 char c1 = a[--index];

  4850                 if (isHighSurrogate(c1)) {

  4851                     return toCodePoint(c1, c2);

  4852                 }

  4853             }

  4854         }

  4855         return c2;

  4856     }

  4858     /**

  4859      * Returns the leading surrogate (a

  4860      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">

  4861      * high surrogate code unit</a>) of the

  4862      * <a href="http://www.unicode.org/glossary/#surrogate_pair">

  4863      * surrogate pair</a>

  4864      * representing the specified supplementary character (Unicode

  4865      * code point) in the UTF-16 encoding.  If the specified character

  4866      * is not a

  4867      * <a href="Character.html#supplementary">supplementary character</a>,

  4868      * an unspecified {@code char} is returned.

  4869      *

  4870      * <p>If

  4871      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}

  4872      * is {@code true}, then

  4873      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and

  4874      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}

  4875      * are also always {@code true}.

  4876      *

  4877      * @param   codePoint a supplementary character (Unicode code point)

  4878      * @return  the leading surrogate code unit used to represent the

  4879      *          character in the UTF-16 encoding

  4880      * @since   1.7

  4881      */

  4882     public static char highSurrogate(int codePoint) {

  4883         return (char) ((codePoint >>> 10)

  4884             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));

  4885     }

  4887     /**

  4888      * Returns the trailing surrogate (a

  4889      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">

  4890      * low surrogate code unit</a>) of the

  4891      * <a href="http://www.unicode.org/glossary/#surrogate_pair">

  4892      * surrogate pair</a>

  4893      * representing the specified supplementary character (Unicode

  4894      * code point) in the UTF-16 encoding.  If the specified character

  4895      * is not a

  4896      * <a href="Character.html#supplementary">supplementary character</a>,

  4897      * an unspecified {@code char} is returned.

  4898      *

  4899      * <p>If

  4900      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}

  4901      * is {@code true}, then

  4902      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and

  4903      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}

  4904      * are also always {@code true}.

  4905      *

  4906      * @param   codePoint a supplementary character (Unicode code point)

  4907      * @return  the trailing surrogate code unit used to represent the

  4908      *          character in the UTF-16 encoding

  4909      * @since   1.7

  4910      */

  4911     public static char lowSurrogate(int codePoint) {

  4912         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);

  4913     }

  4915     /**

  4916      * Converts the specified character (Unicode code point) to its

  4917      * UTF-16 representation. If the specified code point is a BMP

  4918      * (Basic Multilingual Plane or Plane 0) value, the same value is

  4919      * stored in {@code dst[dstIndex]}, and 1 is returned. If the

  4920      * specified code point is a supplementary character, its

  4921      * surrogate values are stored in {@code dst[dstIndex]}

  4922      * (high-surrogate) and {@code dst[dstIndex+1]}

  4923      * (low-surrogate), and 2 is returned.

  4924      *

  4925      * @param  codePoint the character (Unicode code point) to be converted.

  4926      * @param  dst an array of {@code char} in which the

  4927      * {@code codePoint}'s UTF-16 value is stored.

  4928      * @param dstIndex the start index into the {@code dst}

  4929      * array where the converted value is stored.

  4930      * @return 1 if the code point is a BMP code point, 2 if the

  4931      * code point is a supplementary code point.

  4932      * @exception IllegalArgumentException if the specified

  4933      * {@code codePoint} is not a valid Unicode code point.

  4934      * @exception NullPointerException if the specified {@code dst} is null.

  4935      * @exception IndexOutOfBoundsException if {@code dstIndex}

  4936      * is negative or not less than {@code dst.length}, or if

  4937      * {@code dst} at {@code dstIndex} doesn't have enough

  4938      * array element(s) to store the resulting {@code char}

  4939      * value(s). (If {@code dstIndex} is equal to

  4940      * {@code dst.length-1} and the specified

  4941      * {@code codePoint} is a supplementary character, the

  4942      * high-surrogate value is not stored in

  4943      * {@code dst[dstIndex]}.)

  4944      * @since  1.5

  4945      */

  4946     public static int toChars(int codePoint, char[] dst, int dstIndex) {

  4947         if (isBmpCodePoint(codePoint)) {

  4948             dst[dstIndex] = (char) codePoint;

  4949             return 1;

  4950         } else if (isValidCodePoint(codePoint)) {

  4951             toSurrogates(codePoint, dst, dstIndex);

  4952             return 2;

  4953         } else {

  4954             throw new IllegalArgumentException();

  4955         }

  4956     }

  4958     /**

  4959      * Converts the specified character (Unicode code point) to its

  4960      * UTF-16 representation stored in a {@code char} array. If

  4961      * the specified code point is a BMP (Basic Multilingual Plane or

  4962      * Plane 0) value, the resulting {@code char} array has

  4963      * the same value as {@code codePoint}. If the specified code

  4964      * point is a supplementary code point, the resulting

  4965      * {@code char} array has the corresponding surrogate pair.

  4966      *

  4967      * @param  codePoint a Unicode code point

  4968      * @return a {@code char} array having

  4969      *         {@code codePoint}'s UTF-16 representation.

  4970      * @exception IllegalArgumentException if the specified

  4971      * {@code codePoint} is not a valid Unicode code point.

  4972      * @since  1.5

  4973      */

  4974     public static char[] toChars(int codePoint) {

  4975         if (isBmpCodePoint(codePoint)) {

  4976             return new char[] { (char) codePoint };

  4977         } else if (isValidCodePoint(codePoint)) {

  4978             char[] result = new char[2];

  4979             toSurrogates(codePoint, result, 0);

  4980             return result;

  4981         } else {

  4982             throw new IllegalArgumentException();

  4983         }

  4984     }

  4986     static void toSurrogates(int codePoint, char[] dst, int index) {

  4987         // We write elements "backwards" to guarantee all-or-nothing

  4988         dst[index+1] = lowSurrogate(codePoint);

  4989         dst[index] = highSurrogate(codePoint);

  4990     }

  4992     /**

  4993      * Returns the number of Unicode code points in the text range of

  4994      * the specified char sequence. The text range begins at the

  4995      * specified {@code beginIndex} and extends to the

  4996      * {@code char} at index {@code endIndex - 1}. Thus the

  4997      * length (in {@code char}s) of the text range is

  4998      * {@code endIndex-beginIndex}. Unpaired surrogates within

  4999      * the text range count as one code point each.

  5000      *

  5001      * @param seq the char sequence

  5002      * @param beginIndex the index to the first {@code char} of

  5003      * the text range.

  5004      * @param endIndex the index after the last {@code char} of

  5005      * the text range.

  5006      * @return the number of Unicode code points in the specified text

  5007      * range

  5008      * @exception NullPointerException if {@code seq} is null.

  5009      * @exception IndexOutOfBoundsException if the

  5010      * {@code beginIndex} is negative, or {@code endIndex}

  5011      * is larger than the length of the given sequence, or

  5012      * {@code beginIndex} is larger than {@code endIndex}.

  5013      * @since  1.5

  5014      */

  5015     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {

  5016         int length = seq.length();

  5017         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {

  5018             throw new IndexOutOfBoundsException();

  5019         }

  5020         int n = endIndex - beginIndex;

  5021         for (int i = beginIndex; i < endIndex; ) {

  5022             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&

  5023                 isLowSurrogate(seq.charAt(i))) {

  5024                 n--;

  5025                 i++;

  5026             }

  5027         }

  5028         return n;

  5029     }

  5031     /**

  5032      * Returns the number of Unicode code points in a subarray of the

  5033      * {@code char} array argument. The {@code offset}

  5034      * argument is the index of the first {@code char} of the

  5035      * subarray and the {@code count} argument specifies the

  5036      * length of the subarray in {@code char}s. Unpaired

  5037      * surrogates within the subarray count as one code point each.

  5038      *

  5039      * @param a the {@code char} array

  5040      * @param offset the index of the first {@code char} in the

  5041      * given {@code char} array

  5042      * @param count the length of the subarray in {@code char}s

  5043      * @return the number of Unicode code points in the specified subarray

  5044      * @exception NullPointerException if {@code a} is null.

  5045      * @exception IndexOutOfBoundsException if {@code offset} or

  5046      * {@code count} is negative, or if {@code offset +

  5047      * count} is larger than the length of the given array.

  5048      * @since  1.5

  5049      */

  5050     public static int codePointCount(char[] a, int offset, int count) {

  5051         if (count > a.length - offset || offset < 0 || count < 0) {

  5052             throw new IndexOutOfBoundsException();

  5053         }

  5054         return codePointCountImpl(a, offset, count);

  5055     }

  5057     static int codePointCountImpl(char[] a, int offset, int count) {

  5058         int endIndex = offset + count;

  5059         int n = count;

  5060         for (int i = offset; i < endIndex; ) {

  5061             if (isHighSurrogate(a[i++]) && i < endIndex &&

  5062                 isLowSurrogate(a[i])) {

  5063                 n--;

  5064                 i++;

  5065             }

  5066         }

  5067         return n;

  5068     }

  5070     /**

  5071      * Returns the index within the given char sequence that is offset

  5072      * from the given {@code index} by {@code codePointOffset}

  5073      * code points. Unpaired surrogates within the text range given by

  5074      * {@code index} and {@code codePointOffset} count as

  5075      * one code point each.

  5076      *

  5077      * @param seq the char sequence

  5078      * @param index the index to be offset

  5079      * @param codePointOffset the offset in code points

  5080      * @return the index within the char sequence

  5081      * @exception NullPointerException if {@code seq} is null.

  5082      * @exception IndexOutOfBoundsException if {@code index}

  5083      *   is negative or larger than the length of the char sequence,

  5084      *   or if {@code codePointOffset} is positive and the

  5085      *   subsequence starting with {@code index} has fewer than

  5086      *   {@code codePointOffset} code points, or if

  5087      *   {@code codePointOffset} is negative and the subsequence

  5088      *   before {@code index} has fewer than the absolute value

  5089      *   of {@code codePointOffset} code points.

  5090      * @since 1.5

  5091      */

  5092     public static int offsetByCodePoints(CharSequence seq, int index,

  5093                                          int codePointOffset) {

  5094         int length = seq.length();

  5095         if (index < 0 || index > length) {

  5096             throw new IndexOutOfBoundsException();

  5097         }

  5099         int x = index;

  5100         if (codePointOffset >= 0) {

  5101             int i;

  5102             for (i = 0; x < length && i < codePointOffset; i++) {

  5103                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&

  5104                     isLowSurrogate(seq.charAt(x))) {

  5105                     x++;

  5106                 }

  5107             }

  5108             if (i < codePointOffset) {

  5109                 throw new IndexOutOfBoundsException();

  5110             }

  5111         } else {

  5112             int i;

  5113             for (i = codePointOffset; x > 0 && i < 0; i++) {

  5114                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&

  5115                     isHighSurrogate(seq.charAt(x-1))) {

  5116                     x--;

  5117                 }

  5118             }

  5119             if (i < 0) {

  5120                 throw new IndexOutOfBoundsException();

  5121             }

  5122         }

  5123         return x;

  5124     }

  5126     /**

  5127      * Returns the index within the given {@code char} subarray

  5128      * that is offset from the given {@code index} by

  5129      * {@code codePointOffset} code points. The

  5130      * {@code start} and {@code count} arguments specify a

  5131      * subarray of the {@code char} array. Unpaired surrogates

  5132      * within the text range given by {@code index} and

  5133      * {@code codePointOffset} count as one code point each.

  5134      *

  5135      * @param a the {@code char} array

  5136      * @param start the index of the first {@code char} of the

  5137      * subarray

  5138      * @param count the length of the subarray in {@code char}s

  5139      * @param index the index to be offset

  5140      * @param codePointOffset the offset in code points

  5141      * @return the index within the subarray

  5142      * @exception NullPointerException if {@code a} is null.

  5143      * @exception IndexOutOfBoundsException

  5144      *   if {@code start} or {@code count} is negative,

  5145      *   or if {@code start + count} is larger than the length of

  5146      *   the given array,

  5147      *   or if {@code index} is less than {@code start} or

  5148      *   larger than {@code start + count},

  5149      *   or if {@code codePointOffset} is positive and the text range

  5150      *   starting with {@code index} and ending with {@code start + count - 1}

  5151      *   has fewer than {@code codePointOffset} code

  5152      *   points,

  5153      *   or if {@code codePointOffset} is negative and the text range

  5154      *   starting with {@code start} and ending with {@code index - 1}

  5155      *   has fewer than the absolute value of

  5156      *   {@code codePointOffset} code points.

  5157      * @since 1.5

  5158      */

  5159     public static int offsetByCodePoints(char[] a, int start, int count,

  5160                                          int index, int codePointOffset) {

  5161         if (count > a.length-start || start < 0 || count < 0

  5162             || index < start || index > start+count) {

  5163             throw new IndexOutOfBoundsException();

  5164         }

  5165         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);

  5166     }

  5168     static int offsetByCodePointsImpl(char[]a, int start, int count,

  5169                                       int index, int codePointOffset) {

  5170         int x = index;

  5171         if (codePointOffset >= 0) {

  5172             int limit = start + count;

  5173             int i;

  5174             for (i = 0; x < limit && i < codePointOffset; i++) {

  5175                 if (isHighSurrogate(a[x++]) && x < limit &&

  5176                     isLowSurrogate(a[x])) {

  5177                     x++;

  5178                 }

  5179             }

  5180             if (i < codePointOffset) {

  5181                 throw new IndexOutOfBoundsException();

  5182             }

  5183         } else {

  5184             int i;

  5185             for (i = codePointOffset; x > start && i < 0; i++) {

  5186                 if (isLowSurrogate(a[--x]) && x > start &&

  5187                     isHighSurrogate(a[x-1])) {

  5188                     x--;

  5189                 }

  5190             }

  5191             if (i < 0) {

  5192                 throw new IndexOutOfBoundsException();

  5193             }

  5194         }

  5195         return x;

  5196     }

  5198     /**

  5199      * Determines if the specified character is a lowercase character.

  5200      * <p>

  5201      * A character is lowercase if its general category type, provided

  5202      * by {@code Character.getType(ch)}, is

  5203      * {@code LOWERCASE_LETTER}, or it has contributory property

  5204      * Other_Lowercase as defined by the Unicode Standard.

  5205      * <p>

  5206      * The following are examples of lowercase characters:

  5207      * <p><blockquote><pre>

  5208      * a b c d e f g h i j k l m n o p q r s t u v w x y z

  5209      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'

  5210      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'

  5211      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'

  5212      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'

  5213      * </pre></blockquote>

  5214      * <p> Many other Unicode characters are lowercase too.

  5215      *

  5216      * <p><b>Note:</b> This method cannot handle <a

  5217      * href="#supplementary"> supplementary characters</a>. To support

  5218      * all Unicode characters, including supplementary characters, use

  5219      * the {@link #isLowerCase(int)} method.

  5220      *

  5221      * @param   ch   the character to be tested.

  5222      * @return  {@code true} if the character is lowercase;

  5223      *          {@code false} otherwise.

  5224      * @see     Character#isUpperCase(char)

  5225      * @see     Character#isTitleCase(char)

  5226      * @see     Character#toLowerCase(char)

  5227      * @see     Character#getType(char)

  5228      */

  5229     public static boolean isLowerCase(char ch) {

  5230         return isLowerCase((int)ch);

  5231     }

  5233     /**

  5234      * Determines if the specified character (Unicode code point) is a

  5235      * lowercase character.

  5236      * <p>

  5237      * A character is lowercase if its general category type, provided

  5238      * by {@link Character#getType getType(codePoint)}, is

  5239      * {@code LOWERCASE_LETTER}, or it has contributory property

  5240      * Other_Lowercase as defined by the Unicode Standard.

  5241      * <p>

  5242      * The following are examples of lowercase characters:

  5243      * <p><blockquote><pre>

  5244      * a b c d e f g h i j k l m n o p q r s t u v w x y z

  5245      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'

  5246      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'

  5247      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'

  5248      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'

  5249      * </pre></blockquote>

  5250      * <p> Many other Unicode characters are lowercase too.

  5251      *

  5252      * @param   codePoint the character (Unicode code point) to be tested.

  5253      * @return  {@code true} if the character is lowercase;

  5254      *          {@code false} otherwise.

  5255      * @see     Character#isUpperCase(int)

  5256      * @see     Character#isTitleCase(int)

  5257      * @see     Character#toLowerCase(int)

  5258      * @see     Character#getType(int)

  5259      * @since   1.5

  5260      */

  5261     public static boolean isLowerCase(int codePoint) {

  5262         return getType(codePoint) == Character.LOWERCASE_LETTER ||

  5263                CharacterData.of(codePoint).isOtherLowercase(codePoint);

  5264     }

  5266     /**

  5267      * Determines if the specified character is an uppercase character.

  5268      * <p>

  5269      * A character is uppercase if its general category type, provided by

  5270      * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},

  5271      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.

  5272      * <p>

  5273      * The following are examples of uppercase characters:

  5274      * <p><blockquote><pre>

  5275      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z

  5276      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'

  5277      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'

  5278      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'

  5279      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'

  5280      * </pre></blockquote>

  5281      * <p> Many other Unicode characters are uppercase too.

  5282      *

  5283      * <p><b>Note:</b> This method cannot handle <a

  5284      * href="#supplementary"> supplementary characters</a>. To support

  5285      * all Unicode characters, including supplementary characters, use

  5286      * the {@link #isUpperCase(int)} method.

  5287      *

  5288      * @param   ch   the character to be tested.

  5289      * @return  {@code true} if the character is uppercase;

  5290      *          {@code false} otherwise.

  5291      * @see     Character#isLowerCase(char)

  5292      * @see     Character#isTitleCase(char)

  5293      * @see     Character#toUpperCase(char)

  5294      * @see     Character#getType(char)

  5295      * @since   1.0

  5296      */

  5297     public static boolean isUpperCase(char ch) {

  5298         return isUpperCase((int)ch);

  5299     }

  5301     /**

  5302      * Determines if the specified character (Unicode code point) is an uppercase character.

  5303      * <p>

  5304      * A character is uppercase if its general category type, provided by

  5305      * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},

  5306      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.

  5307      * <p>

  5308      * The following are examples of uppercase characters:

  5309      * <p><blockquote><pre>

  5310      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z

  5311      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'

  5312      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'

  5313      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'

  5314      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'

  5315      * </pre></blockquote>

  5316      * <p> Many other Unicode characters are uppercase too.

  5317      *

  5318      * @param   codePoint the character (Unicode code point) to be tested.

  5319      * @return  {@code true} if the character is uppercase;

  5320      *          {@code false} otherwise.

  5321      * @see     Character#isLowerCase(int)

  5322      * @see     Character#isTitleCase(int)

  5323      * @see     Character#toUpperCase(int)

  5324      * @see     Character#getType(int)

  5325      * @since   1.5

  5326      */

  5327     public static boolean isUpperCase(int codePoint) {

  5328         return getType(codePoint) == Character.UPPERCASE_LETTER ||

  5329                CharacterData.of(codePoint).isOtherUppercase(codePoint);

  5330     }

  5332     /**

  5333      * Determines if the specified character is a titlecase character.

  5334      * <p>

  5335      * A character is a titlecase character if its general

  5336      * category type, provided by {@code Character.getType(ch)},

  5337      * is {@code TITLECASE_LETTER}.

  5338      * <p>

  5339      * Some characters look like pairs of Latin letters. For example, there

  5340      * is an uppercase letter that looks like "LJ" and has a corresponding

  5341      * lowercase letter that looks like "lj". A third form, which looks like "Lj",

  5342      * is the appropriate form to use when rendering a word in lowercase

  5343      * with initial capitals, as for a book title.

  5344      * <p>

  5345      * These are some of the Unicode characters for which this method returns

  5346      * {@code true}:

  5347      * <ul>

  5348      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}

  5349      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}

  5350      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}

  5351      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}

  5352      * </ul>

  5353      * <p> Many other Unicode characters are titlecase too.

  5354      *

  5355      * <p><b>Note:</b> This method cannot handle <a

  5356      * href="#supplementary"> supplementary characters</a>. To support

  5357      * all Unicode characters, including supplementary characters, use

  5358      * the {@link #isTitleCase(int)} method.

  5359      *

  5360      * @param   ch   the character to be tested.

  5361      * @return  {@code true} if the character is titlecase;

  5362      *          {@code false} otherwise.

  5363      * @see     Character#isLowerCase(char)

  5364      * @see     Character#isUpperCase(char)

  5365      * @see     Character#toTitleCase(char)

  5366      * @see     Character#getType(char)

  5367      * @since   1.0.2

  5368      */

  5369     public static boolean isTitleCase(char ch) {

  5370         return isTitleCase((int)ch);

  5371     }

  5373     /**

  5374      * Determines if the specified character (Unicode code point) is a titlecase character.

  5375      * <p>

  5376      * A character is a titlecase character if its general

  5377      * category type, provided by {@link Character#getType(int) getType(codePoint)},

  5378      * is {@code TITLECASE_LETTER}.

  5379      * <p>

  5380      * Some characters look like pairs of Latin letters. For example, there

  5381      * is an uppercase letter that looks like "LJ" and has a corresponding

  5382      * lowercase letter that looks like "lj". A third form, which looks like "Lj",

  5383      * is the appropriate form to use when rendering a word in lowercase

  5384      * with initial capitals, as for a book title.

  5385      * <p>

  5386      * These are some of the Unicode characters for which this method returns

  5387      * {@code true}:

  5388      * <ul>

  5389      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}

  5390      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}

  5391      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}

  5392      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}

  5393      * </ul>

  5394      * <p> Many other Unicode characters are titlecase too.

  5395      *

  5396      * @param   codePoint the character (Unicode code point) to be tested.

  5397      * @return  {@code true} if the character is titlecase;

  5398      *          {@code false} otherwise.

  5399      * @see     Character#isLowerCase(int)

  5400      * @see     Character#isUpperCase(int)

  5401      * @see     Character#toTitleCase(int)

  5402      * @see     Character#getType(int)

  5403      * @since   1.5

  5404      */

  5405     public static boolean isTitleCase(int codePoint) {

  5406         return getType(codePoint) == Character.TITLECASE_LETTER;

  5407     }

  5409     /**

  5410      * Determines if the specified character is a digit.

  5411      * <p>

  5412      * A character is a digit if its general category type, provided

  5413      * by {@code Character.getType(ch)}, is

  5414      * {@code DECIMAL_DIGIT_NUMBER}.

  5415      * <p>

  5416      * Some Unicode character ranges that contain digits:

  5417      * <ul>

  5418      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},

  5419      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})

  5420      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},

  5421      *     Arabic-Indic digits

  5422      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},

  5423      *     Extended Arabic-Indic digits

  5424      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},

  5425      *     Devanagari digits

  5426      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},

  5427      *     Fullwidth digits

  5428      * </ul>

  5429      *

  5430      * Many other character ranges contain digits as well.

  5431      *

  5432      * <p><b>Note:</b> This method cannot handle <a

  5433      * href="#supplementary"> supplementary characters</a>. To support

  5434      * all Unicode characters, including supplementary characters, use

  5435      * the {@link #isDigit(int)} method.

  5436      *

  5437      * @param   ch   the character to be tested.

  5438      * @return  {@code true} if the character is a digit;

  5439      *          {@code false} otherwise.

  5440      * @see     Character#digit(char, int)

  5441      * @see     Character#forDigit(int, int)

  5442      * @see     Character#getType(char)

  5443      */

  5444     public static boolean isDigit(char ch) {

  5445         return isDigit((int)ch);

  5446     }

  5448     /**

  5449      * Determines if the specified character (Unicode code point) is a digit.

  5450      * <p>

  5451      * A character is a digit if its general category type, provided

  5452      * by {@link Character#getType(int) getType(codePoint)}, is

  5453      * {@code DECIMAL_DIGIT_NUMBER}.

  5454      * <p>

  5455      * Some Unicode character ranges that contain digits:

  5456      * <ul>

  5457      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},

  5458      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})

  5459      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},

  5460      *     Arabic-Indic digits

  5461      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},

  5462      *     Extended Arabic-Indic digits

  5463      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},

  5464      *     Devanagari digits

  5465      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},

  5466      *     Fullwidth digits

  5467      * </ul>

  5468      *

  5469      * Many other character ranges contain digits as well.

  5470      *

  5471      * @param   codePoint the character (Unicode code point) to be tested.

  5472      * @return  {@code true} if the character is a digit;

  5473      *          {@code false} otherwise.

  5474      * @see     Character#forDigit(int, int)

  5475      * @see     Character#getType(int)

  5476      * @since   1.5

  5477      */

  5478     public static boolean isDigit(int codePoint) {

  5479         return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;

  5480     }

  5482     /**

  5483      * Determines if a character is defined in Unicode.

  5484      * <p>

  5485      * A character is defined if at least one of the following is true:

  5486      * <ul>

  5487      * <li>It has an entry in the UnicodeData file.

  5488      * <li>It has a value in a range defined by the UnicodeData file.

  5489      * </ul>

  5490      *

  5491      * <p><b>Note:</b> This method cannot handle <a

  5492      * href="#supplementary"> supplementary characters</a>. To support

  5493      * all Unicode characters, including supplementary characters, use

  5494      * the {@link #isDefined(int)} method.

  5495      *

  5496      * @param   ch   the character to be tested

  5497      * @return  {@code true} if the character has a defined meaning

  5498      *          in Unicode; {@code false} otherwise.

  5499      * @see     Character#isDigit(char)

  5500      * @see     Character#isLetter(char)

  5501      * @see     Character#isLetterOrDigit(char)

  5502      * @see     Character#isLowerCase(char)

  5503      * @see     Character#isTitleCase(char)

  5504      * @see     Character#isUpperCase(char)

  5505      * @since   1.0.2

  5506      */

  5507     public static boolean isDefined(char ch) {

  5508         return isDefined((int)ch);

  5509     }

  5511     /**

  5512      * Determines if a character (Unicode code point) is defined in Unicode.

  5513      * <p>

  5514      * A character is defined if at least one of the following is true:

  5515      * <ul>

  5516      * <li>It has an entry in the UnicodeData file.

  5517      * <li>It has a value in a range defined by the UnicodeData file.

  5518      * </ul>

  5519      *

  5520      * @param   codePoint the character (Unicode code point) to be tested.

  5521      * @return  {@code true} if the character has a defined meaning

  5522      *          in Unicode; {@code false} otherwise.

  5523      * @see     Character#isDigit(int)

  5524      * @see     Character#isLetter(int)

  5525      * @see     Character#isLetterOrDigit(int)

  5526      * @see     Character#isLowerCase(int)

  5527      * @see     Character#isTitleCase(int)

  5528      * @see     Character#isUpperCase(int)

  5529      * @since   1.5

  5530      */

  5531     public static boolean isDefined(int codePoint) {

  5532         return getType(codePoint) != Character.UNASSIGNED;

  5533     }

  5535     /**

  5536      * Determines if the specified character is a letter.

  5537      * <p>

  5538      * A character is considered to be a letter if its general

  5539      * category type, provided by {@code Character.getType(ch)},

  5540      * is any of the following:

  5541      * <ul>

  5542      * <li> {@code UPPERCASE_LETTER}

  5543      * <li> {@code LOWERCASE_LETTER}

  5544      * <li> {@code TITLECASE_LETTER}

  5545      * <li> {@code MODIFIER_LETTER}

  5546      * <li> {@code OTHER_LETTER}

  5547      * </ul>

  5548      *

  5549      * Not all letters have case. Many characters are

  5550      * letters but are neither uppercase nor lowercase nor titlecase.

  5551      *

  5552      * <p><b>Note:</b> This method cannot handle <a

  5553      * href="#supplementary"> supplementary characters</a>. To support

  5554      * all Unicode characters, including supplementary characters, use

  5555      * the {@link #isLetter(int)} method.

  5556      *

  5557      * @param   ch   the character to be tested.

  5558      * @return  {@code true} if the character is a letter;

  5559      *          {@code false} otherwise.

  5560      * @see     Character#isDigit(char)

  5561      * @see     Character#isJavaIdentifierStart(char)

  5562      * @see     Character#isJavaLetter(char)

  5563      * @see     Character#isJavaLetterOrDigit(char)

  5564      * @see     Character#isLetterOrDigit(char)

  5565      * @see     Character#isLowerCase(char)

  5566      * @see     Character#isTitleCase(char)

  5567      * @see     Character#isUnicodeIdentifierStart(char)

  5568      * @see     Character#isUpperCase(char)

  5569      */

  5570     public static boolean isLetter(char ch) {

  5571         return isLetter((int)ch);

  5572     }

  5574     /**

  5575      * Determines if the specified character (Unicode code point) is a letter.

  5576      * <p>

  5577      * A character is considered to be a letter if its general

  5578      * category type, provided by {@link Character#getType(int) getType(codePoint)},

  5579      * is any of the following:

  5580      * <ul>

  5581      * <li> {@code UPPERCASE_LETTER}

  5582      * <li> {@code LOWERCASE_LETTER}

  5583      * <li> {@code TITLECASE_LETTER}

  5584      * <li> {@code MODIFIER_LETTER}

  5585      * <li> {@code OTHER_LETTER}

  5586      * </ul>

  5587      *

  5588      * Not all letters have case. Many characters are

  5589      * letters but are neither uppercase nor lowercase nor titlecase.

  5590      *

  5591      * @param   codePoint the character (Unicode code point) to be tested.

  5592      * @return  {@code true} if the character is a letter;

  5593      *          {@code false} otherwise.

  5594      * @see     Character#isDigit(int)

  5595      * @see     Character#isJavaIdentifierStart(int)

  5596      * @see     Character#isLetterOrDigit(int)

  5597      * @see     Character#isLowerCase(int)

  5598      * @see     Character#isTitleCase(int)

  5599      * @see     Character#isUnicodeIdentifierStart(int)

  5600      * @see     Character#isUpperCase(int)

  5601      * @since   1.5

  5602      */

  5603     public static boolean isLetter(int codePoint) {

  5604         return ((((1 << Character.UPPERCASE_LETTER) |

  5605             (1 << Character.LOWERCASE_LETTER) |

  5606             (1 << Character.TITLECASE_LETTER) |

  5607             (1 << Character.MODIFIER_LETTER) |

  5608             (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)

  5609             != 0;

  5610     }

  5612     /**

  5613      * Determines if the specified character is a letter or digit.

  5614      * <p>

  5615      * A character is considered to be a letter or digit if either

  5616      * {@code Character.isLetter(char ch)} or

  5617      * {@code Character.isDigit(char ch)} returns

  5618      * {@code true} for the character.

  5619      *

  5620      * <p><b>Note:</b> This method cannot handle <a

  5621      * href="#supplementary"> supplementary characters</a>. To support

  5622      * all Unicode characters, including supplementary characters, use

  5623      * the {@link #isLetterOrDigit(int)} method.

  5624      *

  5625      * @param   ch   the character to be tested.

  5626      * @return  {@code true} if the character is a letter or digit;

  5627      *          {@code false} otherwise.

  5628      * @see     Character#isDigit(char)

  5629      * @see     Character#isJavaIdentifierPart(char)

  5630      * @see     Character#isJavaLetter(char)

  5631      * @see     Character#isJavaLetterOrDigit(char)

  5632      * @see     Character#isLetter(char)

  5633      * @see     Character#isUnicodeIdentifierPart(char)

  5634      * @since   1.0.2

  5635      */

  5636     public static boolean isLetterOrDigit(char ch) {

  5637         return isLetterOrDigit((int)ch);

  5638     }

  5640     /**

  5641      * Determines if the specified character (Unicode code point) is a letter or digit.

  5642      * <p>

  5643      * A character is considered to be a letter or digit if either

  5644      * {@link #isLetter(int) isLetter(codePoint)} or

  5645      * {@link #isDigit(int) isDigit(codePoint)} returns

  5646      * {@code true} for the character.

  5647      *

  5648      * @param   codePoint the character (Unicode code point) to be tested.

  5649      * @return  {@code true} if the character is a letter or digit;

  5650      *          {@code false} otherwise.

  5651      * @see     Character#isDigit(int)

  5652      * @see     Character#isJavaIdentifierPart(int)

  5653      * @see     Character#isLetter(int)

  5654      * @see     Character#isUnicodeIdentifierPart(int)

  5655      * @since   1.5

  5656      */

  5657     public static boolean isLetterOrDigit(int codePoint) {

  5658         return ((((1 << Character.UPPERCASE_LETTER) |

  5659             (1 << Character.LOWERCASE_LETTER) |

  5660             (1 << Character.TITLECASE_LETTER) |

  5661             (1 << Character.MODIFIER_LETTER) |

  5662             (1 << Character.OTHER_LETTER) |

  5663             (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)

  5664             != 0;

  5665     }

  5667     /**

  5668      * Determines if the specified character is permissible as the first

  5669      * character in a Java identifier.

  5670      * <p>

  5671      * A character may start a Java identifier if and only if

  5672      * one of the following is true:

  5673      * <ul>

  5674      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}

  5675      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}

  5676      * <li> {@code ch} is a currency symbol (such as {@code '$'})

  5677      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).

  5678      * </ul>

  5679      *

  5680      * @param   ch the character to be tested.

  5681      * @return  {@code true} if the character may start a Java

  5682      *          identifier; {@code false} otherwise.

  5683      * @see     Character#isJavaLetterOrDigit(char)

  5684      * @see     Character#isJavaIdentifierStart(char)

  5685      * @see     Character#isJavaIdentifierPart(char)

  5686      * @see     Character#isLetter(char)

  5687      * @see     Character#isLetterOrDigit(char)

  5688      * @see     Character#isUnicodeIdentifierStart(char)

  5689      * @since   1.0.2

  5690      * @deprecated Replaced by isJavaIdentifierStart(char).

  5691      */

  5692     @Deprecated

  5693     public static boolean isJavaLetter(char ch) {

  5694         return isJavaIdentifierStart(ch);

  5695     }

  5697     /**

  5698      * Determines if the specified character may be part of a Java

  5699      * identifier as other than the first character.

  5700      * <p>

  5701      * A character may be part of a Java identifier if and only if any

  5702      * of the following are true:

  5703      * <ul>

  5704      * <li>  it is a letter

  5705      * <li>  it is a currency symbol (such as {@code '$'})

  5706      * <li>  it is a connecting punctuation character (such as {@code '_'})

  5707      * <li>  it is a digit

  5708      * <li>  it is a numeric letter (such as a Roman numeral character)

  5709      * <li>  it is a combining mark

  5710      * <li>  it is a non-spacing mark

  5711      * <li> {@code isIdentifierIgnorable} returns

  5712      * {@code true} for the character.

  5713      * </ul>

  5714      *

  5715      * @param   ch the character to be tested.

  5716      * @return  {@code true} if the character may be part of a

  5717      *          Java identifier; {@code false} otherwise.

  5718      * @see     Character#isJavaLetter(char)

  5719      * @see     Character#isJavaIdentifierStart(char)

  5720      * @see     Character#isJavaIdentifierPart(char)

  5721      * @see     Character#isLetter(char)

  5722      * @see     Character#isLetterOrDigit(char)

  5723      * @see     Character#isUnicodeIdentifierPart(char)

  5724      * @see     Character#isIdentifierIgnorable(char)

  5725      * @since   1.0.2

  5726      * @deprecated Replaced by isJavaIdentifierPart(char).

  5727      */

  5728     @Deprecated

  5729     public static boolean isJavaLetterOrDigit(char ch) {

  5730         return isJavaIdentifierPart(ch);

  5731     }

  5733     /**

  5734      * Determines if the specified character (Unicode code point) is alphabetic.

  5735      * <p>

  5736      * A character is considered to be alphabetic if its general category type,

  5737      * provided by {@link Character#getType(int) getType(codePoint)}, is any of

  5738      * the following:

  5739      * <ul>

  5740      * <li> <code>UPPERCASE_LETTER</code>

  5741      * <li> <code>LOWERCASE_LETTER</code>

  5742      * <li> <code>TITLECASE_LETTER</code>

  5743      * <li> <code>MODIFIER_LETTER</code>

  5744      * <li> <code>OTHER_LETTER</code>

  5745      * <li> <code>LETTER_NUMBER</code>

  5746      * </ul>

  5747      * or it has contributory property Other_Alphabetic as defined by the

  5748      * Unicode Standard.

  5749      *

  5750      * @param   codePoint the character (Unicode code point) to be tested.

  5751      * @return  <code>true</code> if the character is a Unicode alphabetic

  5752      *          character, <code>false</code> otherwise.

  5753      * @since   1.7

  5754      */

  5755     public static boolean isAlphabetic(int codePoint) {

  5756         return (((((1 << Character.UPPERCASE_LETTER) |

  5757             (1 << Character.LOWERCASE_LETTER) |

  5758             (1 << Character.TITLECASE_LETTER) |

  5759             (1 << Character.MODIFIER_LETTER) |

  5760             (1 << Character.OTHER_LETTER) |

  5761             (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) ||

  5762             CharacterData.of(codePoint).isOtherAlphabetic(codePoint);

  5763     }

  5765     /**

  5766      * Determines if the specified character (Unicode code point) is a CJKV

  5767      * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by

  5768      * the Unicode Standard.

  5769      *

  5770      * @param   codePoint the character (Unicode code point) to be tested.

  5771      * @return  <code>true</code> if the character is a Unicode ideograph

  5772      *          character, <code>false</code> otherwise.

  5773      * @since   1.7

  5774      */

  5775     public static boolean isIdeographic(int codePoint) {

  5776         return CharacterData.of(codePoint).isIdeographic(codePoint);

  5777     }

  5779     /**

  5780      * Determines if the specified character is

  5781      * permissible as the first character in a Java identifier.

  5782      * <p>

  5783      * A character may start a Java identifier if and only if

  5784      * one of the following conditions is true:

  5785      * <ul>

  5786      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}

  5787      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}

  5788      * <li> {@code ch} is a currency symbol (such as {@code '$'})

  5789      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).

  5790      * </ul>

  5791      *

  5792      * <p><b>Note:</b> This method cannot handle <a

  5793      * href="#supplementary"> supplementary characters</a>. To support

  5794      * all Unicode characters, including supplementary characters, use

  5795      * the {@link #isJavaIdentifierStart(int)} method.

  5796      *

  5797      * @param   ch the character to be tested.

  5798      * @return  {@code true} if the character may start a Java identifier;

  5799      *          {@code false} otherwise.

  5800      * @see     Character#isJavaIdentifierPart(char)

  5801      * @see     Character#isLetter(char)

  5802      * @see     Character#isUnicodeIdentifierStart(char)

  5803      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)

  5804      * @since   1.1

  5805      */

  5806     public static boolean isJavaIdentifierStart(char ch) {

  5807         return isJavaIdentifierStart((int)ch);

  5808     }

  5810     /**

  5811      * Determines if the character (Unicode code point) is

  5812      * permissible as the first character in a Java identifier.

  5813      * <p>

  5814      * A character may start a Java identifier if and only if

  5815      * one of the following conditions is true:

  5816      * <ul>

  5817      * <li> {@link #isLetter(int) isLetter(codePoint)}

  5818      *      returns {@code true}

  5819      * <li> {@link #getType(int) getType(codePoint)}

  5820      *      returns {@code LETTER_NUMBER}

  5821      * <li> the referenced character is a currency symbol (such as {@code '$'})

  5822      * <li> the referenced character is a connecting punctuation character

  5823      *      (such as {@code '_'}).

  5824      * </ul>

  5825      *

  5826      * @param   codePoint the character (Unicode code point) to be tested.

  5827      * @return  {@code true} if the character may start a Java identifier;

  5828      *          {@code false} otherwise.

  5829      * @see     Character#isJavaIdentifierPart(int)

  5830      * @see     Character#isLetter(int)

  5831      * @see     Character#isUnicodeIdentifierStart(int)

  5832      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)

  5833      * @since   1.5

  5834      */

  5835     public static boolean isJavaIdentifierStart(int codePoint) {

  5836         return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);

  5837     }

  5839     /**

  5840      * Determines if the specified character may be part of a Java

  5841      * identifier as other than the first character.

  5842      * <p>

  5843      * A character may be part of a Java identifier if any of the following

  5844      * are true:

  5845      * <ul>

  5846      * <li>  it is a letter

  5847      * <li>  it is a currency symbol (such as {@code '$'})

  5848      * <li>  it is a connecting punctuation character (such as {@code '_'})

  5849      * <li>  it is a digit

  5850      * <li>  it is a numeric letter (such as a Roman numeral character)

  5851      * <li>  it is a combining mark

  5852      * <li>  it is a non-spacing mark

  5853      * <li> {@code isIdentifierIgnorable} returns

  5854      * {@code true} for the character

  5855      * </ul>

  5856      *

  5857      * <p><b>Note:</b> This method cannot handle <a

  5858      * href="#supplementary"> supplementary characters</a>. To support

  5859      * all Unicode characters, including supplementary characters, use

  5860      * the {@link #isJavaIdentifierPart(int)} method.

  5861      *

  5862      * @param   ch      the character to be tested.

  5863      * @return {@code true} if the character may be part of a

  5864      *          Java identifier; {@code false} otherwise.

  5865      * @see     Character#isIdentifierIgnorable(char)

  5866      * @see     Character#isJavaIdentifierStart(char)

  5867      * @see     Character#isLetterOrDigit(char)

  5868      * @see     Character#isUnicodeIdentifierPart(char)

  5869      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)

  5870      * @since   1.1

  5871      */

  5872     public static boolean isJavaIdentifierPart(char ch) {

  5873         return isJavaIdentifierPart((int)ch);

  5874     }

  5876     /**

  5877      * Determines if the character (Unicode code point) may be part of a Java

  5878      * identifier as other than the first character.

  5879      * <p>

  5880      * A character may be part of a Java identifier if any of the following

  5881      * are true:

  5882      * <ul>

  5883      * <li>  it is a letter

  5884      * <li>  it is a currency symbol (such as {@code '$'})

  5885      * <li>  it is a connecting punctuation character (such as {@code '_'})

  5886      * <li>  it is a digit

  5887      * <li>  it is a numeric letter (such as a Roman numeral character)

  5888      * <li>  it is a combining mark

  5889      * <li>  it is a non-spacing mark

  5890      * <li> {@link #isIdentifierIgnorable(int)

  5891      * isIdentifierIgnorable(codePoint)} returns {@code true} for

  5892      * the character

  5893      * </ul>

  5894      *

  5895      * @param   codePoint the character (Unicode code point) to be tested.

  5896      * @return {@code true} if the character may be part of a

  5897      *          Java identifier; {@code false} otherwise.

  5898      * @see     Character#isIdentifierIgnorable(int)

  5899      * @see     Character#isJavaIdentifierStart(int)

  5900      * @see     Character#isLetterOrDigit(int)

  5901      * @see     Character#isUnicodeIdentifierPart(int)

  5902      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)

  5903      * @since   1.5

  5904      */

  5905     public static boolean isJavaIdentifierPart(int codePoint) {

  5906         return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);

  5907     }

  5909     /**

  5910      * Determines if the specified character is permissible as the

  5911      * first character in a Unicode identifier.

  5912      * <p>

  5913      * A character may start a Unicode identifier if and only if

  5914      * one of the following conditions is true:

  5915      * <ul>

  5916      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}

  5917      * <li> {@link #getType(char) getType(ch)} returns

  5918      *      {@code LETTER_NUMBER}.

  5919      * </ul>

  5920      *

  5921      * <p><b>Note:</b> This method cannot handle <a

  5922      * href="#supplementary"> supplementary characters</a>. To support

  5923      * all Unicode characters, including supplementary characters, use

  5924      * the {@link #isUnicodeIdentifierStart(int)} method.

  5925      *

  5926      * @param   ch      the character to be tested.

  5927      * @return  {@code true} if the character may start a Unicode

  5928      *          identifier; {@code false} otherwise.

  5929      * @see     Character#isJavaIdentifierStart(char)

  5930      * @see     Character#isLetter(char)

  5931      * @see     Character#isUnicodeIdentifierPart(char)

  5932      * @since   1.1

  5933      */

  5934     public static boolean isUnicodeIdentifierStart(char ch) {

  5935         return isUnicodeIdentifierStart((int)ch);

  5936     }

  5938     /**

  5939      * Determines if the specified character (Unicode code point) is permissible as the

  5940      * first character in a Unicode identifier.

  5941      * <p>

  5942      * A character may start a Unicode identifier if and only if

  5943      * one of the following conditions is true:

  5944      * <ul>

  5945      * <li> {@link #isLetter(int) isLetter(codePoint)}

  5946      *      returns {@code true}

  5947      * <li> {@link #getType(int) getType(codePoint)}

  5948      *      returns {@code LETTER_NUMBER}.

  5949      * </ul>

  5950      * @param   codePoint the character (Unicode code point) to be tested.

  5951      * @return  {@code true} if the character may start a Unicode

  5952      *          identifier; {@code false} otherwise.

  5953      * @see     Character#isJavaIdentifierStart(int)

  5954      * @see     Character#isLetter(int)

  5955      * @see     Character#isUnicodeIdentifierPart(int)

  5956      * @since   1.5

  5957      */

  5958     public static boolean isUnicodeIdentifierStart(int codePoint) {

  5959         return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);

  5960     }

  5962     /**

  5963      * Determines if the specified character may be part of a Unicode

  5964      * identifier as other than the first character.

  5965      * <p>

  5966      * A character may be part of a Unicode identifier if and only if

  5967      * one of the following statements is true:

  5968      * <ul>

  5969      * <li>  it is a letter

  5970      * <li>  it is a connecting punctuation character (such as {@code '_'})

  5971      * <li>  it is a digit

  5972      * <li>  it is a numeric letter (such as a Roman numeral character)

  5973      * <li>  it is a combining mark

  5974      * <li>  it is a non-spacing mark

  5975      * <li> {@code isIdentifierIgnorable} returns

  5976      * {@code true} for this character.

  5977      * </ul>

  5978      *

  5979      * <p><b>Note:</b> This method cannot handle <a

  5980      * href="#supplementary"> supplementary characters</a>. To support

  5981      * all Unicode characters, including supplementary characters, use

  5982      * the {@link #isUnicodeIdentifierPart(int)} method.

  5983      *

  5984      * @param   ch      the character to be tested.

  5985      * @return  {@code true} if the character may be part of a

  5986      *          Unicode identifier; {@code false} otherwise.

  5987      * @see     Character#isIdentifierIgnorable(char)

  5988      * @see     Character#isJavaIdentifierPart(char)

  5989      * @see     Character#isLetterOrDigit(char)

  5990      * @see     Character#isUnicodeIdentifierStart(char)

  5991      * @since   1.1

  5992      */

  5993     public static boolean isUnicodeIdentifierPart(char ch) {

  5994         return isUnicodeIdentifierPart((int)ch);

  5995     }

  5997     /**

  5998      * Determines if the specified character (Unicode code point) may be part of a Unicode

  5999      * identifier as other than the first character.

  6000      * <p>

  6001      * A character may be part of a Unicode identifier if and only if

  6002      * one of the following statements is true:

  6003      * <ul>

  6004      * <li>  it is a letter

  6005      * <li>  it is a connecting punctuation character (such as {@code '_'})

  6006      * <li>  it is a digit

  6007      * <li>  it is a numeric letter (such as a Roman numeral character)

  6008      * <li>  it is a combining mark

  6009      * <li>  it is a non-spacing mark

  6010      * <li> {@code isIdentifierIgnorable} returns

  6011      * {@code true} for this character.

  6012      * </ul>

  6013      * @param   codePoint the character (Unicode code point) to be tested.

  6014      * @return  {@code true} if the character may be part of a

  6015      *          Unicode identifier; {@code false} otherwise.

  6016      * @see     Character#isIdentifierIgnorable(int)

  6017      * @see     Character#isJavaIdentifierPart(int)

  6018      * @see     Character#isLetterOrDigit(int)

  6019      * @see     Character#isUnicodeIdentifierStart(int)

  6020      * @since   1.5

  6021      */

  6022     public static boolean isUnicodeIdentifierPart(int codePoint) {

  6023         return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);

  6024     }

  6026     /**

  6027      * Determines if the specified character should be regarded as

  6028      * an ignorable character in a Java identifier or a Unicode identifier.

  6029      * <p>

  6030      * The following Unicode characters are ignorable in a Java identifier

  6031      * or a Unicode identifier:

  6032      * <ul>

  6033      * <li>ISO control characters that are not whitespace

  6034      * <ul>

  6035      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}

  6036      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}

  6037      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}

  6038      * </ul>

  6039      *

  6040      * <li>all characters that have the {@code FORMAT} general

  6041      * category value

  6042      * </ul>

  6043      *

  6044      * <p><b>Note:</b> This method cannot handle <a

  6045      * href="#supplementary"> supplementary characters</a>. To support

  6046      * all Unicode characters, including supplementary characters, use

  6047      * the {@link #isIdentifierIgnorable(int)} method.

  6048      *

  6049      * @param   ch      the character to be tested.

  6050      * @return  {@code true} if the character is an ignorable control

  6051      *          character that may be part of a Java or Unicode identifier;

  6052      *           {@code false} otherwise.

  6053      * @see     Character#isJavaIdentifierPart(char)

  6054      * @see     Character#isUnicodeIdentifierPart(char)

  6055      * @since   1.1

  6056      */

  6057     public static boolean isIdentifierIgnorable(char ch) {

  6058         return isIdentifierIgnorable((int)ch);

  6059     }

  6061     /**

  6062      * Determines if the specified character (Unicode code point) should be regarded as

  6063      * an ignorable character in a Java identifier or a Unicode identifier.

  6064      * <p>

  6065      * The following Unicode characters are ignorable in a Java identifier

  6066      * or a Unicode identifier:

  6067      * <ul>

  6068      * <li>ISO control characters that are not whitespace

  6069      * <ul>

  6070      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}

  6071      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}

  6072      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}

  6073      * </ul>

  6074      *

  6075      * <li>all characters that have the {@code FORMAT} general

  6076      * category value

  6077      * </ul>

  6078      *

  6079      * @param   codePoint the character (Unicode code point) to be tested.

  6080      * @return  {@code true} if the character is an ignorable control

  6081      *          character that may be part of a Java or Unicode identifier;

  6082      *          {@code false} otherwise.

  6083      * @see     Character#isJavaIdentifierPart(int)

  6084      * @see     Character#isUnicodeIdentifierPart(int)

  6085      * @since   1.5

  6086      */

  6087     public static boolean isIdentifierIgnorable(int codePoint) {

  6088         return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);

  6089     }

  6091     /**

  6092      * Converts the character argument to lowercase using case

  6093      * mapping information from the UnicodeData file.

  6094      * <p>

  6095      * Note that

  6096      * {@code Character.isLowerCase(Character.toLowerCase(ch))}

  6097      * does not always return {@code true} for some ranges of

  6098      * characters, particularly those that are symbols or ideographs.

  6099      *

  6100      * <p>In general, {@link String#toLowerCase()} should be used to map

  6101      * characters to lowercase. {@code String} case mapping methods

  6102      * have several benefits over {@code Character} case mapping methods.

  6103      * {@code String} case mapping methods can perform locale-sensitive

  6104      * mappings, context-sensitive mappings, and 1:M character mappings, whereas

  6105      * the {@code Character} case mapping methods cannot.

  6106      *

  6107      * <p><b>Note:</b> This method cannot handle <a

  6108      * href="#supplementary"> supplementary characters</a>. To support

  6109      * all Unicode characters, including supplementary characters, use

  6110      * the {@link #toLowerCase(int)} method.

  6111      *

  6112      * @param   ch   the character to be converted.

  6113      * @return  the lowercase equivalent of the character, if any;

  6114      *          otherwise, the character itself.

  6115      * @see     Character#isLowerCase(char)

  6116      * @see     String#toLowerCase()

  6117      */

  6118     public static char toLowerCase(char ch) {

  6119         return (char)toLowerCase((int)ch);

  6120     }

  6122     /**

  6123      * Converts the character (Unicode code point) argument to

  6124      * lowercase using case mapping information from the UnicodeData

  6125      * file.

  6126      *

  6127      * <p> Note that

  6128      * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}

  6129      * does not always return {@code true} for some ranges of

  6130      * characters, particularly those that are symbols or ideographs.

  6131      *

  6132      * <p>In general, {@link String#toLowerCase()} should be used to map

  6133      * characters to lowercase. {@code String} case mapping methods

  6134      * have several benefits over {@code Character} case mapping methods.

  6135      * {@code String} case mapping methods can perform locale-sensitive

  6136      * mappings, context-sensitive mappings, and 1:M character mappings, whereas

  6137      * the {@code Character} case mapping methods cannot.

  6138      *

  6139      * @param   codePoint   the character (Unicode code point) to be converted.

  6140      * @return  the lowercase equivalent of the character (Unicode code

  6141      *          point), if any; otherwise, the character itself.

  6142      * @see     Character#isLowerCase(int)

  6143      * @see     String#toLowerCase()

  6144      *

  6145      * @since   1.5

  6146      */

  6147     public static int toLowerCase(int codePoint) {

  6148         return CharacterData.of(codePoint).toLowerCase(codePoint);

  6149     }

  6151     /**

  6152      * Converts the character argument to uppercase using case mapping

  6153      * information from the UnicodeData file.

  6154      * <p>

  6155      * Note that

  6156      * {@code Character.isUpperCase(Character.toUpperCase(ch))}

  6157      * does not always return {@code true} for some ranges of

  6158      * characters, particularly those that are symbols or ideographs.

  6159      *

  6160      * <p>In general, {@link String#toUpperCase()} should be used to map

  6161      * characters to uppercase. {@code String} case mapping methods

  6162      * have several benefits over {@code Character} case mapping methods.

  6163      * {@code String} case mapping methods can perform locale-sensitive

  6164      * mappings, context-sensitive mappings, and 1:M character mappings, whereas

  6165      * the {@code Character} case mapping methods cannot.

  6166      *

  6167      * <p><b>Note:</b> This method cannot handle <a

  6168      * href="#supplementary"> supplementary characters</a>. To support

  6169      * all Unicode characters, including supplementary characters, use

  6170      * the {@link #toUpperCase(int)} method.

  6171      *

  6172      * @param   ch   the character to be converted.

  6173      * @return  the uppercase equivalent of the character, if any;

  6174      *          otherwise, the character itself.

  6175      * @see     Character#isUpperCase(char)

  6176      * @see     String#toUpperCase()

  6177      */

  6178     public static char toUpperCase(char ch) {

  6179         return (char)toUpperCase((int)ch);

  6180     }

  6182     /**

  6183      * Converts the character (Unicode code point) argument to

  6184      * uppercase using case mapping information from the UnicodeData

  6185      * file.

  6186      *

  6187      * <p>Note that

  6188      * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}

  6189      * does not always return {@code true} for some ranges of

  6190      * characters, particularly those that are symbols or ideographs.

  6191      *

  6192      * <p>In general, {@link String#toUpperCase()} should be used to map

  6193      * characters to uppercase. {@code String} case mapping methods

  6194      * have several benefits over {@code Character} case mapping methods.

  6195      * {@code String} case mapping methods can perform locale-sensitive

  6196      * mappings, context-sensitive mappings, and 1:M character mappings, whereas

  6197      * the {@code Character} case mapping methods cannot.

  6198      *

  6199      * @param   codePoint   the character (Unicode code point) to be converted.

  6200      * @return  the uppercase equivalent of the character, if any;

  6201      *          otherwise, the character itself.

  6202      * @see     Character#isUpperCase(int)

  6203      * @see     String#toUpperCase()

  6204      *

  6205      * @since   1.5

  6206      */

  6207     public static int toUpperCase(int codePoint) {

  6208         return CharacterData.of(codePoint).toUpperCase(codePoint);

  6209     }

  6211     /**

  6212      * Converts the character argument to titlecase using case mapping

  6213      * information from the UnicodeData file. If a character has no

  6214      * explicit titlecase mapping and is not itself a titlecase char

  6215      * according to UnicodeData, then the uppercase mapping is

  6216      * returned as an equivalent titlecase mapping. If the

  6217      * {@code char} argument is already a titlecase

  6218      * {@code char}, the same {@code char} value will be

  6219      * returned.

  6220      * <p>

  6221      * Note that

  6222      * {@code Character.isTitleCase(Character.toTitleCase(ch))}

  6223      * does not always return {@code true} for some ranges of

  6224      * characters.

  6225      *

  6226      * <p><b>Note:</b> This method cannot handle <a

  6227      * href="#supplementary"> supplementary characters</a>. To support

  6228      * all Unicode characters, including supplementary characters, use

  6229      * the {@link #toTitleCase(int)} method.

  6230      *

  6231      * @param   ch   the character to be converted.

  6232      * @return  the titlecase equivalent of the character, if any;

  6233      *          otherwise, the character itself.

  6234      * @see     Character#isTitleCase(char)

  6235      * @see     Character#toLowerCase(char)

  6236      * @see     Character#toUpperCase(char)

  6237      * @since   1.0.2

  6238      */

  6239     public static char toTitleCase(char ch) {

  6240         return (char)toTitleCase((int)ch);

  6241     }

  6243     /**

  6244      * Converts the character (Unicode code point) argument to titlecase using case mapping

  6245      * information from the UnicodeData file. If a character has no

  6246      * explicit titlecase mapping and is not itself a titlecase char

  6247      * according to UnicodeData, then the uppercase mapping is

  6248      * returned as an equivalent titlecase mapping. If the

  6249      * character argument is already a titlecase

  6250      * character, the same character value will be

  6251      * returned.

  6252      *

  6253      * <p>Note that

  6254      * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}

  6255      * does not always return {@code true} for some ranges of

  6256      * characters.

  6257      *

  6258      * @param   codePoint   the character (Unicode code point) to be converted.

  6259      * @return  the titlecase equivalent of the character, if any;

  6260      *          otherwise, the character itself.

  6261      * @see     Character#isTitleCase(int)

  6262      * @see     Character#toLowerCase(int)

  6263      * @see     Character#toUpperCase(int)

  6264      * @since   1.5

  6265      */

  6266     public static int toTitleCase(int codePoint) {

  6267         return CharacterData.of(codePoint).toTitleCase(codePoint);

  6268     }

  6270     /**

  6271      * Returns the numeric value of the character {@code ch} in the

  6272      * specified radix.

  6273      * <p>

  6274      * If the radix is not in the range {@code MIN_RADIX} &le;

  6275      * {@code radix} &le; {@code MAX_RADIX} or if the

  6276      * value of {@code ch} is not a valid digit in the specified

  6277      * radix, {@code -1} is returned. A character is a valid digit

  6278      * if at least one of the following is true:

  6279      * <ul>

  6280      * <li>The method {@code isDigit} is {@code true} of the character

  6281      *     and the Unicode decimal digit value of the character (or its

  6282      *     single-character decomposition) is less than the specified radix.

  6283      *     In this case the decimal digit value is returned.

  6284      * <li>The character is one of the uppercase Latin letters

  6285      *     {@code 'A'} through {@code 'Z'} and its code is less than

  6286      *     {@code radix + 'A' - 10}.

  6287      *     In this case, {@code ch - 'A' + 10}

  6288      *     is returned.

  6289      * <li>The character is one of the lowercase Latin letters

  6290      *     {@code 'a'} through {@code 'z'} and its code is less than

  6291      *     {@code radix + 'a' - 10}.

  6292      *     In this case, {@code ch - 'a' + 10}

  6293      *     is returned.

  6294      * <li>The character is one of the fullwidth uppercase Latin letters A

  6295      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})

  6296      *     and its code is less than

  6297      *     {@code radix + '\u005CuFF21' - 10}.

  6298      *     In this case, {@code ch - '\u005CuFF21' + 10}

  6299      *     is returned.

  6300      * <li>The character is one of the fullwidth lowercase Latin letters a

  6301      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})

  6302      *     and its code is less than

  6303      *     {@code radix + '\u005CuFF41' - 10}.

  6304      *     In this case, {@code ch - '\u005CuFF41' + 10}

  6305      *     is returned.

  6306      * </ul>

  6307      *

  6308      * <p><b>Note:</b> This method cannot handle <a

  6309      * href="#supplementary"> supplementary characters</a>. To support

  6310      * all Unicode characters, including supplementary characters, use

  6311      * the {@link #digit(int, int)} method.

  6312      *

  6313      * @param   ch      the character to be converted.

  6314      * @param   radix   the radix.

  6315      * @return  the numeric value represented by the character in the

  6316      *          specified radix.

  6317      * @see     Character#forDigit(int, int)

  6318      * @see     Character#isDigit(char)

  6319      */

  6320     public static int digit(char ch, int radix) {

  6321         return digit((int)ch, radix);

  6322     }

  6324     /**

  6325      * Returns the numeric value of the specified character (Unicode

  6326      * code point) in the specified radix.

  6327      *

  6328      * <p>If the radix is not in the range {@code MIN_RADIX} &le;

  6329      * {@code radix} &le; {@code MAX_RADIX} or if the

  6330      * character is not a valid digit in the specified

  6331      * radix, {@code -1} is returned. A character is a valid digit

  6332      * if at least one of the following is true:

  6333      * <ul>

  6334      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character

  6335      *     and the Unicode decimal digit value of the character (or its

  6336      *     single-character decomposition) is less than the specified radix.

  6337      *     In this case the decimal digit value is returned.

  6338      * <li>The character is one of the uppercase Latin letters

  6339      *     {@code 'A'} through {@code 'Z'} and its code is less than

  6340      *     {@code radix + 'A' - 10}.

  6341      *     In this case, {@code codePoint - 'A' + 10}

  6342      *     is returned.

  6343      * <li>The character is one of the lowercase Latin letters

  6344      *     {@code 'a'} through {@code 'z'} and its code is less than

  6345      *     {@code radix + 'a' - 10}.

  6346      *     In this case, {@code codePoint - 'a' + 10}

  6347      *     is returned.

  6348      * <li>The character is one of the fullwidth uppercase Latin letters A

  6349      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})

  6350      *     and its code is less than

  6351      *     {@code radix + '\u005CuFF21' - 10}.

  6352      *     In this case,

  6353      *     {@code codePoint - '\u005CuFF21' + 10}

  6354      *     is returned.

  6355      * <li>The character is one of the fullwidth lowercase Latin letters a

  6356      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})

  6357      *     and its code is less than

  6358      *     {@code radix + '\u005CuFF41' - 10}.

  6359      *     In this case,

  6360      *     {@code codePoint - '\u005CuFF41' + 10}

  6361      *     is returned.

  6362      * </ul>

  6363      *

  6364      * @param   codePoint the character (Unicode code point) to be converted.

  6365      * @param   radix   the radix.

  6366      * @return  the numeric value represented by the character in the

  6367      *          specified radix.

  6368      * @see     Character#forDigit(int, int)

  6369      * @see     Character#isDigit(int)

  6370      * @since   1.5

  6371      */

  6372     public static int digit(int codePoint, int radix) {

  6373         return CharacterData.of(codePoint).digit(codePoint, radix);

  6374     }

  6376     /**

  6377      * Returns the {@code int} value that the specified Unicode

  6378      * character represents. For example, the character

  6379      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return

  6380      * an int with a value of 50.

  6381      * <p>

  6382      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through

  6383      * {@code '\u005Cu005A'}), lowercase

  6384      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and

  6385      * full width variant ({@code '\u005CuFF21'} through

  6386      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through

  6387      * {@code '\u005CuFF5A'}) forms have numeric values from 10

  6388      * through 35. This is independent of the Unicode specification,

  6389      * which does not assign numeric values to these {@code char}

  6390      * values.

  6391      * <p>

  6392      * If the character does not have a numeric value, then -1 is returned.

  6393      * If the character has a numeric value that cannot be represented as a

  6394      * nonnegative integer (for example, a fractional value), then -2

  6395      * is returned.

  6396      *

  6397      * <p><b>Note:</b> This method cannot handle <a

  6398      * href="#supplementary"> supplementary characters</a>. To support

  6399      * all Unicode characters, including supplementary characters, use

  6400      * the {@link #getNumericValue(int)} method.

  6401      *

  6402      * @param   ch      the character to be converted.

  6403      * @return  the numeric value of the character, as a nonnegative {@code int}

  6404      *          value; -2 if the character has a numeric value that is not a

  6405      *          nonnegative integer; -1 if the character has no numeric value.

  6406      * @see     Character#forDigit(int, int)

  6407      * @see     Character#isDigit(char)

  6408      * @since   1.1

  6409      */

  6410     public static int getNumericValue(char ch) {

  6411         return getNumericValue((int)ch);

  6412     }

  6414     /**

  6415      * Returns the {@code int} value that the specified

  6416      * character (Unicode code point) represents. For example, the character

  6417      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return

  6418      * an {@code int} with a value of 50.

  6419      * <p>

  6420      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through

  6421      * {@code '\u005Cu005A'}), lowercase

  6422      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and

  6423      * full width variant ({@code '\u005CuFF21'} through

  6424      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through

  6425      * {@code '\u005CuFF5A'}) forms have numeric values from 10

  6426      * through 35. This is independent of the Unicode specification,

  6427      * which does not assign numeric values to these {@code char}

  6428      * values.

  6429      * <p>

  6430      * If the character does not have a numeric value, then -1 is returned.

  6431      * If the character has a numeric value that cannot be represented as a

  6432      * nonnegative integer (for example, a fractional value), then -2

  6433      * is returned.

  6434      *

  6435      * @param   codePoint the character (Unicode code point) to be converted.

  6436      * @return  the numeric value of the character, as a nonnegative {@code int}

  6437      *          value; -2 if the character has a numeric value that is not a

  6438      *          nonnegative integer; -1 if the character has no numeric value.

  6439      * @see     Character#forDigit(int, int)

  6440      * @see     Character#isDigit(int)

  6441      * @since   1.5

  6442      */

  6443     public static int getNumericValue(int codePoint) {

  6444         return CharacterData.of(codePoint).getNumericValue(codePoint);

  6445     }

  6447     /**

  6448      * Determines if the specified character is ISO-LATIN-1 white space.

  6449      * This method returns {@code true} for the following five

  6450      * characters only:

  6451      * <table>

  6452      * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>

  6453      *     <td>{@code HORIZONTAL TABULATION}</td></tr>

  6454      * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>

  6455      *     <td>{@code NEW LINE}</td></tr>

  6456      * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>

  6457      *     <td>{@code FORM FEED}</td></tr>

  6458      * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>

  6459      *     <td>{@code CARRIAGE RETURN}</td></tr>

  6460      * <tr><td>{@code '&nbsp;'}</td>  <td>{@code U+0020}</td>

  6461      *     <td>{@code SPACE}</td></tr>

  6462      * </table>

  6463      *

  6464      * @param      ch   the character to be tested.

  6465      * @return     {@code true} if the character is ISO-LATIN-1 white

  6466      *             space; {@code false} otherwise.

  6467      * @see        Character#isSpaceChar(char)

  6468      * @see        Character#isWhitespace(char)

  6469      * @deprecated Replaced by isWhitespace(char).

  6470      */

  6471     @Deprecated

  6472     public static boolean isSpace(char ch) {

  6473         return (ch <= 0x0020) &&

  6474             (((((1L << 0x0009) |

  6475             (1L << 0x000A) |

  6476             (1L << 0x000C) |

  6477             (1L << 0x000D) |

  6478             (1L << 0x0020)) >> ch) & 1L) != 0);

  6479     }

  6482     /**

  6483      * Determines if the specified character is a Unicode space character.

  6484      * A character is considered to be a space character if and only if

  6485      * it is specified to be a space character by the Unicode Standard. This

  6486      * method returns true if the character's general category type is any of

  6487      * the following:

  6488      * <ul>

  6489      * <li> {@code SPACE_SEPARATOR}

  6490      * <li> {@code LINE_SEPARATOR}

  6491      * <li> {@code PARAGRAPH_SEPARATOR}

  6492      * </ul>

  6493      *

  6494      * <p><b>Note:</b> This method cannot handle <a

  6495      * href="#supplementary"> supplementary characters</a>. To support

  6496      * all Unicode characters, including supplementary characters, use

  6497      * the {@link #isSpaceChar(int)} method.

  6498      *

  6499      * @param   ch      the character to be tested.

  6500      * @return  {@code true} if the character is a space character;

  6501      *          {@code false} otherwise.

  6502      * @see     Character#isWhitespace(char)

  6503      * @since   1.1

  6504      */

  6505     public static boolean isSpaceChar(char ch) {

  6506         return isSpaceChar((int)ch);

  6507     }

  6509     /**

  6510      * Determines if the specified character (Unicode code point) is a

  6511      * Unicode space character.  A character is considered to be a

  6512      * space character if and only if it is specified to be a space

  6513      * character by the Unicode Standard. This method returns true if

  6514      * the character's general category type is any of the following:

  6515      *

  6516      * <ul>

  6517      * <li> {@link #SPACE_SEPARATOR}

  6518      * <li> {@link #LINE_SEPARATOR}

  6519      * <li> {@link #PARAGRAPH_SEPARATOR}

  6520      * </ul>

  6521      *

  6522      * @param   codePoint the character (Unicode code point) to be tested.

  6523      * @return  {@code true} if the character is a space character;

  6524      *          {@code false} otherwise.

  6525      * @see     Character#isWhitespace(int)

  6526      * @since   1.5

  6527      */

  6528     public static boolean isSpaceChar(int codePoint) {

  6529         return ((((1 << Character.SPACE_SEPARATOR) |

  6530                   (1 << Character.LINE_SEPARATOR) |

  6531                   (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)

  6532             != 0;

  6533     }

  6535     /**

  6536      * Determines if the specified character is white space according to Java.

  6537      * A character is a Java whitespace character if and only if it satisfies

  6538      * one of the following criteria:

  6539      * <ul>

  6540      * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},

  6541      *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})

  6542      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},

  6543      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).

  6544      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.

  6545      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.

  6546      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.

  6547      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.

  6548      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.

  6549      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.

  6550      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.

  6551      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.

  6552      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.

  6553      * </ul>

  6554      *

  6555      * <p><b>Note:</b> This method cannot handle <a

  6556      * href="#supplementary"> supplementary characters</a>. To support

  6557      * all Unicode characters, including supplementary characters, use

  6558      * the {@link #isWhitespace(int)} method.

  6559      *

  6560      * @param   ch the character to be tested.

  6561      * @return  {@code true} if the character is a Java whitespace

  6562      *          character; {@code false} otherwise.

  6563      * @see     Character#isSpaceChar(char)

  6564      * @since   1.1

  6565      */

  6566     public static boolean isWhitespace(char ch) {

  6567         return isWhitespace((int)ch);

  6568     }

  6570     /**

  6571      * Determines if the specified character (Unicode code point) is

  6572      * white space according to Java.  A character is a Java

  6573      * whitespace character if and only if it satisfies one of the

  6574      * following criteria:

  6575      * <ul>

  6576      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},

  6577      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})

  6578      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},

  6579      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).

  6580      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.

  6581      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.

  6582      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.

  6583      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.

  6584      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.

  6585      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.

  6586      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.

  6587      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.

  6588      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.

  6589      * </ul>

  6590      * <p>

  6591      *

  6592      * @param   codePoint the character (Unicode code point) to be tested.

  6593      * @return  {@code true} if the character is a Java whitespace

  6594      *          character; {@code false} otherwise.

  6595      * @see     Character#isSpaceChar(int)

  6596      * @since   1.5

  6597      */

  6598     public static boolean isWhitespace(int codePoint) {

  6599         return CharacterData.of(codePoint).isWhitespace(codePoint);

  6600     }

  6602     /**

  6603      * Determines if the specified character is an ISO control

  6604      * character.  A character is considered to be an ISO control

  6605      * character if its code is in the range {@code '\u005Cu0000'}

  6606      * through {@code '\u005Cu001F'} or in the range

  6607      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.

  6608      *

  6609      * <p><b>Note:</b> This method cannot handle <a

  6610      * href="#supplementary"> supplementary characters</a>. To support

  6611      * all Unicode characters, including supplementary characters, use

  6612      * the {@link #isISOControl(int)} method.

  6613      *

  6614      * @param   ch      the character to be tested.

  6615      * @return  {@code true} if the character is an ISO control character;

  6616      *          {@code false} otherwise.

  6617      *

  6618      * @see     Character#isSpaceChar(char)

  6619      * @see     Character#isWhitespace(char)

  6620      * @since   1.1

  6621      */

  6622     public static boolean isISOControl(char ch) {

  6623         return isISOControl((int)ch);

  6624     }

  6626     /**

  6627      * Determines if the referenced character (Unicode code point) is an ISO control

  6628      * character.  A character is considered to be an ISO control

  6629      * character if its code is in the range {@code '\u005Cu0000'}

  6630      * through {@code '\u005Cu001F'} or in the range

  6631      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.

  6632      *

  6633      * @param   codePoint the character (Unicode code point) to be tested.

  6634      * @return  {@code true} if the character is an ISO control character;

  6635      *          {@code false} otherwise.

  6636      * @see     Character#isSpaceChar(int)

  6637      * @see     Character#isWhitespace(int)

  6638      * @since   1.5

  6639      */

  6640     public static boolean isISOControl(int codePoint) {

  6641         // Optimized form of:

  6642         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||

  6643         //     (codePoint >= 0x7F && codePoint <= 0x9F);

  6644         return codePoint <= 0x9F &&

  6645             (codePoint >= 0x7F || (codePoint >>> 5 == 0));

  6646     }

  6648     /**

  6649      * Returns a value indicating a character's general category.

  6650      *

  6651      * <p><b>Note:</b> This method cannot handle <a

  6652      * href="#supplementary"> supplementary characters</a>. To support

  6653      * all Unicode characters, including supplementary characters, use

  6654      * the {@link #getType(int)} method.

  6655      *

  6656      * @param   ch      the character to be tested.

  6657      * @return  a value of type {@code int} representing the

  6658      *          character's general category.

  6659      * @see     Character#COMBINING_SPACING_MARK

  6660      * @see     Character#CONNECTOR_PUNCTUATION

  6661      * @see     Character#CONTROL

  6662      * @see     Character#CURRENCY_SYMBOL

  6663      * @see     Character#DASH_PUNCTUATION

  6664      * @see     Character#DECIMAL_DIGIT_NUMBER

  6665      * @see     Character#ENCLOSING_MARK

  6666      * @see     Character#END_PUNCTUATION

  6667      * @see     Character#FINAL_QUOTE_PUNCTUATION

  6668      * @see     Character#FORMAT

  6669      * @see     Character#INITIAL_QUOTE_PUNCTUATION

  6670      * @see     Character#LETTER_NUMBER

  6671      * @see     Character#LINE_SEPARATOR

  6672      * @see     Character#LOWERCASE_LETTER

  6673      * @see     Character#MATH_SYMBOL

  6674      * @see     Character#MODIFIER_LETTER

  6675      * @see     Character#MODIFIER_SYMBOL

  6676      * @see     Character#NON_SPACING_MARK

  6677      * @see     Character#OTHER_LETTER

  6678      * @see     Character#OTHER_NUMBER

  6679      * @see     Character#OTHER_PUNCTUATION

  6680      * @see     Character#OTHER_SYMBOL

  6681      * @see     Character#PARAGRAPH_SEPARATOR

  6682      * @see     Character#PRIVATE_USE

  6683      * @see     Character#SPACE_SEPARATOR

  6684      * @see     Character#START_PUNCTUATION

  6685      * @see     Character#SURROGATE

  6686      * @see     Character#TITLECASE_LETTER

  6687      * @see     Character#UNASSIGNED

  6688      * @see     Character#UPPERCASE_LETTER

  6689      * @since   1.1

  6690      */

  6691     public static int getType(char ch) {

  6692         return getType((int)ch);

  6693     }

  6695     /**

  6696      * Returns a value indicating a character's general category.

  6697      *

  6698      * @param   codePoint the character (Unicode code point) to be tested.

  6699      * @return  a value of type {@code int} representing the

  6700      *          character's general category.

  6701      * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK

  6702      * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION

  6703      * @see     Character#CONTROL CONTROL

  6704      * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL

  6705      * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION

  6706      * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER

  6707      * @see     Character#ENCLOSING_MARK ENCLOSING_MARK

  6708      * @see     Character#END_PUNCTUATION END_PUNCTUATION

  6709      * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION

  6710      * @see     Character#FORMAT FORMAT

  6711      * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION

  6712      * @see     Character#LETTER_NUMBER LETTER_NUMBER

  6713      * @see     Character#LINE_SEPARATOR LINE_SEPARATOR

  6714      * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER

  6715      * @see     Character#MATH_SYMBOL MATH_SYMBOL

  6716      * @see     Character#MODIFIER_LETTER MODIFIER_LETTER

  6717      * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL

  6718      * @see     Character#NON_SPACING_MARK NON_SPACING_MARK

  6719      * @see     Character#OTHER_LETTER OTHER_LETTER

  6720      * @see     Character#OTHER_NUMBER OTHER_NUMBER

  6721      * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION

  6722      * @see     Character#OTHER_SYMBOL OTHER_SYMBOL

  6723      * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR

  6724      * @see     Character#PRIVATE_USE PRIVATE_USE

  6725      * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR

  6726      * @see     Character#START_PUNCTUATION START_PUNCTUATION

  6727      * @see     Character#SURROGATE SURROGATE

  6728      * @see     Character#TITLECASE_LETTER TITLECASE_LETTER

  6729      * @see     Character#UNASSIGNED UNASSIGNED

  6730      * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER

  6731      * @since   1.5

  6732      */

  6733     public static int getType(int codePoint) {

  6734         return CharacterData.of(codePoint).getType(codePoint);

  6735     }

  6737     /**

  6738      * Determines the character representation for a specific digit in

  6739      * the specified radix. If the value of {@code radix} is not a

  6740      * valid radix, or the value of {@code digit} is not a valid

  6741      * digit in the specified radix, the null character

  6742      * ({@code '\u005Cu0000'}) is returned.

  6743      * <p>

  6744      * The {@code radix} argument is valid if it is greater than or

  6745      * equal to {@code MIN_RADIX} and less than or equal to

  6746      * {@code MAX_RADIX}. The {@code digit} argument is valid if

  6747      * {@code 0 <= digit < radix}.

  6748      * <p>

  6749      * If the digit is less than 10, then

  6750      * {@code '0' + digit} is returned. Otherwise, the value

  6751      * {@code 'a' + digit - 10} is returned.

  6752      *

  6753      * @param   digit   the number to convert to a character.

  6754      * @param   radix   the radix.

  6755      * @return  the {@code char} representation of the specified digit

  6756      *          in the specified radix.

  6757      * @see     Character#MIN_RADIX

  6758      * @see     Character#MAX_RADIX

  6759      * @see     Character#digit(char, int)

  6760      */

  6761     public static char forDigit(int digit, int radix) {

  6762         if ((digit >= radix) || (digit < 0)) {

  6763             return '\0';

  6764         }

  6765         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {

  6766             return '\0';

  6767         }

  6768         if (digit < 10) {

  6769             return (char)('0' + digit);

  6770         }

  6771         return (char)('a' - 10 + digit);

  6772     }

  6774     /**

  6775      * Returns the Unicode directionality property for the given

  6776      * character.  Character directionality is used to calculate the

  6777      * visual ordering of text. The directionality value of undefined

  6778      * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.

  6779      *

  6780      * <p><b>Note:</b> This method cannot handle <a

  6781      * href="#supplementary"> supplementary characters</a>. To support

  6782      * all Unicode characters, including supplementary characters, use

  6783      * the {@link #getDirectionality(int)} method.

  6784      *

  6785      * @param  ch {@code char} for which the directionality property

  6786      *            is requested.

  6787      * @return the directionality property of the {@code char} value.

  6788      *

  6789      * @see Character#DIRECTIONALITY_UNDEFINED

  6790      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT

  6791      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT

  6792      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC

  6793      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER

  6794      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR

  6795      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR

  6796      * @see Character#DIRECTIONALITY_ARABIC_NUMBER

  6797      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR

  6798      * @see Character#DIRECTIONALITY_NONSPACING_MARK

  6799      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL

  6800      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR

  6801      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR

  6802      * @see Character#DIRECTIONALITY_WHITESPACE

  6803      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS

  6804      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING

  6805      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE

  6806      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING

  6807      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE

  6808      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT

  6809      * @since 1.4

  6810      */

  6811     public static byte getDirectionality(char ch) {

  6812         return getDirectionality((int)ch);

  6813     }

  6815     /**

  6816      * Returns the Unicode directionality property for the given

  6817      * character (Unicode code point).  Character directionality is

  6818      * used to calculate the visual ordering of text. The

  6819      * directionality value of undefined character is {@link

  6820      * #DIRECTIONALITY_UNDEFINED}.

  6821      *

  6822      * @param   codePoint the character (Unicode code point) for which

  6823      *          the directionality property is requested.

  6824      * @return the directionality property of the character.

  6825      *

  6826      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED

  6827      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT

  6828      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT

  6829      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC

  6830      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER

  6831      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR

  6832      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR

  6833      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER

  6834      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR

  6835      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK

  6836      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL

  6837      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR

  6838      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR

  6839      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE

  6840      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS

  6841      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING

  6842      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE

  6843      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING

  6844      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE

  6845      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT

  6846      * @since    1.5

  6847      */

  6848     public static byte getDirectionality(int codePoint) {

  6849         return CharacterData.of(codePoint).getDirectionality(codePoint);

  6850     }

  6852     /**

  6853      * Determines whether the character is mirrored according to the

  6854      * Unicode specification.  Mirrored characters should have their

  6855      * glyphs horizontally mirrored when displayed in text that is

  6856      * right-to-left.  For example, {@code '\u005Cu0028'} LEFT

  6857      * PARENTHESIS is semantically defined to be an <i>opening

  6858      * parenthesis</i>.  This will appear as a "(" in text that is

  6859      * left-to-right but as a ")" in text that is right-to-left.

  6860      *

  6861      * <p><b>Note:</b> This method cannot handle <a

  6862      * href="#supplementary"> supplementary characters</a>. To support

  6863      * all Unicode characters, including supplementary characters, use

  6864      * the {@link #isMirrored(int)} method.

  6865      *

  6866      * @param  ch {@code char} for which the mirrored property is requested

  6867      * @return {@code true} if the char is mirrored, {@code false}

  6868      *         if the {@code char} is not mirrored or is not defined.

  6869      * @since 1.4

  6870      */

  6871     public static boolean isMirrored(char ch) {

  6872         return isMirrored((int)ch);

  6873     }

  6875     /**

  6876      * Determines whether the specified character (Unicode code point)

  6877      * is mirrored according to the Unicode specification.  Mirrored

  6878      * characters should have their glyphs horizontally mirrored when

  6879      * displayed in text that is right-to-left.  For example,

  6880      * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically

  6881      * defined to be an <i>opening parenthesis</i>.  This will appear

  6882      * as a "(" in text that is left-to-right but as a ")" in text

  6883      * that is right-to-left.

  6884      *

  6885      * @param   codePoint the character (Unicode code point) to be tested.

  6886      * @return  {@code true} if the character is mirrored, {@code false}

  6887      *          if the character is not mirrored or is not defined.

  6888      * @since   1.5

  6889      */

  6890     public static boolean isMirrored(int codePoint) {

  6891         return CharacterData.of(codePoint).isMirrored(codePoint);

  6892     }

  6894     /**

  6895      * Compares two {@code Character} objects numerically.

  6896      *

  6897      * @param   anotherCharacter   the {@code Character} to be compared.

  6899      * @return  the value {@code 0} if the argument {@code Character}

  6900      *          is equal to this {@code Character}; a value less than

  6901      *          {@code 0} if this {@code Character} is numerically less

  6902      *          than the {@code Character} argument; and a value greater than

  6903      *          {@code 0} if this {@code Character} is numerically greater

  6904      *          than the {@code Character} argument (unsigned comparison).

  6905      *          Note that this is strictly a numerical comparison; it is not

  6906      *          locale-dependent.

  6907      * @since   1.2

  6908      */

  6909     public int compareTo(Character anotherCharacter) {

  6910         return compare(this.value, anotherCharacter.value);

  6911     }

  6913     /**

  6914      * Compares two {@code char} values numerically.

  6915      * The value returned is identical to what would be returned by:

  6916      * <pre>

  6917      *    Character.valueOf(x).compareTo(Character.valueOf(y))

  6918      * </pre>

  6919      *

  6920      * @param  x the first {@code char} to compare

  6921      * @param  y the second {@code char} to compare

  6922      * @return the value {@code 0} if {@code x == y};

  6923      *         a value less than {@code 0} if {@code x < y}; and

  6924      *         a value greater than {@code 0} if {@code x > y}

  6925      * @since 1.7

  6926      */

  6927     public static int compare(char x, char y) {

  6928         return x - y;

  6929     }

  6931     /**

  6932      * Converts the character (Unicode code point) argument to uppercase using

  6933      * information from the UnicodeData file.

  6934      * <p>

  6935      *

  6936      * @param   codePoint   the character (Unicode code point) to be converted.

  6937      * @return  either the uppercase equivalent of the character, if

  6938      *          any, or an error flag ({@code Character.ERROR})

  6939      *          that indicates that a 1:M {@code char} mapping exists.

  6940      * @see     Character#isLowerCase(char)

  6941      * @see     Character#isUpperCase(char)

  6942      * @see     Character#toLowerCase(char)

  6943      * @see     Character#toTitleCase(char)

  6944      * @since 1.4

  6945      */

  6946     static int toUpperCaseEx(int codePoint) {

  6947         assert isValidCodePoint(codePoint);

  6948         return CharacterData.of(codePoint).toUpperCaseEx(codePoint);

  6949     }

  6951     /**

  6952      * Converts the character (Unicode code point) argument to uppercase using case

  6953      * mapping information from the SpecialCasing file in the Unicode

  6954      * specification. If a character has no explicit uppercase

  6955      * mapping, then the {@code char} itself is returned in the

  6956      * {@code char[]}.

  6957      *

  6958      * @param   codePoint   the character (Unicode code point) to be converted.

  6959      * @return a {@code char[]} with the uppercased character.

  6960      * @since 1.4

  6961      */

  6962     static char[] toUpperCaseCharArray(int codePoint) {

  6963         // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.

  6964         assert isBmpCodePoint(codePoint);

  6965         return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);

  6966     }

    /**
     * The number of bits used to represent a {@code char} value in unsigned
     * binary form, constant {@code 16}.
     *
     * @since 1.5
     */
    public static final int SIZE = 16;

  6976     /**

  6977      * Returns the value obtained by reversing the order of the bytes in the

  6978      * specified <tt>char</tt> value.

  6979      *

  6980      * @return the value obtained by reversing (or, equivalently, swapping)

  6981      *     the bytes in the specified <tt>char</tt> value.

  6982      * @since 1.5

  6983      */

  6984     public static char reverseBytes(char ch) {

  6985         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));

  6986     }

  6988     /**

  6989      * Returns the Unicode name of the specified character

  6990      * {@code codePoint}, or null if the code point is

  6991      * {@link #UNASSIGNED unassigned}.

  6992      * <p>

  6993      * Note: if the specified character is not assigned a name by

  6994      * the <i>UnicodeData</i> file (part of the Unicode Character

  6995      * Database maintained by the Unicode Consortium), the returned

  6996      * name is the same as the result of expression.

  6997      *

  6998      * <blockquote>{@code

  6999      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')

  7000      *     + " "

  7001      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);

  7002      *

  7003      * }</blockquote>

  7004      *

  7005      * @param  codePoint the character (Unicode code point)

  7006      *

  7007      * @return the Unicode name of the specified character, or null if

  7008      *         the code point is unassigned.

  7009      *

  7010      * @exception IllegalArgumentException if the specified

  7011      *            {@code codePoint} is not a valid Unicode

  7012      *            code point.

  7013      *

  7014      * @since 1.7

  7015      */

  7016     public static String getName(int codePoint) {

  7017         if (!isValidCodePoint(codePoint)) {

  7018             throw new IllegalArgumentException();

  7019         }

  7020         String name = CharacterName.get(codePoint);

  7021         if (name != null)

  7022             return name;

  7023         if (getType(codePoint) == UNASSIGNED)

  7024             return null;

  7025         UnicodeBlock block = UnicodeBlock.of(codePoint);

  7026         if (block != null)

  7027             return block.toString().replace('_', ' ') + " "

  7028                    + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);

  7029         // should never come here

  7030         return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);

  7031     }

  7032 }

author	Jaroslav Tulach <jaroslav.tulach@apidesign.org>
	Sat, 29 Sep 2012 11:01:22 +0200
branch	jdk7-b147
changeset 68	a2924470187b
child 85	9f3c454e74d4
permissions	-rw-r--r--