emul/src/main/java/java/lang/Character.java
author Jaroslav Tulach <jaroslav.tulach@apidesign.org>
Sat, 29 Sep 2012 11:01:22 +0200
branch jdk7-b147
changeset 68 a2924470187b
child 85 9f3c454e74d4
permissions -rw-r--r--
More exceptions and finally bringing in Character
     1 /*
     2  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Oracle in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    22  * or visit www.oracle.com if you need additional information or have any
    23  * questions.
    24  */
    25 
    26 package java.lang;
    27 
    28 import java.util.Arrays;
    29 import java.util.Map;
    30 import java.util.HashMap;
    31 import java.util.Locale;
    32 
    33 /**
    34  * The {@code Character} class wraps a value of the primitive
    35  * type {@code char} in an object. An object of type
    36  * {@code Character} contains a single field whose type is
    37  * {@code char}.
    38  * <p>
    39  * In addition, this class provides several methods for determining
    40  * a character's category (lowercase letter, digit, etc.) and for converting
    41  * characters from uppercase to lowercase and vice versa.
    42  * <p>
    43  * Character information is based on the Unicode Standard, version 6.0.0.
    44  * <p>
    45  * The methods and data of class {@code Character} are defined by
    46  * the information in the <i>UnicodeData</i> file that is part of the
    47  * Unicode Character Database maintained by the Unicode
    48  * Consortium. This file specifies various properties including name
    49  * and general category for every defined Unicode code point or
    50  * character range.
    51  * <p>
    52  * The file and its description are available from the Unicode Consortium at:
    53  * <ul>
    54  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
    55  * </ul>
    56  *
    57  * <h4><a name="unicode">Unicode Character Representations</a></h4>
    58  *
    59  * <p>The {@code char} data type (and therefore the value that a
    60  * {@code Character} object encapsulates) are based on the
    61  * original Unicode specification, which defined characters as
    62  * fixed-width 16-bit entities. The Unicode Standard has since been
    63  * changed to allow for characters whose representation requires more
    64  * than 16 bits.  The range of legal <em>code point</em>s is now
    65  * U+0000 to U+10FFFF; the code points outside the surrogate range
        * (U+D800 to U+DFFF) are known as <em>Unicode scalar value</em>s.
    66  * (Refer to the <a
    67  * href="http://www.unicode.org/reports/tr27/#notation"><i>
    68  * definition</i></a> of the U+<i>n</i> notation in the Unicode
    69  * Standard.)
    70  *
    71  * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
    72  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
    73  * <a name="supplementary">Characters</a> whose code points are greater
    74  * than U+FFFF are called <em>supplementary character</em>s.  The Java
    75  * platform uses the UTF-16 representation in {@code char} arrays and
    76  * in the {@code String} and {@code StringBuffer} classes. In
    77  * this representation, supplementary characters are represented as a pair
    78  * of {@code char} values, the first from the <em>high-surrogates</em>
    79  * range, (&#92;uD800-&#92;uDBFF), the second from the
    80  * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
    81  *
    82  * <p>A {@code char} value, therefore, represents Basic
    83  * Multilingual Plane (BMP) code points, including the surrogate
    84  * code points, or code units of the UTF-16 encoding. An
    85  * {@code int} value represents all Unicode code points,
    86  * including supplementary code points. The lower (least significant)
    87  * 21 bits of {@code int} are used to represent Unicode code
    88  * points and the upper (most significant) 11 bits must be zero.
    89  * Unless otherwise specified, the behavior with respect to
    90  * supplementary characters and surrogate {@code char} values is
    91  * as follows:
    92  *
    93  * <ul>
    94  * <li>The methods that only accept a {@code char} value cannot support
    95  * supplementary characters. They treat {@code char} values from the
    96  * surrogate ranges as undefined characters. For example,
    97  * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
    98  * this specific value if followed by any low-surrogate value in a string
    99  * would represent a letter.
   100  *
   101  * <li>The methods that accept an {@code int} value support all
   102  * Unicode characters, including supplementary characters. For
   103  * example, {@code Character.isLetter(0x2F81A)} returns
   104  * {@code true} because the code point value represents a letter
   105  * (a CJK ideograph).
   106  * </ul>
   107  *
   108  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
   109  * used for character values in the range between U+0000 and U+10FFFF,
   110  * and <em>Unicode code unit</em> is used for 16-bit
   111  * {@code char} values that are code units of the <em>UTF-16</em>
   112  * encoding. For more information on Unicode terminology, refer to the
   113  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
   114  *
   115  * @author  Lee Boynton
   116  * @author  Guy Steele
   117  * @author  Akira Tanaka
   118  * @author  Martin Buchholz
   119  * @author  Ulf Zibis
   120  * @since   1.0
   121  */
   122 public final
   123 class Character implements java.io.Serializable, Comparable<Character> {
   124     /**
   125      * The minimum radix available for conversion to and from strings.
   126      * The constant value of this field is the smallest value permitted
   127      * for the radix argument in radix-conversion methods such as the
   128      * {@code digit} method, the {@code forDigit} method, and the
   129      * {@code toString} method of class {@code Integer}.
   130      *
   131      * @see     Character#digit(char, int)
   132      * @see     Character#forDigit(int, int)
   133      * @see     Integer#toString(int, int)
   134      * @see     Integer#valueOf(String)
   135      */
   136     public static final int MIN_RADIX = 2;
   137 
   138     /**
   139      * The maximum radix available for conversion to and from strings.
   140      * The constant value of this field is the largest value permitted
   141      * for the radix argument in radix-conversion methods such as the
   142      * {@code digit} method, the {@code forDigit} method, and the
   143      * {@code toString} method of class {@code Integer}.
   144      *
   145      * @see     Character#digit(char, int)
   146      * @see     Character#forDigit(int, int)
   147      * @see     Integer#toString(int, int)
   148      * @see     Integer#valueOf(String)
   149      */
   150     public static final int MAX_RADIX = 36;
   151 
   152     /**
   153      * The constant value of this field is the smallest value of type
   154      * {@code char}, {@code '\u005Cu0000'}.
   155      *
   156      * @since   1.0.2
   157      */
   158     public static final char MIN_VALUE = '\u0000';
   159 
   160     /**
   161      * The constant value of this field is the largest value of type
   162      * {@code char}, {@code '\u005CuFFFF'}.
   163      *
   164      * @since   1.0.2
   165      */
   166     public static final char MAX_VALUE = '\uFFFF';
   167 
   168     /**
   169      * The {@code Class} instance representing the primitive type
   170      * {@code char}. It is obtained by passing the name {@code "char"}
            * to {@code Class.getPrimitiveClass}.
   171      *
   172      * @since   1.1
   173      */
           // NOTE(review): the "unchecked" suppression suggests that
           // Class.getPrimitiveClass is not declared to return Class<Character>
           // -- confirm against the Class implementation used by this project.
   174     @SuppressWarnings("unchecked")
   175     public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
   176 
   177     /*
   178      * Normative general types
   179      */
   180 
   181     /*
   182      * General character types
   183      */
   184 
   185     /**
   186      * General category "Cn" in the Unicode specification.
   187      * @since   1.1
   188      */
   189     public static final byte UNASSIGNED = 0;
   190 
   191     /**
   192      * General category "Lu" in the Unicode specification.
   193      * @since   1.1
   194      */
   195     public static final byte UPPERCASE_LETTER = 1;
   196 
   197     /**
   198      * General category "Ll" in the Unicode specification.
   199      * @since   1.1
   200      */
   201     public static final byte LOWERCASE_LETTER = 2;
   202 
   203     /**
   204      * General category "Lt" in the Unicode specification.
   205      * @since   1.1
   206      */
   207     public static final byte TITLECASE_LETTER = 3;
   208 
   209     /**
   210      * General category "Lm" in the Unicode specification.
   211      * @since   1.1
   212      */
   213     public static final byte MODIFIER_LETTER = 4;
   214 
   215     /**
   216      * General category "Lo" in the Unicode specification.
   217      * @since   1.1
   218      */
   219     public static final byte OTHER_LETTER = 5;
   220 
   221     /**
   222      * General category "Mn" in the Unicode specification.
   223      * @since   1.1
   224      */
   225     public static final byte NON_SPACING_MARK = 6;
   226 
   227     /**
   228      * General category "Me" in the Unicode specification.
   229      * @since   1.1
   230      */
   231     public static final byte ENCLOSING_MARK = 7;
   232 
   233     /**
   234      * General category "Mc" in the Unicode specification.
   235      * @since   1.1
   236      */
   237     public static final byte COMBINING_SPACING_MARK = 8;
   238 
   239     /**
   240      * General category "Nd" in the Unicode specification.
   241      * @since   1.1
   242      */
   243     public static final byte DECIMAL_DIGIT_NUMBER        = 9;
   244 
   245     /**
   246      * General category "Nl" in the Unicode specification.
   247      * @since   1.1
   248      */
   249     public static final byte LETTER_NUMBER = 10;
   250 
   251     /**
   252      * General category "No" in the Unicode specification.
   253      * @since   1.1
   254      */
   255     public static final byte OTHER_NUMBER = 11;
   256 
   257     /**
   258      * General category "Zs" in the Unicode specification.
   259      * @since   1.1
   260      */
   261     public static final byte SPACE_SEPARATOR = 12;
   262 
   263     /**
   264      * General category "Zl" in the Unicode specification.
   265      * @since   1.1
   266      */
   267     public static final byte LINE_SEPARATOR = 13;
   268 
   269     /**
   270      * General category "Zp" in the Unicode specification.
   271      * @since   1.1
   272      */
   273     public static final byte PARAGRAPH_SEPARATOR = 14;
   274 
   275     /**
   276      * General category "Cc" in the Unicode specification.
   277      * @since   1.1
   278      */
   279     public static final byte CONTROL = 15;
   280 
   281     /**
   282      * General category "Cf" in the Unicode specification.
   283      * @since   1.1
   284      */
   285     public static final byte FORMAT = 16;
   286 
   287     /**
   288      * General category "Co" in the Unicode specification.
   289      * @since   1.1
   290      */
           // The numbering jumps here: the preceding constant, FORMAT, is 16 and
           // this one is 18, so the value 17 is left unassigned in this list.
   291     public static final byte PRIVATE_USE = 18;
   292 
   293     /**
   294      * General category "Cs" in the Unicode specification.
   295      * @since   1.1
   296      */
   297     public static final byte SURROGATE = 19;
   298 
   299     /**
   300      * General category "Pd" in the Unicode specification.
   301      * @since   1.1
   302      */
   303     public static final byte DASH_PUNCTUATION = 20;
   304 
   305     /**
   306      * General category "Ps" in the Unicode specification.
   307      * @since   1.1
   308      */
   309     public static final byte START_PUNCTUATION = 21;
   310 
   311     /**
   312      * General category "Pe" in the Unicode specification.
   313      * @since   1.1
   314      */
   315     public static final byte END_PUNCTUATION = 22;
   316 
   317     /**
   318      * General category "Pc" in the Unicode specification.
   319      * @since   1.1
   320      */
   321     public static final byte CONNECTOR_PUNCTUATION = 23;
   322 
   323     /**
   324      * General category "Po" in the Unicode specification.
   325      * @since   1.1
   326      */
   327     public static final byte OTHER_PUNCTUATION = 24;
   328 
   329     /**
   330      * General category "Sm" in the Unicode specification.
   331      * @since   1.1
   332      */
   333     public static final byte MATH_SYMBOL = 25;
   334 
   335     /**
   336      * General category "Sc" in the Unicode specification.
   337      * @since   1.1
   338      */
   339     public static final byte CURRENCY_SYMBOL = 26;
   340 
   341     /**
   342      * General category "Sk" in the Unicode specification.
   343      * @since   1.1
   344      */
   345     public static final byte MODIFIER_SYMBOL = 27;
   346 
   347     /**
   348      * General category "So" in the Unicode specification.
   349      * @since   1.1
   350      */
   351     public static final byte OTHER_SYMBOL = 28;
   352 
   353     /**
   354      * General category "Pi" in the Unicode specification.
   355      * @since   1.4
   356      */
   357     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
   358 
   359     /**
   360      * General category "Pf" in the Unicode specification.
   361      * @since   1.4
   362      */
   363     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
   364 
   365     /**
   366      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
            * <p>
            * The value has all 32 bits set, i.e. it equals {@code -1}. That lies
            * outside the {@code char} range ({@link #MIN_VALUE} to
            * {@link #MAX_VALUE}), so the flag cannot be mistaken for the valid
            * {@code char} value U+FFFF. The constant is package-private.
   367      */
   368     static final int ERROR = 0xFFFFFFFF;
   369 
   370 
   371     /**
   372      * Undefined bidirectional character type. Undefined {@code char}
   373      * values have undefined directionality in the Unicode specification.
   374      * @since 1.4
   375      */
   376     public static final byte DIRECTIONALITY_UNDEFINED = -1;
   377 
   378     /**
   379      * Strong bidirectional character type "L" in the Unicode specification.
   380      * @since 1.4
   381      */
   382     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
   383 
   384     /**
   385      * Strong bidirectional character type "R" in the Unicode specification.
   386      * @since 1.4
   387      */
   388     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
   389 
   390     /**
   391      * Strong bidirectional character type "AL" in the Unicode specification.
   392      * @since 1.4
   393      */
   394     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
   395 
   396     /**
   397      * Weak bidirectional character type "EN" in the Unicode specification.
   398      * @since 1.4
   399      */
   400     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
   401 
   402     /**
   403      * Weak bidirectional character type "ES" in the Unicode specification.
   404      * @since 1.4
   405      */
   406     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
   407 
   408     /**
   409      * Weak bidirectional character type "ET" in the Unicode specification.
   410      * @since 1.4
   411      */
   412     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
   413 
   414     /**
   415      * Weak bidirectional character type "AN" in the Unicode specification.
   416      * @since 1.4
   417      */
   418     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
   419 
   420     /**
   421      * Weak bidirectional character type "CS" in the Unicode specification.
   422      * @since 1.4
   423      */
   424     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
   425 
   426     /**
   427      * Weak bidirectional character type "NSM" in the Unicode specification.
   428      * @since 1.4
   429      */
   430     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
   431 
   432     /**
   433      * Weak bidirectional character type "BN" in the Unicode specification.
   434      * @since 1.4
   435      */
   436     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
   437 
   438     /**
   439      * Neutral bidirectional character type "B" in the Unicode specification.
   440      * @since 1.4
   441      */
   442     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
   443 
   444     /**
   445      * Neutral bidirectional character type "S" in the Unicode specification.
   446      * @since 1.4
   447      */
   448     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
   449 
   450     /**
   451      * Neutral bidirectional character type "WS" in the Unicode specification.
   452      * @since 1.4
   453      */
   454     public static final byte DIRECTIONALITY_WHITESPACE = 12;
   455 
   456     /**
   457      * Neutral bidirectional character type "ON" in the Unicode specification.
   458      * @since 1.4
   459      */
   460     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
   461 
   462     /**
   463      * Strong bidirectional character type "LRE" in the Unicode specification.
   464      * @since 1.4
   465      */
   466     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
   467 
   468     /**
   469      * Strong bidirectional character type "LRO" in the Unicode specification.
   470      * @since 1.4
   471      */
   472     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
   473 
   474     /**
   475      * Strong bidirectional character type "RLE" in the Unicode specification.
   476      * @since 1.4
   477      */
   478     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
   479 
   480     /**
   481      * Strong bidirectional character type "RLO" in the Unicode specification.
   482      * @since 1.4
   483      */
   484     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
   485 
   486     /**
   487      * Weak bidirectional character type "PDF" in the Unicode specification.
   488      * @since 1.4
   489      */
   490     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
   491 
   492     /**
   493      * The minimum value of a
   494      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
   495      * Unicode high-surrogate code unit</a>
   496      * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
   497      * A high-surrogate is also known as a <i>leading-surrogate</i>.
   498      *
   499      * @since 1.5
   500      */
   501     public static final char MIN_HIGH_SURROGATE = '\uD800';
   502 
   503     /**
   504      * The maximum value of a
   505      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
   506      * Unicode high-surrogate code unit</a>
   507      * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
   508      * A high-surrogate is also known as a <i>leading-surrogate</i>.
   509      *
   510      * @since 1.5
   511      */
   512     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
   513 
   514     /**
   515      * The minimum value of a
   516      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
   517      * Unicode low-surrogate code unit</a>
   518      * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
   519      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
   520      *
   521      * @since 1.5
   522      */
   523     public static final char MIN_LOW_SURROGATE  = '\uDC00';
   524 
   525     /**
   526      * The maximum value of a
   527      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
   528      * Unicode low-surrogate code unit</a>
   529      * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
   530      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
   531      *
   532      * @since 1.5
   533      */
   534     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
   535 
   536     /**
   537      * The minimum value of a Unicode surrogate code unit in the
   538      * UTF-16 encoding, constant {@code '\u005CuD800'}.
            * This constant is defined as {@link #MIN_HIGH_SURROGATE}.
   539      *
   540      * @since 1.5
   541      */
   542     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
   543 
   544     /**
   545      * The maximum value of a Unicode surrogate code unit in the
   546      * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
            * This constant is defined as {@link #MAX_LOW_SURROGATE}.
   547      *
   548      * @since 1.5
   549      */
   550     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
   551 
   552     /**
   553      * The minimum value of a
   554      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
   555      * Unicode supplementary code point</a>, constant {@code U+10000}.
   556      *
   557      * @since 1.5
   558      */
   559     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
   560 
   561     /**
   562      * The minimum value of a
   563      * <a href="http://www.unicode.org/glossary/#code_point">
   564      * Unicode code point</a>, constant {@code U+0000}.
   565      *
   566      * @since 1.5
   567      */
   568     public static final int MIN_CODE_POINT = 0x000000;
   569 
   570     /**
   571      * The maximum value of a
   572      * <a href="http://www.unicode.org/glossary/#code_point">
   573      * Unicode code point</a>, constant {@code U+10FFFF}.
   574      *
   575      * @since 1.5
   576      */
   577     public static final int MAX_CODE_POINT = 0X10FFFF;
   578 
   579 
   580     /**
   581      * Instances of this class represent particular subsets of the Unicode
   582      * character set.  The only family of subsets defined in the
   583      * {@code Character} class is {@link Character.UnicodeBlock}.
   584      * Other portions of the Java API may define other subsets for their
   585      * own purposes.
   586      *
   587      * @since 1.2
   588      */
   589     public static class Subset  {
   590 
               /**
                * Name given at construction. The constructor rejects
                * {@code null}, so this is non-null once construction succeeds;
                * {@link #toString()} returns it unchanged.
                */
   591         private String name;
   592 
   593         /**
   594          * Constructs a new {@code Subset} instance.
   595          *
   596          * @param  name  The name of this subset
   597          * @exception NullPointerException if name is {@code null}
                *            (the exception message is {@code "name"})
   598          */
   599         protected Subset(String name) {
   600             if (name == null) {
   601                 throw new NullPointerException("name");
   602             }
   603             this.name = name;
   604         }
   605 
   606         /**
   607          * Compares two {@code Subset} objects for equality.
   608          * This method returns {@code true} if and only if
   609          * {@code this} and the argument refer to the same
   610          * object; since this method is {@code final}, this
   611          * guarantee holds for all subclasses.
                *
                * @param  obj  the reference to compare with this subset
                * @return {@code true} only when {@code obj} is this very
                *         instance; {@code false} otherwise, including for
                *         {@code null}
   612          */
   613         public final boolean equals(Object obj) {
   614             return (this == obj);
   615         }
   616 
   617         /**
   618          * Returns the standard hash code as defined by the
   619          * {@link Object#hashCode} method.  This method
   620          * is {@code final} in order to ensure that the
   621          * {@code equals} and {@code hashCode} methods will
   622          * be consistent in all subclasses.
                *
                * @return the value of {@code Object.hashCode()} for this
                *         instance
   623          */
   624         public final int hashCode() {
   625             return super.hashCode();
   626         }
   627 
   628         /**
   629          * Returns the name of this subset.
                *
                * @return the name that was passed to the constructor
   630          */
   631         public final String toString() {
   632             return name;
   633         }
   634     }
   635 
   636     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
   637     // for the latest specification of Unicode Blocks.
   638 
   639     /**
   640      * A family of character subsets representing the character blocks in the
   641      * Unicode specification. Character blocks generally define characters
   642      * used for a specific script or purpose. A character is contained by
   643      * at most one Unicode block.
   644      *
   645      * @since 1.2
   646      */
   647     public static final class UnicodeBlock extends Subset {
   648 
               // Registry of blocks keyed by name, created with an initial
               // capacity of 256. The constructors below store each new instance
               // here under its identifier name and under every alias they are
               // given. The field is declared ahead of the block constants so that
               // it is initialized before their constructors run.
   649         private static Map<String, UnicodeBlock> map = new HashMap<>(256);
   650 
   651         /**
   652          * Creates a UnicodeBlock with the given identifier name.
   653          * This name must be the same as the block identifier.
                * <p>
                * The name is handed to the {@code Subset} constructor, which
                * throws {@code NullPointerException} for {@code null}, and the
                * new instance is then recorded in {@code map} under that name.
                *
                * @param idName the identifier name of the block
   654          */
   655         private UnicodeBlock(String idName) {
   656             super(idName);
   657             map.put(idName, this);
   658         }
   659 
   660         /**
   661          * Creates a UnicodeBlock with the given identifier name and
   662          * alias name.
                * <p>
                * Delegates to the single-name constructor and then records this
                * instance in {@code map} under {@code alias} as well, so both
                * names map to the same block. A call that passes exactly one
                * alias binds to this constructor rather than to the varargs
                * overload, because fixed-arity constructors are considered first.
                *
                * @param idName the identifier name of the block
                * @param alias  one additional name registered for the block
   663          */
   664         private UnicodeBlock(String idName, String alias) {
   665             this(idName);
   666             map.put(alias, this);
   667         }
   668 
   669         /**
   670          * Creates a UnicodeBlock with the given identifier name and
   671          * alias names.
                * <p>
                * Delegates to the single-name constructor and then records this
                * instance in {@code map} once per element of {@code aliases}, so
                * the identifier name and every alias map to the same block.
                *
                * @param idName  the identifier name of the block
                * @param aliases the additional names registered for the block
   672          */
   673         private UnicodeBlock(String idName, String... aliases) {
   674             this(idName);
   675             for (String alias : aliases)
   676                 map.put(alias, this);
   677         }
   678 
   679         /**
   680          * Constant for the "Basic Latin" Unicode character block.
   681          * @since 1.2
   682          */
   683         public static final UnicodeBlock  BASIC_LATIN =
   684             new UnicodeBlock("BASIC_LATIN",
   685                              "BASIC LATIN",
   686                              "BASICLATIN");
   687 
   688         /**
   689          * Constant for the "Latin-1 Supplement" Unicode character block.
   690          * @since 1.2
   691          */
   692         public static final UnicodeBlock LATIN_1_SUPPLEMENT =
   693             new UnicodeBlock("LATIN_1_SUPPLEMENT",
   694                              "LATIN-1 SUPPLEMENT",
   695                              "LATIN-1SUPPLEMENT");
   696 
   697         /**
   698          * Constant for the "Latin Extended-A" Unicode character block.
   699          * @since 1.2
   700          */
   701         public static final UnicodeBlock LATIN_EXTENDED_A =
   702             new UnicodeBlock("LATIN_EXTENDED_A",
   703                              "LATIN EXTENDED-A",
   704                              "LATINEXTENDED-A");
   705 
   706         /**
   707          * Constant for the "Latin Extended-B" Unicode character block.
   708          * @since 1.2
   709          */
   710         public static final UnicodeBlock LATIN_EXTENDED_B =
   711             new UnicodeBlock("LATIN_EXTENDED_B",
   712                              "LATIN EXTENDED-B",
   713                              "LATINEXTENDED-B");
   714 
   715         /**
   716          * Constant for the "IPA Extensions" Unicode character block.
   717          * @since 1.2
   718          */
   719         public static final UnicodeBlock IPA_EXTENSIONS =
   720             new UnicodeBlock("IPA_EXTENSIONS",
   721                              "IPA EXTENSIONS",
   722                              "IPAEXTENSIONS");
   723 
   724         /**
   725          * Constant for the "Spacing Modifier Letters" Unicode character block.
   726          * @since 1.2
   727          */
   728         public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
   729             new UnicodeBlock("SPACING_MODIFIER_LETTERS",
   730                              "SPACING MODIFIER LETTERS",
   731                              "SPACINGMODIFIERLETTERS");
   732 
   733         /**
   734          * Constant for the "Combining Diacritical Marks" Unicode character block.
   735          * @since 1.2
   736          */
   737         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
   738             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
   739                              "COMBINING DIACRITICAL MARKS",
   740                              "COMBININGDIACRITICALMARKS");
   741 
   742         /**
   743          * Constant for the "Greek and Coptic" Unicode character block.
   744          * <p>
   745          * This block was previously known as the "Greek" block.
   746          *
   747          * @since 1.2
   748          */
   749         public static final UnicodeBlock GREEK =
   750             new UnicodeBlock("GREEK",
   751                              "GREEK AND COPTIC",
   752                              "GREEKANDCOPTIC");
   753 
   754         /**
   755          * Constant for the "Cyrillic" Unicode character block.
   756          * @since 1.2
   757          */
   758         public static final UnicodeBlock CYRILLIC =
   759             new UnicodeBlock("CYRILLIC");
   760 
   761         /**
   762          * Constant for the "Armenian" Unicode character block.
   763          * @since 1.2
   764          */
   765         public static final UnicodeBlock ARMENIAN =
   766             new UnicodeBlock("ARMENIAN");
   767 
   768         /**
   769          * Constant for the "Hebrew" Unicode character block.
   770          * @since 1.2
   771          */
   772         public static final UnicodeBlock HEBREW =
   773             new UnicodeBlock("HEBREW");
   774 
   775         /**
   776          * Constant for the "Arabic" Unicode character block.
   777          * @since 1.2
   778          */
   779         public static final UnicodeBlock ARABIC =
   780             new UnicodeBlock("ARABIC");
   781 
   782         /**
   783          * Constant for the "Devanagari" Unicode character block.
   784          * @since 1.2
   785          */
   786         public static final UnicodeBlock DEVANAGARI =
   787             new UnicodeBlock("DEVANAGARI");
   788 
   789         /**
   790          * Constant for the "Bengali" Unicode character block.
   791          * @since 1.2
   792          */
   793         public static final UnicodeBlock BENGALI =
   794             new UnicodeBlock("BENGALI");
   795 
   796         /**
   797          * Constant for the "Gurmukhi" Unicode character block.
   798          * @since 1.2
   799          */
   800         public static final UnicodeBlock GURMUKHI =
   801             new UnicodeBlock("GURMUKHI");
   802 
   803         /**
   804          * Constant for the "Gujarati" Unicode character block.
   805          * @since 1.2
   806          */
   807         public static final UnicodeBlock GUJARATI =
   808             new UnicodeBlock("GUJARATI");
   809 
   810         /**
   811          * Constant for the "Oriya" Unicode character block.
   812          * @since 1.2
   813          */
   814         public static final UnicodeBlock ORIYA =
   815             new UnicodeBlock("ORIYA");
   816 
   817         /**
   818          * Constant for the "Tamil" Unicode character block.
   819          * @since 1.2
   820          */
   821         public static final UnicodeBlock TAMIL =
   822             new UnicodeBlock("TAMIL");
   823 
   824         /**
   825          * Constant for the "Telugu" Unicode character block.
   826          * @since 1.2
   827          */
   828         public static final UnicodeBlock TELUGU =
   829             new UnicodeBlock("TELUGU");
   830 
   831         /**
   832          * Constant for the "Kannada" Unicode character block.
   833          * @since 1.2
   834          */
   835         public static final UnicodeBlock KANNADA =
   836             new UnicodeBlock("KANNADA");
   837 
   838         /**
   839          * Constant for the "Malayalam" Unicode character block.
   840          * @since 1.2
   841          */
   842         public static final UnicodeBlock MALAYALAM =
   843             new UnicodeBlock("MALAYALAM");
   844 
   845         /**
   846          * Constant for the "Thai" Unicode character block.
   847          * @since 1.2
   848          */
   849         public static final UnicodeBlock THAI =
   850             new UnicodeBlock("THAI");
   851 
   852         /**
   853          * Constant for the "Lao" Unicode character block.
   854          * @since 1.2
   855          */
   856         public static final UnicodeBlock LAO =
   857             new UnicodeBlock("LAO");
   858 
   859         /**
   860          * Constant for the "Tibetan" Unicode character block.
   861          * @since 1.2
   862          */
   863         public static final UnicodeBlock TIBETAN =
   864             new UnicodeBlock("TIBETAN");
   865 
   866         /**
   867          * Constant for the "Georgian" Unicode character block.
   868          * @since 1.2
   869          */
   870         public static final UnicodeBlock GEORGIAN =
   871             new UnicodeBlock("GEORGIAN");
   872 
   873         /**
   874          * Constant for the "Hangul Jamo" Unicode character block.
   875          * @since 1.2
   876          */
   877         public static final UnicodeBlock HANGUL_JAMO =
   878             new UnicodeBlock("HANGUL_JAMO",
   879                              "HANGUL JAMO",
   880                              "HANGULJAMO");
   881 
   882         /**
   883          * Constant for the "Latin Extended Additional" Unicode character block.
   884          * @since 1.2
   885          */
   886         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
   887             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
   888                              "LATIN EXTENDED ADDITIONAL",
   889                              "LATINEXTENDEDADDITIONAL");
   890 
   891         /**
   892          * Constant for the "Greek Extended" Unicode character block.
   893          * @since 1.2
   894          */
   895         public static final UnicodeBlock GREEK_EXTENDED =
   896             new UnicodeBlock("GREEK_EXTENDED",
   897                              "GREEK EXTENDED",
   898                              "GREEKEXTENDED");
   899 
   900         /**
   901          * Constant for the "General Punctuation" Unicode character block.
   902          * @since 1.2
   903          */
   904         public static final UnicodeBlock GENERAL_PUNCTUATION =
   905             new UnicodeBlock("GENERAL_PUNCTUATION",
   906                              "GENERAL PUNCTUATION",
   907                              "GENERALPUNCTUATION");
   908 
   909         /**
   910          * Constant for the "Superscripts and Subscripts" Unicode character
   911          * block.
   912          * @since 1.2
   913          */
   914         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
   915             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
   916                              "SUPERSCRIPTS AND SUBSCRIPTS",
   917                              "SUPERSCRIPTSANDSUBSCRIPTS");
   918 
   919         /**
   920          * Constant for the "Currency Symbols" Unicode character block.
   921          * @since 1.2
   922          */
   923         public static final UnicodeBlock CURRENCY_SYMBOLS =
   924             new UnicodeBlock("CURRENCY_SYMBOLS",
   925                              "CURRENCY SYMBOLS",
   926                              "CURRENCYSYMBOLS");
   927 
   928         /**
   929          * Constant for the "Combining Diacritical Marks for Symbols" Unicode
   930          * character block.
   931          * <p>
   932          * This block was previously known as "Combining Marks for Symbols".
   933          * @since 1.2
   934          */
   935         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
   936             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
   937                              "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
   938                              "COMBININGDIACRITICALMARKSFORSYMBOLS",
   939                              "COMBINING MARKS FOR SYMBOLS",
   940                              "COMBININGMARKSFORSYMBOLS");
   941 
   942         /**
   943          * Constant for the "Letterlike Symbols" Unicode character block.
   944          * @since 1.2
   945          */
   946         public static final UnicodeBlock LETTERLIKE_SYMBOLS =
   947             new UnicodeBlock("LETTERLIKE_SYMBOLS",
   948                              "LETTERLIKE SYMBOLS",
   949                              "LETTERLIKESYMBOLS");
   950 
   951         /**
   952          * Constant for the "Number Forms" Unicode character block.
   953          * @since 1.2
   954          */
   955         public static final UnicodeBlock NUMBER_FORMS =
   956             new UnicodeBlock("NUMBER_FORMS",
   957                              "NUMBER FORMS",
   958                              "NUMBERFORMS");
   959 
   960         /**
   961          * Constant for the "Arrows" Unicode character block.
   962          * @since 1.2
   963          */
   964         public static final UnicodeBlock ARROWS =
   965             new UnicodeBlock("ARROWS");
   966 
   967         /**
   968          * Constant for the "Mathematical Operators" Unicode character block.
   969          * @since 1.2
   970          */
   971         public static final UnicodeBlock MATHEMATICAL_OPERATORS =
   972             new UnicodeBlock("MATHEMATICAL_OPERATORS",
   973                              "MATHEMATICAL OPERATORS",
   974                              "MATHEMATICALOPERATORS");
   975 
   976         /**
   977          * Constant for the "Miscellaneous Technical" Unicode character block.
   978          * @since 1.2
   979          */
   980         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
   981             new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
   982                              "MISCELLANEOUS TECHNICAL",
   983                              "MISCELLANEOUSTECHNICAL");
   984 
   985         /**
   986          * Constant for the "Control Pictures" Unicode character block.
   987          * @since 1.2
   988          */
   989         public static final UnicodeBlock CONTROL_PICTURES =
   990             new UnicodeBlock("CONTROL_PICTURES",
   991                              "CONTROL PICTURES",
   992                              "CONTROLPICTURES");
   993 
   994         /**
   995          * Constant for the "Optical Character Recognition" Unicode character block.
   996          * @since 1.2
   997          */
   998         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
   999             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
  1000                              "OPTICAL CHARACTER RECOGNITION",
  1001                              "OPTICALCHARACTERRECOGNITION");
  1002 
  1003         /**
  1004          * Constant for the "Enclosed Alphanumerics" Unicode character block.
  1005          * @since 1.2
  1006          */
  1007         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
  1008             new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
  1009                              "ENCLOSED ALPHANUMERICS",
  1010                              "ENCLOSEDALPHANUMERICS");
  1011 
  1012         /**
  1013          * Constant for the "Box Drawing" Unicode character block.
  1014          * @since 1.2
  1015          */
  1016         public static final UnicodeBlock BOX_DRAWING =
  1017             new UnicodeBlock("BOX_DRAWING",
  1018                              "BOX DRAWING",
  1019                              "BOXDRAWING");
  1020 
  1021         /**
  1022          * Constant for the "Block Elements" Unicode character block.
  1023          * @since 1.2
  1024          */
  1025         public static final UnicodeBlock BLOCK_ELEMENTS =
  1026             new UnicodeBlock("BLOCK_ELEMENTS",
  1027                              "BLOCK ELEMENTS",
  1028                              "BLOCKELEMENTS");
  1029 
  1030         /**
  1031          * Constant for the "Geometric Shapes" Unicode character block.
  1032          * @since 1.2
  1033          */
  1034         public static final UnicodeBlock GEOMETRIC_SHAPES =
  1035             new UnicodeBlock("GEOMETRIC_SHAPES",
  1036                              "GEOMETRIC SHAPES",
  1037                              "GEOMETRICSHAPES");
  1038 
  1039         /**
  1040          * Constant for the "Miscellaneous Symbols" Unicode character block.
  1041          * @since 1.2
  1042          */
  1043         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
  1044             new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
  1045                              "MISCELLANEOUS SYMBOLS",
  1046                              "MISCELLANEOUSSYMBOLS");
  1047 
  1048         /**
  1049          * Constant for the "Dingbats" Unicode character block.
  1050          * @since 1.2
  1051          */
  1052         public static final UnicodeBlock DINGBATS =
  1053             new UnicodeBlock("DINGBATS");
  1054 
  1055         /**
  1056          * Constant for the "CJK Symbols and Punctuation" Unicode character block.
  1057          * @since 1.2
  1058          */
  1059         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
  1060             new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
  1061                              "CJK SYMBOLS AND PUNCTUATION",
  1062                              "CJKSYMBOLSANDPUNCTUATION");
  1063 
  1064         /**
  1065          * Constant for the "Hiragana" Unicode character block.
  1066          * @since 1.2
  1067          */
  1068         public static final UnicodeBlock HIRAGANA =
  1069             new UnicodeBlock("HIRAGANA");
  1070 
  1071         /**
  1072          * Constant for the "Katakana" Unicode character block.
  1073          * @since 1.2
  1074          */
  1075         public static final UnicodeBlock KATAKANA =
  1076             new UnicodeBlock("KATAKANA");
  1077 
  1078         /**
  1079          * Constant for the "Bopomofo" Unicode character block.
  1080          * @since 1.2
  1081          */
  1082         public static final UnicodeBlock BOPOMOFO =
  1083             new UnicodeBlock("BOPOMOFO");
  1084 
  1085         /**
  1086          * Constant for the "Hangul Compatibility Jamo" Unicode character block.
  1087          * @since 1.2
  1088          */
  1089         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
  1090             new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
  1091                              "HANGUL COMPATIBILITY JAMO",
  1092                              "HANGULCOMPATIBILITYJAMO");
  1093 
  1094         /**
  1095          * Constant for the "Kanbun" Unicode character block.
  1096          * @since 1.2
  1097          */
  1098         public static final UnicodeBlock KANBUN =
  1099             new UnicodeBlock("KANBUN");
  1100 
  1101         /**
  1102          * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
  1103          * @since 1.2
  1104          */
  1105         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
  1106             new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
  1107                              "ENCLOSED CJK LETTERS AND MONTHS",
  1108                              "ENCLOSEDCJKLETTERSANDMONTHS");
  1109 
  1110         /**
  1111          * Constant for the "CJK Compatibility" Unicode character block.
  1112          * @since 1.2
  1113          */
  1114         public static final UnicodeBlock CJK_COMPATIBILITY =
  1115             new UnicodeBlock("CJK_COMPATIBILITY",
  1116                              "CJK COMPATIBILITY",
  1117                              "CJKCOMPATIBILITY");
  1118 
  1119         /**
  1120          * Constant for the "CJK Unified Ideographs" Unicode character block.
  1121          * @since 1.2
  1122          */
  1123         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
  1124             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
  1125                              "CJK UNIFIED IDEOGRAPHS",
  1126                              "CJKUNIFIEDIDEOGRAPHS");
  1127 
  1128         /**
  1129          * Constant for the "Hangul Syllables" Unicode character block.
  1130          * @since 1.2
  1131          */
  1132         public static final UnicodeBlock HANGUL_SYLLABLES =
  1133             new UnicodeBlock("HANGUL_SYLLABLES",
  1134                              "HANGUL SYLLABLES",
  1135                              "HANGULSYLLABLES");
  1136 
  1137         /**
  1138          * Constant for the "Private Use Area" Unicode character block.
  1139          * @since 1.2
  1140          */
  1141         public static final UnicodeBlock PRIVATE_USE_AREA =
  1142             new UnicodeBlock("PRIVATE_USE_AREA",
  1143                              "PRIVATE USE AREA",
  1144                              "PRIVATEUSEAREA");
  1145 
  1146         /**
  1147          * Constant for the "CJK Compatibility Ideographs" Unicode character
  1148          * block.
  1149          * @since 1.2
  1150          */
  1151         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
  1152             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
  1153                              "CJK COMPATIBILITY IDEOGRAPHS",
  1154                              "CJKCOMPATIBILITYIDEOGRAPHS");
  1155 
  1156         /**
  1157          * Constant for the "Alphabetic Presentation Forms" Unicode character block.
  1158          * @since 1.2
  1159          */
  1160         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
  1161             new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
  1162                              "ALPHABETIC PRESENTATION FORMS",
  1163                              "ALPHABETICPRESENTATIONFORMS");
  1164 
  1165         /**
  1166          * Constant for the "Arabic Presentation Forms-A" Unicode character
  1167          * block.
  1168          * @since 1.2
  1169          */
  1170         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
  1171             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
  1172                              "ARABIC PRESENTATION FORMS-A",
  1173                              "ARABICPRESENTATIONFORMS-A");
  1174 
  1175         /**
  1176          * Constant for the "Combining Half Marks" Unicode character block.
  1177          * @since 1.2
  1178          */
  1179         public static final UnicodeBlock COMBINING_HALF_MARKS =
  1180             new UnicodeBlock("COMBINING_HALF_MARKS",
  1181                              "COMBINING HALF MARKS",
  1182                              "COMBININGHALFMARKS");
  1183 
  1184         /**
  1185          * Constant for the "CJK Compatibility Forms" Unicode character block.
  1186          * @since 1.2
  1187          */
  1188         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
  1189             new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
  1190                              "CJK COMPATIBILITY FORMS",
  1191                              "CJKCOMPATIBILITYFORMS");
  1192 
  1193         /**
  1194          * Constant for the "Small Form Variants" Unicode character block.
  1195          * @since 1.2
  1196          */
  1197         public static final UnicodeBlock SMALL_FORM_VARIANTS =
  1198             new UnicodeBlock("SMALL_FORM_VARIANTS",
  1199                              "SMALL FORM VARIANTS",
  1200                              "SMALLFORMVARIANTS");
  1201 
  1202         /**
  1203          * Constant for the "Arabic Presentation Forms-B" Unicode character block.
  1204          * @since 1.2
  1205          */
  1206         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
  1207             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
  1208                              "ARABIC PRESENTATION FORMS-B",
  1209                              "ARABICPRESENTATIONFORMS-B");
  1210 
  1211         /**
  1212          * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
  1213          * block.
  1214          * @since 1.2
  1215          */
  1216         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
  1217             new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
  1218                              "HALFWIDTH AND FULLWIDTH FORMS",
  1219                              "HALFWIDTHANDFULLWIDTHFORMS");
  1220 
  1221         /**
  1222          * Constant for the "Specials" Unicode character block.
  1223          * @since 1.2
  1224          */
  1225         public static final UnicodeBlock SPECIALS =
  1226             new UnicodeBlock("SPECIALS");
  1227 
  1228         /**
  1229          * Constant for the "Surrogates Area" character block.
  1230          * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
  1231          *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and
  1232          *             {@link #LOW_SURROGATES}, which match the block
  1233          *             definitions of the Unicode Standard. {@link #of(char)}
  1234          *             and {@link #of(int)} return those, not SURROGATES_AREA.
  1235          */
  1236         @Deprecated
  1237         public static final UnicodeBlock SURROGATES_AREA =
  1238             new UnicodeBlock("SURROGATES_AREA");
  1239 
  1240         /**
  1241          * Constant for the "Syriac" Unicode character block.
  1242          * @since 1.4
  1243          */
  1244         public static final UnicodeBlock SYRIAC =
  1245             new UnicodeBlock("SYRIAC");
  1246 
  1247         /**
  1248          * Constant for the "Thaana" Unicode character block.
  1249          * @since 1.4
  1250          */
  1251         public static final UnicodeBlock THAANA =
  1252             new UnicodeBlock("THAANA");
  1253 
  1254         /**
  1255          * Constant for the "Sinhala" Unicode character block.
  1256          * @since 1.4
  1257          */
  1258         public static final UnicodeBlock SINHALA =
  1259             new UnicodeBlock("SINHALA");
  1260 
  1261         /**
  1262          * Constant for the "Myanmar" Unicode character block.
  1263          * @since 1.4
  1264          */
  1265         public static final UnicodeBlock MYANMAR =
  1266             new UnicodeBlock("MYANMAR");
  1267 
  1268         /**
  1269          * Constant for the "Ethiopic" Unicode character block.
  1270          * @since 1.4
  1271          */
  1272         public static final UnicodeBlock ETHIOPIC =
  1273             new UnicodeBlock("ETHIOPIC");
  1274 
  1275         /**
  1276          * Constant for the "Cherokee" Unicode character block.
  1277          * @since 1.4
  1278          */
  1279         public static final UnicodeBlock CHEROKEE =
  1280             new UnicodeBlock("CHEROKEE");
  1281 
  1282         /**
  1283          * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
  1284          * @since 1.4
  1285          */
  1286         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
  1287             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
  1288                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
  1289                              "UNIFIEDCANADIANABORIGINALSYLLABICS");
  1290 
  1291         /**
  1292          * Constant for the "Ogham" Unicode character block.
  1293          * @since 1.4
  1294          */
  1295         public static final UnicodeBlock OGHAM =
  1296             new UnicodeBlock("OGHAM");
  1297 
  1298         /**
  1299          * Constant for the "Runic" Unicode character block.
  1300          * @since 1.4
  1301          */
  1302         public static final UnicodeBlock RUNIC =
  1303             new UnicodeBlock("RUNIC");
  1304 
  1305         /**
  1306          * Constant for the "Khmer" Unicode character block.
  1307          * @since 1.4
  1308          */
  1309         public static final UnicodeBlock KHMER =
  1310             new UnicodeBlock("KHMER");
  1311 
  1312         /**
  1313          * Constant for the "Mongolian" Unicode character block.
  1314          * @since 1.4
  1315          */
  1316         public static final UnicodeBlock MONGOLIAN =
  1317             new UnicodeBlock("MONGOLIAN");
  1318 
  1319         /**
  1320          * Constant for the "Braille Patterns" Unicode character block.
  1321          * @since 1.4
  1322          */
  1323         public static final UnicodeBlock BRAILLE_PATTERNS =
  1324             new UnicodeBlock("BRAILLE_PATTERNS",
  1325                              "BRAILLE PATTERNS",
  1326                              "BRAILLEPATTERNS");
  1327 
  1328         /**
  1329          * Constant for the "CJK Radicals Supplement" Unicode character block.
  1330          * @since 1.4
  1331          */
  1332         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
  1333             new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
  1334                              "CJK RADICALS SUPPLEMENT",
  1335                              "CJKRADICALSSUPPLEMENT");
  1336 
  1337         /**
  1338          * Constant for the "Kangxi Radicals" Unicode character block.
  1339          * @since 1.4
  1340          */
  1341         public static final UnicodeBlock KANGXI_RADICALS =
  1342             new UnicodeBlock("KANGXI_RADICALS",
  1343                              "KANGXI RADICALS",
  1344                              "KANGXIRADICALS");
  1345 
  1346         /**
  1347          * Constant for the "Ideographic Description Characters" Unicode character block.
  1348          * @since 1.4
  1349          */
  1350         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
  1351             new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
  1352                              "IDEOGRAPHIC DESCRIPTION CHARACTERS",
  1353                              "IDEOGRAPHICDESCRIPTIONCHARACTERS");
  1354 
  1355         /**
  1356          * Constant for the "Bopomofo Extended" Unicode character block.
  1357          * @since 1.4
  1358          */
  1359         public static final UnicodeBlock BOPOMOFO_EXTENDED =
  1360             new UnicodeBlock("BOPOMOFO_EXTENDED",
  1361                              "BOPOMOFO EXTENDED",
  1362                              "BOPOMOFOEXTENDED");
  1363 
  1364         /**
  1365          * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
  1366          * @since 1.4
  1367          */
  1368         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
  1369             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
  1370                              "CJK UNIFIED IDEOGRAPHS EXTENSION A",
  1371                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
  1372 
  1373         /**
  1374          * Constant for the "Yi Syllables" Unicode character block.
  1375          * @since 1.4
  1376          */
  1377         public static final UnicodeBlock YI_SYLLABLES =
  1378             new UnicodeBlock("YI_SYLLABLES",
  1379                              "YI SYLLABLES",
  1380                              "YISYLLABLES");
  1381 
  1382         /**
  1383          * Constant for the "Yi Radicals" Unicode character block.
  1384          * @since 1.4
  1385          */
  1386         public static final UnicodeBlock YI_RADICALS =
  1387             new UnicodeBlock("YI_RADICALS",
  1388                              "YI RADICALS",
  1389                              "YIRADICALS");
  1390 
  1391         /**
  1392          * Constant for the "Cyrillic Supplementary" Unicode character block.
                * <p>
                * This constant is also declared with the alternate block name
                * "Cyrillic Supplement".
  1393          * @since 1.5
  1394          */
  1395         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
  1396             new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
  1397                              "CYRILLIC SUPPLEMENTARY",
  1398                              "CYRILLICSUPPLEMENTARY",
  1399                              "CYRILLIC SUPPLEMENT",
  1400                              "CYRILLICSUPPLEMENT");
  1401 
  1402         /**
  1403          * Constant for the "Tagalog" Unicode character block.
  1404          * @since 1.5
  1405          */
  1406         public static final UnicodeBlock TAGALOG =
  1407             new UnicodeBlock("TAGALOG");
  1408 
  1409         /**
  1410          * Constant for the "Hanunoo" Unicode character block.
  1411          * @since 1.5
  1412          */
  1413         public static final UnicodeBlock HANUNOO =
  1414             new UnicodeBlock("HANUNOO");
  1415 
  1416         /**
  1417          * Constant for the "Buhid" Unicode character block.
  1418          * @since 1.5
  1419          */
  1420         public static final UnicodeBlock BUHID =
  1421             new UnicodeBlock("BUHID");
  1422 
  1423         /**
  1424          * Constant for the "Tagbanwa" Unicode character block.
  1425          * @since 1.5
  1426          */
  1427         public static final UnicodeBlock TAGBANWA =
  1428             new UnicodeBlock("TAGBANWA");
  1429 
  1430         /**
  1431          * Constant for the "Limbu" Unicode character block.
  1432          * @since 1.5
  1433          */
  1434         public static final UnicodeBlock LIMBU =
  1435             new UnicodeBlock("LIMBU");
  1436 
  1437         /**
  1438          * Constant for the "Tai Le" Unicode character block.
  1439          * @since 1.5
  1440          */
  1441         public static final UnicodeBlock TAI_LE =
  1442             new UnicodeBlock("TAI_LE",
  1443                              "TAI LE",
  1444                              "TAILE");
  1445 
  1446         /**
  1447          * Constant for the "Khmer Symbols" Unicode character block.
  1448          * @since 1.5
  1449          */
  1450         public static final UnicodeBlock KHMER_SYMBOLS =
  1451             new UnicodeBlock("KHMER_SYMBOLS",
  1452                              "KHMER SYMBOLS",
  1453                              "KHMERSYMBOLS");
  1454 
  1455         /**
  1456          * Constant for the "Phonetic Extensions" Unicode character block.
  1457          * @since 1.5
  1458          */
  1459         public static final UnicodeBlock PHONETIC_EXTENSIONS =
  1460             new UnicodeBlock("PHONETIC_EXTENSIONS",
  1461                              "PHONETIC EXTENSIONS",
  1462                              "PHONETICEXTENSIONS");
  1463 
  1464         /**
  1465          * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
  1466          * @since 1.5
  1467          */
  1468         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
  1469             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
  1470                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
  1471                              "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
  1472 
  1473         /**
  1474          * Constant for the "Supplemental Arrows-A" Unicode character block.
  1475          * @since 1.5
  1476          */
  1477         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
  1478             new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
  1479                              "SUPPLEMENTAL ARROWS-A",
  1480                              "SUPPLEMENTALARROWS-A");
  1481 
  1482         /**
  1483          * Constant for the "Supplemental Arrows-B" Unicode character block.
  1484          * @since 1.5
  1485          */
  1486         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
  1487             new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
  1488                              "SUPPLEMENTAL ARROWS-B",
  1489                              "SUPPLEMENTALARROWS-B");
  1490 
  1491         /**
  1492          * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
  1493          * character block.
  1494          * @since 1.5
  1495          */
  1496         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
  1497             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
  1498                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
  1499                              "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
  1500 
  1501         /**
  1502          * Constant for the "Supplemental Mathematical Operators" Unicode
  1503          * character block.
  1504          * @since 1.5
  1505          */
  1506         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
  1507             new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
  1508                              "SUPPLEMENTAL MATHEMATICAL OPERATORS",
  1509                              "SUPPLEMENTALMATHEMATICALOPERATORS");
  1510 
  1511         /**
  1512          * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
  1513          * block.
  1514          * @since 1.5
  1515          */
  1516         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
  1517             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
  1518                              "MISCELLANEOUS SYMBOLS AND ARROWS",
  1519                              "MISCELLANEOUSSYMBOLSANDARROWS");
  1520 
  1521         /**
  1522          * Constant for the "Katakana Phonetic Extensions" Unicode character
  1523          * block.
  1524          * @since 1.5
  1525          */
  1526         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
  1527             new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
  1528                              "KATAKANA PHONETIC EXTENSIONS",
  1529                              "KATAKANAPHONETICEXTENSIONS");
  1530 
  1531         /**
  1532          * Constant for the "Yijing Hexagram Symbols" Unicode character block.
  1533          * @since 1.5
  1534          */
  1535         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
  1536             new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
  1537                              "YIJING HEXAGRAM SYMBOLS",
  1538                              "YIJINGHEXAGRAMSYMBOLS");
  1539 
  1540         /**
  1541          * Constant for the "Variation Selectors" Unicode character block.
  1542          * @since 1.5
  1543          */
  1544         public static final UnicodeBlock VARIATION_SELECTORS =
  1545             new UnicodeBlock("VARIATION_SELECTORS",
  1546                              "VARIATION SELECTORS",
  1547                              "VARIATIONSELECTORS");
  1548 
  1549         /**
  1550          * Constant for the "Linear B Syllabary" Unicode character block.
  1551          * @since 1.5
  1552          */
  1553         public static final UnicodeBlock LINEAR_B_SYLLABARY =
  1554             new UnicodeBlock("LINEAR_B_SYLLABARY",
  1555                              "LINEAR B SYLLABARY",
  1556                              "LINEARBSYLLABARY");
  1557 
  1558         /**
  1559          * Constant for the "Linear B Ideograms" Unicode character block.
  1560          * @since 1.5
  1561          */
  1562         public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
  1563             new UnicodeBlock("LINEAR_B_IDEOGRAMS",
  1564                              "LINEAR B IDEOGRAMS",
  1565                              "LINEARBIDEOGRAMS");
  1566 
  1567         /**
  1568          * Constant for the "Aegean Numbers" Unicode character block.
  1569          * @since 1.5
  1570          */
  1571         public static final UnicodeBlock AEGEAN_NUMBERS =
  1572             new UnicodeBlock("AEGEAN_NUMBERS",
  1573                              "AEGEAN NUMBERS",
  1574                              "AEGEANNUMBERS");
  1575 
  1576         /**
  1577          * Constant for the "Old Italic" Unicode character block.
  1578          * @since 1.5
  1579          */
  1580         public static final UnicodeBlock OLD_ITALIC =
  1581             new UnicodeBlock("OLD_ITALIC",
  1582                              "OLD ITALIC",
  1583                              "OLDITALIC");
  1584 
  1585         /**
  1586          * Constant for the "Gothic" Unicode character block.
  1587          * @since 1.5
  1588          */
  1589         public static final UnicodeBlock GOTHIC =
  1590             new UnicodeBlock("GOTHIC");
  1591 
  1592         /**
  1593          * Constant for the "Ugaritic" Unicode character block.
  1594          * @since 1.5
  1595          */
  1596         public static final UnicodeBlock UGARITIC =
  1597             new UnicodeBlock("UGARITIC");
  1598 
  1599         /**
  1600          * Constant for the "Deseret" Unicode character block.
  1601          * @since 1.5
  1602          */
  1603         public static final UnicodeBlock DESERET =
  1604             new UnicodeBlock("DESERET");
  1605 
  1606         /**
  1607          * Constant for the "Shavian" Unicode character block.
  1608          * @since 1.5
  1609          */
  1610         public static final UnicodeBlock SHAVIAN =
  1611             new UnicodeBlock("SHAVIAN");
  1612 
  1613         /**
  1614          * Constant for the "Osmanya" Unicode character block.
  1615          * @since 1.5
  1616          */
  1617         public static final UnicodeBlock OSMANYA =
  1618             new UnicodeBlock("OSMANYA");
  1619 
  1620         /**
  1621          * Constant for the "Cypriot Syllabary" Unicode character block.
  1622          * @since 1.5
  1623          */
  1624         public static final UnicodeBlock CYPRIOT_SYLLABARY =
  1625             new UnicodeBlock("CYPRIOT_SYLLABARY",
  1626                              "CYPRIOT SYLLABARY",
  1627                              "CYPRIOTSYLLABARY");
  1628 
  1629         /**
  1630          * Constant for the "Byzantine Musical Symbols" Unicode character block.
  1631          * @since 1.5
  1632          */
  1633         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
  1634             new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
  1635                              "BYZANTINE MUSICAL SYMBOLS",
  1636                              "BYZANTINEMUSICALSYMBOLS");
  1637 
  1638         /**
  1639          * Constant for the "Musical Symbols" Unicode character block.
  1640          * @since 1.5
  1641          */
  1642         public static final UnicodeBlock MUSICAL_SYMBOLS =
  1643             new UnicodeBlock("MUSICAL_SYMBOLS",
  1644                              "MUSICAL SYMBOLS",
  1645                              "MUSICALSYMBOLS");
  1646 
  1647         /**
  1648          * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
  1649          * @since 1.5
  1650          */
  1651         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
  1652             new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
  1653                              "TAI XUAN JING SYMBOLS",
  1654                              "TAIXUANJINGSYMBOLS");
  1655 
  1656         /**
  1657          * Constant for the "Mathematical Alphanumeric Symbols" Unicode
  1658          * character block.
  1659          * @since 1.5
  1660          */
  1661         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
  1662             new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
  1663                              "MATHEMATICAL ALPHANUMERIC SYMBOLS",
  1664                              "MATHEMATICALALPHANUMERICSYMBOLS");
  1665 
  1666         /**
  1667          * Constant for the "CJK Unified Ideographs Extension B" Unicode
  1668          * character block.
  1669          * @since 1.5
  1670          */
  1671         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
  1672             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
  1673                              "CJK UNIFIED IDEOGRAPHS EXTENSION B",
  1674                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
  1675 
  1676         /**
  1677          * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
  1678          * @since 1.5
  1679          */
  1680         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
  1681             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
  1682                              "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
  1683                              "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
  1684 
  1685         /**
  1686          * Constant for the "Tags" Unicode character block.
  1687          * @since 1.5
  1688          */
  1689         public static final UnicodeBlock TAGS =
  1690             new UnicodeBlock("TAGS");
  1691 
  1692         /**
  1693          * Constant for the "Variation Selectors Supplement" Unicode character
  1694          * block.
  1695          * @since 1.5
  1696          */
  1697         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
  1698             new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
  1699                              "VARIATION SELECTORS SUPPLEMENT",
  1700                              "VARIATIONSELECTORSSUPPLEMENT");
  1701 
  1702         /**
  1703          * Constant for the "Supplementary Private Use Area-A" Unicode character
  1704          * block.
  1705          * @since 1.5
  1706          */
  1707         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
  1708             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
  1709                              "SUPPLEMENTARY PRIVATE USE AREA-A",
  1710                              "SUPPLEMENTARYPRIVATEUSEAREA-A");
  1711 
  1712         /**
  1713          * Constant for the "Supplementary Private Use Area-B" Unicode character
  1714          * block.
  1715          * @since 1.5
  1716          */
  1717         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
  1718             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
  1719                              "SUPPLEMENTARY PRIVATE USE AREA-B",
  1720                              "SUPPLEMENTARYPRIVATEUSEAREA-B");
  1721 
  1722         /**
  1723          * Constant for the "High Surrogates" Unicode character block.
  1724          * This block represents code point values in the high surrogate
  1725          * range: U+D800 through U+DB7F.
  1726          *
  1727          * @since 1.5
  1728          */
  1729         public static final UnicodeBlock HIGH_SURROGATES =
  1730             new UnicodeBlock("HIGH_SURROGATES",
  1731                              "HIGH SURROGATES",
  1732                              "HIGHSURROGATES");
  1733 
  1734         /**
  1735          * Constant for the "High Private Use Surrogates" Unicode character
  1736          * block.
  1737          * This block represents code point values in the private use high
  1738          * surrogate range: U+DB80 through U+DBFF.
  1739          *
  1740          * @since 1.5
  1741          */
  1742         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
  1743             new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
  1744                              "HIGH PRIVATE USE SURROGATES",
  1745                              "HIGHPRIVATEUSESURROGATES");
  1746 
  1747         /**
  1748          * Constant for the "Low Surrogates" Unicode character block.
  1749          * This block represents code point values in the low surrogate
  1750          * range: U+DC00 through U+DFFF.
  1751          *
  1752          * @since 1.5
  1753          */
  1754         public static final UnicodeBlock LOW_SURROGATES =
  1755             new UnicodeBlock("LOW_SURROGATES",
  1756                              "LOW SURROGATES",
  1757                              "LOWSURROGATES");
  1758 
  1759         /**
  1760          * Constant for the "Arabic Supplement" Unicode character block.
  1761          * @since 1.7
  1762          */
  1763         public static final UnicodeBlock ARABIC_SUPPLEMENT =
  1764             new UnicodeBlock("ARABIC_SUPPLEMENT",
  1765                              "ARABIC SUPPLEMENT",
  1766                              "ARABICSUPPLEMENT");
  1767 
  1768         /**
  1769          * Constant for the "NKo" Unicode character block.
  1770          * @since 1.7
  1771          */
  1772         public static final UnicodeBlock NKO =
  1773             new UnicodeBlock("NKO");
  1774 
  1775         /**
  1776          * Constant for the "Samaritan" Unicode character block.
  1777          * @since 1.7
  1778          */
  1779         public static final UnicodeBlock SAMARITAN =
  1780             new UnicodeBlock("SAMARITAN");
  1781 
  1782         /**
  1783          * Constant for the "Mandaic" Unicode character block.
  1784          * @since 1.7
  1785          */
  1786         public static final UnicodeBlock MANDAIC =
  1787             new UnicodeBlock("MANDAIC");
  1788 
  1789         /**
  1790          * Constant for the "Ethiopic Supplement" Unicode character block.
  1791          * @since 1.7
  1792          */
  1793         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
  1794             new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
  1795                              "ETHIOPIC SUPPLEMENT",
  1796                              "ETHIOPICSUPPLEMENT");
  1797 
  1798         /**
  1799          * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
  1800          * Unicode character block.
  1801          * @since 1.7
  1802          */
  1803         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
  1804             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
  1805                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
  1806                              "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
  1807 
  1808         /**
  1809          * Constant for the "New Tai Lue" Unicode character block.
  1810          * @since 1.7
  1811          */
  1812         public static final UnicodeBlock NEW_TAI_LUE =
  1813             new UnicodeBlock("NEW_TAI_LUE",
  1814                              "NEW TAI LUE",
  1815                              "NEWTAILUE");
  1816 
  1817         /**
  1818          * Constant for the "Buginese" Unicode character block.
  1819          * @since 1.7
  1820          */
  1821         public static final UnicodeBlock BUGINESE =
  1822             new UnicodeBlock("BUGINESE");
  1823 
  1824         /**
  1825          * Constant for the "Tai Tham" Unicode character block.
  1826          * @since 1.7
  1827          */
  1828         public static final UnicodeBlock TAI_THAM =
  1829             new UnicodeBlock("TAI_THAM",
  1830                              "TAI THAM",
  1831                              "TAITHAM");
  1832 
  1833         /**
  1834          * Constant for the "Balinese" Unicode character block.
  1835          * @since 1.7
  1836          */
  1837         public static final UnicodeBlock BALINESE =
  1838             new UnicodeBlock("BALINESE");
  1839 
  1840         /**
  1841          * Constant for the "Sundanese" Unicode character block.
  1842          * @since 1.7
  1843          */
  1844         public static final UnicodeBlock SUNDANESE =
  1845             new UnicodeBlock("SUNDANESE");
  1846 
  1847         /**
  1848          * Constant for the "Batak" Unicode character block.
  1849          * @since 1.7
  1850          */
  1851         public static final UnicodeBlock BATAK =
  1852             new UnicodeBlock("BATAK");
  1853 
  1854         /**
  1855          * Constant for the "Lepcha" Unicode character block.
  1856          * @since 1.7
  1857          */
  1858         public static final UnicodeBlock LEPCHA =
  1859             new UnicodeBlock("LEPCHA");
  1860 
  1861         /**
  1862          * Constant for the "Ol Chiki" Unicode character block.
  1863          * @since 1.7
  1864          */
  1865         public static final UnicodeBlock OL_CHIKI =
  1866             new UnicodeBlock("OL_CHIKI",
  1867                              "OL CHIKI",
  1868                              "OLCHIKI");
  1869 
  1870         /**
  1871          * Constant for the "Vedic Extensions" Unicode character block.
  1872          * @since 1.7
  1873          */
  1874         public static final UnicodeBlock VEDIC_EXTENSIONS =
  1875             new UnicodeBlock("VEDIC_EXTENSIONS",
  1876                              "VEDIC EXTENSIONS",
  1877                              "VEDICEXTENSIONS");
  1878 
  1879         /**
  1880          * Constant for the "Phonetic Extensions Supplement" Unicode character
  1881          * block.
  1882          * @since 1.7
  1883          */
  1884         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
  1885             new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
  1886                              "PHONETIC EXTENSIONS SUPPLEMENT",
  1887                              "PHONETICEXTENSIONSSUPPLEMENT");
  1888 
  1889         /**
  1890          * Constant for the "Combining Diacritical Marks Supplement" Unicode
  1891          * character block.
  1892          * @since 1.7
  1893          */
  1894         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
  1895             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
  1896                              "COMBINING DIACRITICAL MARKS SUPPLEMENT",
  1897                              "COMBININGDIACRITICALMARKSSUPPLEMENT");
  1898 
  1899         /**
  1900          * Constant for the "Glagolitic" Unicode character block.
  1901          * @since 1.7
  1902          */
  1903         public static final UnicodeBlock GLAGOLITIC =
  1904             new UnicodeBlock("GLAGOLITIC");
  1905 
  1906         /**
  1907          * Constant for the "Latin Extended-C" Unicode character block.
  1908          * @since 1.7
  1909          */
  1910         public static final UnicodeBlock LATIN_EXTENDED_C =
  1911             new UnicodeBlock("LATIN_EXTENDED_C",
  1912                              "LATIN EXTENDED-C",
  1913                              "LATINEXTENDED-C");
  1914 
  1915         /**
  1916          * Constant for the "Coptic" Unicode character block.
  1917          * @since 1.7
  1918          */
  1919         public static final UnicodeBlock COPTIC =
  1920             new UnicodeBlock("COPTIC");
  1921 
  1922         /**
  1923          * Constant for the "Georgian Supplement" Unicode character block.
  1924          * @since 1.7
  1925          */
  1926         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
  1927             new UnicodeBlock("GEORGIAN_SUPPLEMENT",
  1928                              "GEORGIAN SUPPLEMENT",
  1929                              "GEORGIANSUPPLEMENT");
  1930 
  1931         /**
  1932          * Constant for the "Tifinagh" Unicode character block.
  1933          * @since 1.7
  1934          */
  1935         public static final UnicodeBlock TIFINAGH =
  1936             new UnicodeBlock("TIFINAGH");
  1937 
  1938         /**
  1939          * Constant for the "Ethiopic Extended" Unicode character block.
  1940          * @since 1.7
  1941          */
  1942         public static final UnicodeBlock ETHIOPIC_EXTENDED =
  1943             new UnicodeBlock("ETHIOPIC_EXTENDED",
  1944                              "ETHIOPIC EXTENDED",
  1945                              "ETHIOPICEXTENDED");
  1946 
  1947         /**
  1948          * Constant for the "Cyrillic Extended-A" Unicode character block.
  1949          * @since 1.7
  1950          */
  1951         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
  1952             new UnicodeBlock("CYRILLIC_EXTENDED_A",
  1953                              "CYRILLIC EXTENDED-A",
  1954                              "CYRILLICEXTENDED-A");
  1955 
  1956         /**
  1957          * Constant for the "Supplemental Punctuation" Unicode character block.
  1958          * @since 1.7
  1959          */
  1960         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
  1961             new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
  1962                              "SUPPLEMENTAL PUNCTUATION",
  1963                              "SUPPLEMENTALPUNCTUATION");
  1964 
  1965         /**
  1966          * Constant for the "CJK Strokes" Unicode character block.
  1967          * @since 1.7
  1968          */
  1969         public static final UnicodeBlock CJK_STROKES =
  1970             new UnicodeBlock("CJK_STROKES",
  1971                              "CJK STROKES",
  1972                              "CJKSTROKES");
  1973 
  1974         /**
  1975          * Constant for the "Lisu" Unicode character block.
  1976          * @since 1.7
  1977          */
  1978         public static final UnicodeBlock LISU =
  1979             new UnicodeBlock("LISU");
  1980 
  1981         /**
  1982          * Constant for the "Vai" Unicode character block.
  1983          * @since 1.7
  1984          */
  1985         public static final UnicodeBlock VAI =
  1986             new UnicodeBlock("VAI");
  1987 
  1988         /**
  1989          * Constant for the "Cyrillic Extended-B" Unicode character block.
  1990          * @since 1.7
  1991          */
  1992         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
  1993             new UnicodeBlock("CYRILLIC_EXTENDED_B",
  1994                              "CYRILLIC EXTENDED-B",
  1995                              "CYRILLICEXTENDED-B");
  1996 
  1997         /**
  1998          * Constant for the "Bamum" Unicode character block.
  1999          * @since 1.7
  2000          */
  2001         public static final UnicodeBlock BAMUM =
  2002             new UnicodeBlock("BAMUM");
  2003 
  2004         /**
  2005          * Constant for the "Modifier Tone Letters" Unicode character block.
  2006          * @since 1.7
  2007          */
  2008         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
  2009             new UnicodeBlock("MODIFIER_TONE_LETTERS",
  2010                              "MODIFIER TONE LETTERS",
  2011                              "MODIFIERTONELETTERS");
  2012 
  2013         /**
  2014          * Constant for the "Latin Extended-D" Unicode character block.
  2015          * @since 1.7
  2016          */
  2017         public static final UnicodeBlock LATIN_EXTENDED_D =
  2018             new UnicodeBlock("LATIN_EXTENDED_D",
  2019                              "LATIN EXTENDED-D",
  2020                              "LATINEXTENDED-D");
  2021 
  2022         /**
  2023          * Constant for the "Syloti Nagri" Unicode character block.
  2024          * @since 1.7
  2025          */
  2026         public static final UnicodeBlock SYLOTI_NAGRI =
  2027             new UnicodeBlock("SYLOTI_NAGRI",
  2028                              "SYLOTI NAGRI",
  2029                              "SYLOTINAGRI");
  2030 
  2031         /**
  2032          * Constant for the "Common Indic Number Forms" Unicode character block.
  2033          * @since 1.7
  2034          */
  2035         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
  2036             new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
  2037                              "COMMON INDIC NUMBER FORMS",
  2038                              "COMMONINDICNUMBERFORMS");
  2039 
  2040         /**
  2041          * Constant for the "Phags-pa" Unicode character block.
  2042          * @since 1.7
  2043          */
  2044         public static final UnicodeBlock PHAGS_PA =
  2045             new UnicodeBlock("PHAGS_PA",
  2046                              "PHAGS-PA");
  2047 
  2048         /**
  2049          * Constant for the "Saurashtra" Unicode character block.
  2050          * @since 1.7
  2051          */
  2052         public static final UnicodeBlock SAURASHTRA =
  2053             new UnicodeBlock("SAURASHTRA");
  2054 
  2055         /**
  2056          * Constant for the "Devanagari Extended" Unicode character block.
  2057          * @since 1.7
  2058          */
  2059         public static final UnicodeBlock DEVANAGARI_EXTENDED =
  2060             new UnicodeBlock("DEVANAGARI_EXTENDED",
  2061                              "DEVANAGARI EXTENDED",
  2062                              "DEVANAGARIEXTENDED");
  2063 
  2064         /**
  2065          * Constant for the "Kayah Li" Unicode character block.
  2066          * @since 1.7
  2067          */
  2068         public static final UnicodeBlock KAYAH_LI =
  2069             new UnicodeBlock("KAYAH_LI",
  2070                              "KAYAH LI",
  2071                              "KAYAHLI");
  2072 
  2073         /**
  2074          * Constant for the "Rejang" Unicode character block.
  2075          * @since 1.7
  2076          */
  2077         public static final UnicodeBlock REJANG =
  2078             new UnicodeBlock("REJANG");
  2079 
  2080         /**
  2081          * Constant for the "Hangul Jamo Extended-A" Unicode character block.
  2082          * @since 1.7
  2083          */
  2084         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
  2085             new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
  2086                              "HANGUL JAMO EXTENDED-A",
  2087                              "HANGULJAMOEXTENDED-A");
  2088 
  2089         /**
  2090          * Constant for the "Javanese" Unicode character block.
  2091          * @since 1.7
  2092          */
  2093         public static final UnicodeBlock JAVANESE =
  2094             new UnicodeBlock("JAVANESE");
  2095 
  2096         /**
  2097          * Constant for the "Cham" Unicode character block.
  2098          * @since 1.7
  2099          */
  2100         public static final UnicodeBlock CHAM =
  2101             new UnicodeBlock("CHAM");
  2102 
  2103         /**
  2104          * Constant for the "Myanmar Extended-A" Unicode character block.
  2105          * @since 1.7
  2106          */
  2107         public static final UnicodeBlock MYANMAR_EXTENDED_A =
  2108             new UnicodeBlock("MYANMAR_EXTENDED_A",
  2109                              "MYANMAR EXTENDED-A",
  2110                              "MYANMAREXTENDED-A");
  2111 
  2112         /**
  2113          * Constant for the "Tai Viet" Unicode character block.
  2114          * @since 1.7
  2115          */
  2116         public static final UnicodeBlock TAI_VIET =
  2117             new UnicodeBlock("TAI_VIET",
  2118                              "TAI VIET",
  2119                              "TAIVIET");
  2120 
  2121         /**
  2122          * Constant for the "Ethiopic Extended-A" Unicode character block.
  2123          * @since 1.7
  2124          */
  2125         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
  2126             new UnicodeBlock("ETHIOPIC_EXTENDED_A",
  2127                              "ETHIOPIC EXTENDED-A",
  2128                              "ETHIOPICEXTENDED-A");
  2129 
  2130         /**
  2131          * Constant for the "Meetei Mayek" Unicode character block.
  2132          * @since 1.7
  2133          */
  2134         public static final UnicodeBlock MEETEI_MAYEK =
  2135             new UnicodeBlock("MEETEI_MAYEK",
  2136                              "MEETEI MAYEK",
  2137                              "MEETEIMAYEK");
  2138 
  2139         /**
  2140          * Constant for the "Hangul Jamo Extended-B" Unicode character block.
  2141          * @since 1.7
  2142          */
  2143         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
  2144             new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
  2145                              "HANGUL JAMO EXTENDED-B",
  2146                              "HANGULJAMOEXTENDED-B");
  2147 
  2148         /**
  2149          * Constant for the "Vertical Forms" Unicode character block.
  2150          * @since 1.7
  2151          */
  2152         public static final UnicodeBlock VERTICAL_FORMS =
  2153             new UnicodeBlock("VERTICAL_FORMS",
  2154                              "VERTICAL FORMS",
  2155                              "VERTICALFORMS");
  2156 
  2157         /**
  2158          * Constant for the "Ancient Greek Numbers" Unicode character block.
  2159          * @since 1.7
  2160          */
  2161         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
  2162             new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
  2163                              "ANCIENT GREEK NUMBERS",
  2164                              "ANCIENTGREEKNUMBERS");
  2165 
  2166         /**
  2167          * Constant for the "Ancient Symbols" Unicode character block.
  2168          * @since 1.7
  2169          */
  2170         public static final UnicodeBlock ANCIENT_SYMBOLS =
  2171             new UnicodeBlock("ANCIENT_SYMBOLS",
  2172                              "ANCIENT SYMBOLS",
  2173                              "ANCIENTSYMBOLS");
  2174 
  2175         /**
  2176          * Constant for the "Phaistos Disc" Unicode character block.
  2177          * @since 1.7
  2178          */
  2179         public static final UnicodeBlock PHAISTOS_DISC =
  2180             new UnicodeBlock("PHAISTOS_DISC",
  2181                              "PHAISTOS DISC",
  2182                              "PHAISTOSDISC");
  2183 
  2184         /**
  2185          * Constant for the "Lycian" Unicode character block.
  2186          * @since 1.7
  2187          */
  2188         public static final UnicodeBlock LYCIAN =
  2189             new UnicodeBlock("LYCIAN");
  2190 
  2191         /**
  2192          * Constant for the "Carian" Unicode character block.
  2193          * @since 1.7
  2194          */
  2195         public static final UnicodeBlock CARIAN =
  2196             new UnicodeBlock("CARIAN");
  2197 
  2198         /**
  2199          * Constant for the "Old Persian" Unicode character block.
  2200          * @since 1.7
  2201          */
  2202         public static final UnicodeBlock OLD_PERSIAN =
  2203             new UnicodeBlock("OLD_PERSIAN",
  2204                              "OLD PERSIAN",
  2205                              "OLDPERSIAN");
  2206 
  2207         /**
  2208          * Constant for the "Imperial Aramaic" Unicode character block.
  2209          * @since 1.7
  2210          */
  2211         public static final UnicodeBlock IMPERIAL_ARAMAIC =
  2212             new UnicodeBlock("IMPERIAL_ARAMAIC",
  2213                              "IMPERIAL ARAMAIC",
  2214                              "IMPERIALARAMAIC");
  2215 
  2216         /**
  2217          * Constant for the "Phoenician" Unicode character block.
  2218          * @since 1.7
  2219          */
  2220         public static final UnicodeBlock PHOENICIAN =
  2221             new UnicodeBlock("PHOENICIAN");
  2222 
  2223         /**
  2224          * Constant for the "Lydian" Unicode character block.
  2225          * @since 1.7
  2226          */
  2227         public static final UnicodeBlock LYDIAN =
  2228             new UnicodeBlock("LYDIAN");
  2229 
  2230         /**
  2231          * Constant for the "Kharoshthi" Unicode character block.
  2232          * @since 1.7
  2233          */
  2234         public static final UnicodeBlock KHAROSHTHI =
  2235             new UnicodeBlock("KHAROSHTHI");
  2236 
  2237         /**
  2238          * Constant for the "Old South Arabian" Unicode character block.
  2239          * @since 1.7
  2240          */
  2241         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
  2242             new UnicodeBlock("OLD_SOUTH_ARABIAN",
  2243                              "OLD SOUTH ARABIAN",
  2244                              "OLDSOUTHARABIAN");
  2245 
  2246         /**
  2247          * Constant for the "Avestan" Unicode character block.
  2248          * @since 1.7
  2249          */
  2250         public static final UnicodeBlock AVESTAN =
  2251             new UnicodeBlock("AVESTAN");
  2252 
  2253         /**
  2254          * Constant for the "Inscriptional Parthian" Unicode character block.
  2255          * @since 1.7
  2256          */
  2257         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
  2258             new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
  2259                              "INSCRIPTIONAL PARTHIAN",
  2260                              "INSCRIPTIONALPARTHIAN");
  2261 
  2262         /**
  2263          * Constant for the "Inscriptional Pahlavi" Unicode character block.
  2264          * @since 1.7
  2265          */
  2266         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
  2267             new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
  2268                              "INSCRIPTIONAL PAHLAVI",
  2269                              "INSCRIPTIONALPAHLAVI");
  2270 
  2271         /**
  2272          * Constant for the "Old Turkic" Unicode character block.
  2273          * @since 1.7
  2274          */
  2275         public static final UnicodeBlock OLD_TURKIC =
  2276             new UnicodeBlock("OLD_TURKIC",
  2277                              "OLD TURKIC",
  2278                              "OLDTURKIC");
  2279 
  2280         /**
  2281          * Constant for the "Rumi Numeral Symbols" Unicode character block.
  2282          * @since 1.7
  2283          */
  2284         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
  2285             new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
  2286                              "RUMI NUMERAL SYMBOLS",
  2287                              "RUMINUMERALSYMBOLS");
  2288 
  2289         /**
  2290          * Constant for the "Brahmi" Unicode character block.
  2291          * @since 1.7
  2292          */
  2293         public static final UnicodeBlock BRAHMI =
  2294             new UnicodeBlock("BRAHMI");
  2295 
  2296         /**
  2297          * Constant for the "Kaithi" Unicode character block.
  2298          * @since 1.7
  2299          */
  2300         public static final UnicodeBlock KAITHI =
  2301             new UnicodeBlock("KAITHI");
  2302 
  2303         /**
  2304          * Constant for the "Cuneiform" Unicode character block.
  2305          * @since 1.7
  2306          */
  2307         public static final UnicodeBlock CUNEIFORM =
  2308             new UnicodeBlock("CUNEIFORM");
  2309 
  2310         /**
  2311          * Constant for the "Cuneiform Numbers and Punctuation" Unicode
  2312          * character block (U+12400..U+1247F).
  2313          * @since 1.7
  2314          */
  2315         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
  2316             new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
  2317                              "CUNEIFORM NUMBERS AND PUNCTUATION",
  2318                              "CUNEIFORMNUMBERSANDPUNCTUATION");
  2319 
  2320         /**
  2321          * Constant for the "Egyptian Hieroglyphs" Unicode character block (U+13000..U+1342F).
  2322          * @since 1.7
  2323          */
  2324         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
  2325             new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
  2326                              "EGYPTIAN HIEROGLYPHS",
  2327                              "EGYPTIANHIEROGLYPHS");
  2328 
  2329         /**
  2330          * Constant for the "Bamum Supplement" Unicode character block (U+16800..U+16A3F).
  2331          * @since 1.7
  2332          */
  2333         public static final UnicodeBlock BAMUM_SUPPLEMENT =
  2334             new UnicodeBlock("BAMUM_SUPPLEMENT",
  2335                              "BAMUM SUPPLEMENT",
  2336                              "BAMUMSUPPLEMENT");
  2337 
  2338         /**
  2339          * Constant for the "Kana Supplement" Unicode character block (U+1B000..U+1B0FF).
  2340          * @since 1.7
  2341          */
  2342         public static final UnicodeBlock KANA_SUPPLEMENT =
  2343             new UnicodeBlock("KANA_SUPPLEMENT",
  2344                              "KANA SUPPLEMENT",
  2345                              "KANASUPPLEMENT");
  2346 
  2347         /**
  2348          * Constant for the "Ancient Greek Musical Notation" Unicode character
  2349          * block (U+1D200..U+1D24F).
  2350          * @since 1.7
  2351          */
  2352         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
  2353             new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
  2354                              "ANCIENT GREEK MUSICAL NOTATION",
  2355                              "ANCIENTGREEKMUSICALNOTATION");
  2356 
  2357         /**
  2358          * Constant for the "Counting Rod Numerals" Unicode character block (U+1D360..U+1D37F).
  2359          * @since 1.7
  2360          */
  2361         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
  2362             new UnicodeBlock("COUNTING_ROD_NUMERALS",
  2363                              "COUNTING ROD NUMERALS",
  2364                              "COUNTINGRODNUMERALS");
  2365 
  2366         /**
  2367          * Constant for the "Mahjong Tiles" Unicode character block (U+1F000..U+1F02F).
  2368          * @since 1.7
  2369          */
  2370         public static final UnicodeBlock MAHJONG_TILES =
  2371             new UnicodeBlock("MAHJONG_TILES",
  2372                              "MAHJONG TILES",
  2373                              "MAHJONGTILES");
  2374 
  2375         /**
  2376          * Constant for the "Domino Tiles" Unicode character block (U+1F030..U+1F09F).
  2377          * @since 1.7
  2378          */
  2379         public static final UnicodeBlock DOMINO_TILES =
  2380             new UnicodeBlock("DOMINO_TILES",
  2381                              "DOMINO TILES",
  2382                              "DOMINOTILES");
  2383 
  2384         /**
  2385          * Constant for the "Playing Cards" Unicode character block (U+1F0A0..U+1F0FF).
  2386          * @since 1.7
  2387          */
  2388         public static final UnicodeBlock PLAYING_CARDS =
  2389             new UnicodeBlock("PLAYING_CARDS",
  2390                              "PLAYING CARDS",
  2391                              "PLAYINGCARDS");
  2392 
  2393         /**
  2394          * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
  2395          * block (U+1F100..U+1F1FF).
  2396          * @since 1.7
  2397          */
  2398         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
  2399             new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
  2400                              "ENCLOSED ALPHANUMERIC SUPPLEMENT",
  2401                              "ENCLOSEDALPHANUMERICSUPPLEMENT");
  2402 
  2403         /**
  2404          * Constant for the "Enclosed Ideographic Supplement" Unicode character
  2405          * block (U+1F200..U+1F2FF).
  2406          * @since 1.7
  2407          */
  2408         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
  2409             new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
  2410                              "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
  2411                              "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
  2412 
  2413         /**
  2414          * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
  2415          * character block (U+1F300..U+1F5FF).
  2416          * @since 1.7
  2417          */
  2418         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
  2419             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
  2420                              "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
  2421                              "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
  2422 
  2423         /**
  2424          * Constant for the "Emoticons" Unicode character block (U+1F600..U+1F64F).
  2425          * @since 1.7
  2426          */
  2427         public static final UnicodeBlock EMOTICONS =
  2428             new UnicodeBlock("EMOTICONS");
  2429 
  2430         /**
  2431          * Constant for the "Transport And Map Symbols" Unicode character block (U+1F680..U+1F6FF).
  2432          * @since 1.7
  2433          */
  2434         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
  2435             new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
  2436                              "TRANSPORT AND MAP SYMBOLS",
  2437                              "TRANSPORTANDMAPSYMBOLS");
  2438 
  2439         /**
  2440          * Constant for the "Alchemical Symbols" Unicode character block (U+1F700..U+1F77F).
  2441          * @since 1.7
  2442          */
  2443         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
  2444             new UnicodeBlock("ALCHEMICAL_SYMBOLS",
  2445                              "ALCHEMICAL SYMBOLS",
  2446                              "ALCHEMICALSYMBOLS");
  2447 
  2448         /**
  2449          * Constant for the "CJK Unified Ideographs Extension C" Unicode
  2450          * character block (U+2A700..U+2B73F).
  2451          * @since 1.7
  2452          */
  2453         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
  2454             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
  2455                              "CJK UNIFIED IDEOGRAPHS EXTENSION C",
  2456                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
  2457 
  2458         /**
  2459          * Constant for the "CJK Unified Ideographs Extension D" Unicode
  2460          * character block (U+2B740..U+2B81F).
  2461          * @since 1.7
  2462          */
  2463         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
  2464             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
  2465                              "CJK UNIFIED IDEOGRAPHS EXTENSION D",
  2466                              "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
  2467         // Ascending start code point of each range; entry i pairs with blocks[i] (searched by of(int)).
  2468         private static final int blockStarts[] = {
  2469             0x0000,   // 0000..007F; Basic Latin
  2470             0x0080,   // 0080..00FF; Latin-1 Supplement
  2471             0x0100,   // 0100..017F; Latin Extended-A
  2472             0x0180,   // 0180..024F; Latin Extended-B
  2473             0x0250,   // 0250..02AF; IPA Extensions
  2474             0x02B0,   // 02B0..02FF; Spacing Modifier Letters
  2475             0x0300,   // 0300..036F; Combining Diacritical Marks
  2476             0x0370,   // 0370..03FF; Greek and Coptic
  2477             0x0400,   // 0400..04FF; Cyrillic
  2478             0x0500,   // 0500..052F; Cyrillic Supplement
  2479             0x0530,   // 0530..058F; Armenian
  2480             0x0590,   // 0590..05FF; Hebrew
  2481             0x0600,   // 0600..06FF; Arabic
  2482             0x0700,   // 0700..074F; Syriac
  2483             0x0750,   // 0750..077F; Arabic Supplement
  2484             0x0780,   // 0780..07BF; Thaana
  2485             0x07C0,   // 07C0..07FF; NKo
  2486             0x0800,   // 0800..083F; Samaritan
  2487             0x0840,   // 0840..085F; Mandaic
  2488             0x0860,   //             unassigned
  2489             0x0900,   // 0900..097F; Devanagari
  2490             0x0980,   // 0980..09FF; Bengali
  2491             0x0A00,   // 0A00..0A7F; Gurmukhi
  2492             0x0A80,   // 0A80..0AFF; Gujarati
  2493             0x0B00,   // 0B00..0B7F; Oriya
  2494             0x0B80,   // 0B80..0BFF; Tamil
  2495             0x0C00,   // 0C00..0C7F; Telugu
  2496             0x0C80,   // 0C80..0CFF; Kannada
  2497             0x0D00,   // 0D00..0D7F; Malayalam
  2498             0x0D80,   // 0D80..0DFF; Sinhala
  2499             0x0E00,   // 0E00..0E7F; Thai
  2500             0x0E80,   // 0E80..0EFF; Lao
  2501             0x0F00,   // 0F00..0FFF; Tibetan
  2502             0x1000,   // 1000..109F; Myanmar
  2503             0x10A0,   // 10A0..10FF; Georgian
  2504             0x1100,   // 1100..11FF; Hangul Jamo
  2505             0x1200,   // 1200..137F; Ethiopic
  2506             0x1380,   // 1380..139F; Ethiopic Supplement
  2507             0x13A0,   // 13A0..13FF; Cherokee
  2508             0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
  2509             0x1680,   // 1680..169F; Ogham
  2510             0x16A0,   // 16A0..16FF; Runic
  2511             0x1700,   // 1700..171F; Tagalog
  2512             0x1720,   // 1720..173F; Hanunoo
  2513             0x1740,   // 1740..175F; Buhid
  2514             0x1760,   // 1760..177F; Tagbanwa
  2515             0x1780,   // 1780..17FF; Khmer
  2516             0x1800,   // 1800..18AF; Mongolian
  2517             0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
  2518             0x1900,   // 1900..194F; Limbu
  2519             0x1950,   // 1950..197F; Tai Le
  2520             0x1980,   // 1980..19DF; New Tai Lue
  2521             0x19E0,   // 19E0..19FF; Khmer Symbols
  2522             0x1A00,   // 1A00..1A1F; Buginese
  2523             0x1A20,   // 1A20..1AAF; Tai Tham
  2524             0x1AB0,   //             unassigned
  2525             0x1B00,   // 1B00..1B7F; Balinese
  2526             0x1B80,   // 1B80..1BBF; Sundanese
  2527             0x1BC0,   // 1BC0..1BFF; Batak
  2528             0x1C00,   // 1C00..1C4F; Lepcha
  2529             0x1C50,   // 1C50..1C7F; Ol Chiki
  2530             0x1C80,   //             unassigned
  2531             0x1CD0,   // 1CD0..1CFF; Vedic Extensions
  2532             0x1D00,   // 1D00..1D7F; Phonetic Extensions
  2533             0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
  2534             0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
  2535             0x1E00,   // 1E00..1EFF; Latin Extended Additional
  2536             0x1F00,   // 1F00..1FFF; Greek Extended
  2537             0x2000,   // 2000..206F; General Punctuation
  2538             0x2070,   // 2070..209F; Superscripts and Subscripts
  2539             0x20A0,   // 20A0..20CF; Currency Symbols
  2540             0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
  2541             0x2100,   // 2100..214F; Letterlike Symbols
  2542             0x2150,   // 2150..218F; Number Forms
  2543             0x2190,   // 2190..21FF; Arrows
  2544             0x2200,   // 2200..22FF; Mathematical Operators
  2545             0x2300,   // 2300..23FF; Miscellaneous Technical
  2546             0x2400,   // 2400..243F; Control Pictures
  2547             0x2440,   // 2440..245F; Optical Character Recognition
  2548             0x2460,   // 2460..24FF; Enclosed Alphanumerics
  2549             0x2500,   // 2500..257F; Box Drawing
  2550             0x2580,   // 2580..259F; Block Elements
  2551             0x25A0,   // 25A0..25FF; Geometric Shapes
  2552             0x2600,   // 2600..26FF; Miscellaneous Symbols
  2553             0x2700,   // 2700..27BF; Dingbats
  2554             0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
  2555             0x27F0,   // 27F0..27FF; Supplemental Arrows-A
  2556             0x2800,   // 2800..28FF; Braille Patterns
  2557             0x2900,   // 2900..297F; Supplemental Arrows-B
  2558             0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
  2559             0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
  2560             0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
  2561             0x2C00,   // 2C00..2C5F; Glagolitic
  2562             0x2C60,   // 2C60..2C7F; Latin Extended-C
  2563             0x2C80,   // 2C80..2CFF; Coptic
  2564             0x2D00,   // 2D00..2D2F; Georgian Supplement
  2565             0x2D30,   // 2D30..2D7F; Tifinagh
  2566             0x2D80,   // 2D80..2DDF; Ethiopic Extended
  2567             0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
  2568             0x2E00,   // 2E00..2E7F; Supplemental Punctuation
  2569             0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
  2570             0x2F00,   // 2F00..2FDF; Kangxi Radicals
  2571             0x2FE0,   //             unassigned
  2572             0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
  2573             0x3000,   // 3000..303F; CJK Symbols and Punctuation
  2574             0x3040,   // 3040..309F; Hiragana
  2575             0x30A0,   // 30A0..30FF; Katakana
  2576             0x3100,   // 3100..312F; Bopomofo
  2577             0x3130,   // 3130..318F; Hangul Compatibility Jamo
  2578             0x3190,   // 3190..319F; Kanbun
  2579             0x31A0,   // 31A0..31BF; Bopomofo Extended
  2580             0x31C0,   // 31C0..31EF; CJK Strokes
  2581             0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
  2582             0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
  2583             0x3300,   // 3300..33FF; CJK Compatibility
  2584             0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
  2585             0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
  2586             0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
  2587             0xA000,   // A000..A48F; Yi Syllables
  2588             0xA490,   // A490..A4CF; Yi Radicals
  2589             0xA4D0,   // A4D0..A4FF; Lisu
  2590             0xA500,   // A500..A63F; Vai
  2591             0xA640,   // A640..A69F; Cyrillic Extended-B
  2592             0xA6A0,   // A6A0..A6FF; Bamum
  2593             0xA700,   // A700..A71F; Modifier Tone Letters
  2594             0xA720,   // A720..A7FF; Latin Extended-D
  2595             0xA800,   // A800..A82F; Syloti Nagri
  2596             0xA830,   // A830..A83F; Common Indic Number Forms
  2597             0xA840,   // A840..A87F; Phags-pa
  2598             0xA880,   // A880..A8DF; Saurashtra
  2599             0xA8E0,   // A8E0..A8FF; Devanagari Extended
  2600             0xA900,   // A900..A92F; Kayah Li
  2601             0xA930,   // A930..A95F; Rejang
  2602             0xA960,   // A960..A97F; Hangul Jamo Extended-A
  2603             0xA980,   // A980..A9DF; Javanese
  2604             0xA9E0,   //             unassigned
  2605             0xAA00,   // AA00..AA5F; Cham
  2606             0xAA60,   // AA60..AA7F; Myanmar Extended-A
  2607             0xAA80,   // AA80..AADF; Tai Viet
  2608             0xAAE0,   //             unassigned
  2609             0xAB00,   // AB00..AB2F; Ethiopic Extended-A
  2610             0xAB30,   //             unassigned
  2611             0xABC0,   // ABC0..ABFF; Meetei Mayek
  2612             0xAC00,   // AC00..D7AF; Hangul Syllables
  2613             0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
  2614             0xD800,   // D800..DB7F; High Surrogates
  2615             0xDB80,   // DB80..DBFF; High Private Use Surrogates
  2616             0xDC00,   // DC00..DFFF; Low Surrogates
  2617             0xE000,   // E000..F8FF; Private Use Area
  2618             0xF900,   // F900..FAFF; CJK Compatibility Ideographs
  2619             0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
  2620             0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
  2621             0xFE00,   // FE00..FE0F; Variation Selectors
  2622             0xFE10,   // FE10..FE1F; Vertical Forms
  2623             0xFE20,   // FE20..FE2F; Combining Half Marks
  2624             0xFE30,   // FE30..FE4F; CJK Compatibility Forms
  2625             0xFE50,   // FE50..FE6F; Small Form Variants
  2626             0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
  2627             0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
  2628             0xFFF0,   // FFF0..FFFF; Specials
  2629             0x10000,  // 10000..1007F; Linear B Syllabary
  2630             0x10080,  // 10080..100FF; Linear B Ideograms
  2631             0x10100,  // 10100..1013F; Aegean Numbers
  2632             0x10140,  // 10140..1018F; Ancient Greek Numbers
  2633             0x10190,  // 10190..101CF; Ancient Symbols
  2634             0x101D0,  // 101D0..101FF; Phaistos Disc
  2635             0x10200,  //               unassigned
  2636             0x10280,  // 10280..1029F; Lycian
  2637             0x102A0,  // 102A0..102DF; Carian
  2638             0x102E0,  //               unassigned
  2639             0x10300,  // 10300..1032F; Old Italic
  2640             0x10330,  // 10330..1034F; Gothic
  2641             0x10350,  //               unassigned
  2642             0x10380,  // 10380..1039F; Ugaritic
  2643             0x103A0,  // 103A0..103DF; Old Persian
  2644             0x103E0,  //               unassigned
  2645             0x10400,  // 10400..1044F; Deseret
  2646             0x10450,  // 10450..1047F; Shavian
  2647             0x10480,  // 10480..104AF; Osmanya
  2648             0x104B0,  //               unassigned
  2649             0x10800,  // 10800..1083F; Cypriot Syllabary
  2650             0x10840,  // 10840..1085F; Imperial Aramaic
  2651             0x10860,  //               unassigned
  2652             0x10900,  // 10900..1091F; Phoenician
  2653             0x10920,  // 10920..1093F; Lydian
  2654             0x10940,  //               unassigned
  2655             0x10A00,  // 10A00..10A5F; Kharoshthi
  2656             0x10A60,  // 10A60..10A7F; Old South Arabian
  2657             0x10A80,  //               unassigned
  2658             0x10B00,  // 10B00..10B3F; Avestan
  2659             0x10B40,  // 10B40..10B5F; Inscriptional Parthian
  2660             0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
  2661             0x10B80,  //               unassigned
  2662             0x10C00,  // 10C00..10C4F; Old Turkic
  2663             0x10C50,  //               unassigned
  2664             0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
  2665             0x10E80,  //               unassigned
  2666             0x11000,  // 11000..1107F; Brahmi
  2667             0x11080,  // 11080..110CF; Kaithi
  2668             0x110D0,  //               unassigned
  2669             0x12000,  // 12000..123FF; Cuneiform
  2670             0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
  2671             0x12480,  //               unassigned
  2672             0x13000,  // 13000..1342F; Egyptian Hieroglyphs
  2673             0x13430,  //               unassigned
  2674             0x16800,  // 16800..16A3F; Bamum Supplement
  2675             0x16A40,  //               unassigned
  2676             0x1B000,  // 1B000..1B0FF; Kana Supplement
  2677             0x1B100,  //               unassigned
  2678             0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
  2679             0x1D100,  // 1D100..1D1FF; Musical Symbols
  2680             0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
  2681             0x1D250,  //               unassigned
  2682             0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
  2683             0x1D360,  // 1D360..1D37F; Counting Rod Numerals
  2684             0x1D380,  //               unassigned
  2685             0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
  2686             0x1D800,  //               unassigned
  2687             0x1F000,  // 1F000..1F02F; Mahjong Tiles
  2688             0x1F030,  // 1F030..1F09F; Domino Tiles
  2689             0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
  2690             0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
  2691             0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
  2692             0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
  2693             0x1F600,  // 1F600..1F64F; Emoticons
  2694             0x1F650,  //               unassigned
  2695             0x1F680,  // 1F680..1F6FF; Transport And Map Symbols
  2696             0x1F700,  // 1F700..1F77F; Alchemical Symbols
  2697             0x1F780,  //               unassigned
  2698             0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
  2699             0x2A6E0,  //               unassigned
  2700             0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
  2701             0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
  2702             0x2B820,  //               unassigned
  2703             0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
  2704             0x2FA20,  //               unassigned
  2705             0xE0000,  // E0000..E007F; Tags
  2706             0xE0080,  //               unassigned
  2707             0xE0100,  // E0100..E01EF; Variation Selectors Supplement
  2708             0xE01F0,  //               unassigned
  2709             0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
  2710             0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
  2711         };
  2712         // Block for the range that starts at blockStarts[i]; null marks a range with no defined block.
  2713         private static final UnicodeBlock[] blocks = {
  2714             BASIC_LATIN,
  2715             LATIN_1_SUPPLEMENT,
  2716             LATIN_EXTENDED_A,
  2717             LATIN_EXTENDED_B,
  2718             IPA_EXTENSIONS,
  2719             SPACING_MODIFIER_LETTERS,
  2720             COMBINING_DIACRITICAL_MARKS,
  2721             GREEK,
  2722             CYRILLIC,
  2723             CYRILLIC_SUPPLEMENTARY,
  2724             ARMENIAN,
  2725             HEBREW,
  2726             ARABIC,
  2727             SYRIAC,
  2728             ARABIC_SUPPLEMENT,
  2729             THAANA,
  2730             NKO,
  2731             SAMARITAN,
  2732             MANDAIC,
  2733             null,
  2734             DEVANAGARI,
  2735             BENGALI,
  2736             GURMUKHI,
  2737             GUJARATI,
  2738             ORIYA,
  2739             TAMIL,
  2740             TELUGU,
  2741             KANNADA,
  2742             MALAYALAM,
  2743             SINHALA,
  2744             THAI,
  2745             LAO,
  2746             TIBETAN,
  2747             MYANMAR,
  2748             GEORGIAN,
  2749             HANGUL_JAMO,
  2750             ETHIOPIC,
  2751             ETHIOPIC_SUPPLEMENT,
  2752             CHEROKEE,
  2753             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
  2754             OGHAM,
  2755             RUNIC,
  2756             TAGALOG,
  2757             HANUNOO,
  2758             BUHID,
  2759             TAGBANWA,
  2760             KHMER,
  2761             MONGOLIAN,
  2762             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
  2763             LIMBU,
  2764             TAI_LE,
  2765             NEW_TAI_LUE,
  2766             KHMER_SYMBOLS,
  2767             BUGINESE,
  2768             TAI_THAM,
  2769             null,
  2770             BALINESE,
  2771             SUNDANESE,
  2772             BATAK,
  2773             LEPCHA,
  2774             OL_CHIKI,
  2775             null,
  2776             VEDIC_EXTENSIONS,
  2777             PHONETIC_EXTENSIONS,
  2778             PHONETIC_EXTENSIONS_SUPPLEMENT,
  2779             COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
  2780             LATIN_EXTENDED_ADDITIONAL,
  2781             GREEK_EXTENDED,
  2782             GENERAL_PUNCTUATION,
  2783             SUPERSCRIPTS_AND_SUBSCRIPTS,
  2784             CURRENCY_SYMBOLS,
  2785             COMBINING_MARKS_FOR_SYMBOLS,
  2786             LETTERLIKE_SYMBOLS,
  2787             NUMBER_FORMS,
  2788             ARROWS,
  2789             MATHEMATICAL_OPERATORS,
  2790             MISCELLANEOUS_TECHNICAL,
  2791             CONTROL_PICTURES,
  2792             OPTICAL_CHARACTER_RECOGNITION,
  2793             ENCLOSED_ALPHANUMERICS,
  2794             BOX_DRAWING,
  2795             BLOCK_ELEMENTS,
  2796             GEOMETRIC_SHAPES,
  2797             MISCELLANEOUS_SYMBOLS,
  2798             DINGBATS,
  2799             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
  2800             SUPPLEMENTAL_ARROWS_A,
  2801             BRAILLE_PATTERNS,
  2802             SUPPLEMENTAL_ARROWS_B,
  2803             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
  2804             SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
  2805             MISCELLANEOUS_SYMBOLS_AND_ARROWS,
  2806             GLAGOLITIC,
  2807             LATIN_EXTENDED_C,
  2808             COPTIC,
  2809             GEORGIAN_SUPPLEMENT,
  2810             TIFINAGH,
  2811             ETHIOPIC_EXTENDED,
  2812             CYRILLIC_EXTENDED_A,
  2813             SUPPLEMENTAL_PUNCTUATION,
  2814             CJK_RADICALS_SUPPLEMENT,
  2815             KANGXI_RADICALS,
  2816             null,
  2817             IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
  2818             CJK_SYMBOLS_AND_PUNCTUATION,
  2819             HIRAGANA,
  2820             KATAKANA,
  2821             BOPOMOFO,
  2822             HANGUL_COMPATIBILITY_JAMO,
  2823             KANBUN,
  2824             BOPOMOFO_EXTENDED,
  2825             CJK_STROKES,
  2826             KATAKANA_PHONETIC_EXTENSIONS,
  2827             ENCLOSED_CJK_LETTERS_AND_MONTHS,
  2828             CJK_COMPATIBILITY,
  2829             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
  2830             YIJING_HEXAGRAM_SYMBOLS,
  2831             CJK_UNIFIED_IDEOGRAPHS,
  2832             YI_SYLLABLES,
  2833             YI_RADICALS,
  2834             LISU,
  2835             VAI,
  2836             CYRILLIC_EXTENDED_B,
  2837             BAMUM,
  2838             MODIFIER_TONE_LETTERS,
  2839             LATIN_EXTENDED_D,
  2840             SYLOTI_NAGRI,
  2841             COMMON_INDIC_NUMBER_FORMS,
  2842             PHAGS_PA,
  2843             SAURASHTRA,
  2844             DEVANAGARI_EXTENDED,
  2845             KAYAH_LI,
  2846             REJANG,
  2847             HANGUL_JAMO_EXTENDED_A,
  2848             JAVANESE,
  2849             null,
  2850             CHAM,
  2851             MYANMAR_EXTENDED_A,
  2852             TAI_VIET,
  2853             null,
  2854             ETHIOPIC_EXTENDED_A,
  2855             null,
  2856             MEETEI_MAYEK,
  2857             HANGUL_SYLLABLES,
  2858             HANGUL_JAMO_EXTENDED_B,
  2859             HIGH_SURROGATES,
  2860             HIGH_PRIVATE_USE_SURROGATES,
  2861             LOW_SURROGATES,
  2862             PRIVATE_USE_AREA,
  2863             CJK_COMPATIBILITY_IDEOGRAPHS,
  2864             ALPHABETIC_PRESENTATION_FORMS,
  2865             ARABIC_PRESENTATION_FORMS_A,
  2866             VARIATION_SELECTORS,
  2867             VERTICAL_FORMS,
  2868             COMBINING_HALF_MARKS,
  2869             CJK_COMPATIBILITY_FORMS,
  2870             SMALL_FORM_VARIANTS,
  2871             ARABIC_PRESENTATION_FORMS_B,
  2872             HALFWIDTH_AND_FULLWIDTH_FORMS,
  2873             SPECIALS,
  2874             LINEAR_B_SYLLABARY,
  2875             LINEAR_B_IDEOGRAMS,
  2876             AEGEAN_NUMBERS,
  2877             ANCIENT_GREEK_NUMBERS,
  2878             ANCIENT_SYMBOLS,
  2879             PHAISTOS_DISC,
  2880             null,
  2881             LYCIAN,
  2882             CARIAN,
  2883             null,
  2884             OLD_ITALIC,
  2885             GOTHIC,
  2886             null,
  2887             UGARITIC,
  2888             OLD_PERSIAN,
  2889             null,
  2890             DESERET,
  2891             SHAVIAN,
  2892             OSMANYA,
  2893             null,
  2894             CYPRIOT_SYLLABARY,
  2895             IMPERIAL_ARAMAIC,
  2896             null,
  2897             PHOENICIAN,
  2898             LYDIAN,
  2899             null,
  2900             KHAROSHTHI,
  2901             OLD_SOUTH_ARABIAN,
  2902             null,
  2903             AVESTAN,
  2904             INSCRIPTIONAL_PARTHIAN,
  2905             INSCRIPTIONAL_PAHLAVI,
  2906             null,
  2907             OLD_TURKIC,
  2908             null,
  2909             RUMI_NUMERAL_SYMBOLS,
  2910             null,
  2911             BRAHMI,
  2912             KAITHI,
  2913             null,
  2914             CUNEIFORM,
  2915             CUNEIFORM_NUMBERS_AND_PUNCTUATION,
  2916             null,
  2917             EGYPTIAN_HIEROGLYPHS,
  2918             null,
  2919             BAMUM_SUPPLEMENT,
  2920             null,
  2921             KANA_SUPPLEMENT,
  2922             null,
  2923             BYZANTINE_MUSICAL_SYMBOLS,
  2924             MUSICAL_SYMBOLS,
  2925             ANCIENT_GREEK_MUSICAL_NOTATION,
  2926             null,
  2927             TAI_XUAN_JING_SYMBOLS,
  2928             COUNTING_ROD_NUMERALS,
  2929             null,
  2930             MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
  2931             null,
  2932             MAHJONG_TILES,
  2933             DOMINO_TILES,
  2934             PLAYING_CARDS,
  2935             ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
  2936             ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
  2937             MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
  2938             EMOTICONS,
  2939             null,
  2940             TRANSPORT_AND_MAP_SYMBOLS,
  2941             ALCHEMICAL_SYMBOLS,
  2942             null,
  2943             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
  2944             null,
  2945             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
  2946             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
  2947             null,
  2948             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
  2949             null,
  2950             TAGS,
  2951             null,
  2952             VARIATION_SELECTORS_SUPPLEMENT,
  2953             null,
  2954             SUPPLEMENTARY_PRIVATE_USE_AREA_A,
  2955             SUPPLEMENTARY_PRIVATE_USE_AREA_B
  2956         };
  2957 
  2958 
  2959         /**
  2960          * Looks up the Unicode block that contains the given {@code char}.
  2961          * The value is widened to an {@code int} and handed to
  2962          * {@link #of(int)}, which performs the actual lookup.
  2963          *
  2964          * <p><b>Note:</b> a single {@code char} cannot represent a
  2965          * <a href="Character.html#supplementary">supplementary
  2966          * character</a>; call {@link #of(int)} directly with a code point
  2967          * when supplementary characters must be supported.
  2968          *
  2969          * @param   c  the character to classify
  2970          * @return  the {@code UnicodeBlock} that {@code c} belongs to, or
  2971          *          {@code null} if {@code c} is not a member of any
  2972          *          defined block
  2973          */
  2974         public static UnicodeBlock of(char c) {
  2975             final int codePoint = c;
  2976             return of(codePoint);
  2977         }
  2978 
  2979         /**
  2980          * Looks up the Unicode block that contains the given code point.
  2981          * The lookup is a binary search over the sorted table of block
  2982          * start values; ranges that belong to no defined block yield
  2983          * {@code null}.
  2984          *
  2985          * @param   codePoint the character (Unicode code point) to classify
  2986          * @return  the {@code UnicodeBlock} that the code point belongs
  2987          *          to, or {@code null} if it is not a member of any
  2988          *          defined block
  2989          * @exception IllegalArgumentException if {@code codePoint} is not
  2990          *            a valid Unicode code point
  2991          * @see Character#isValidCodePoint(int)
  2992          * @since   1.5
  2993          */
  2994         public static UnicodeBlock of(int codePoint) {
  2995             if (!isValidCodePoint(codePoint)) {
  2996                 throw new IllegalArgumentException();
  2997             }
  2998 
  2999             // Half-open search window [lo, hi) over blockStarts; mid is
  3000             // the probe and, once the window has one entry, the answer.
  3001             int lo = 0;
  3002             int hi = blockStarts.length;
  3003             int mid = hi / 2;
  3004 
  3005             // invariant: hi > mid >= lo && codePoint >= blockStarts[lo]
  3006             while (hi - lo > 1) {
  3007                 if (codePoint < blockStarts[mid]) {
  3008                     hi = mid;
  3009                 } else {
  3010                     lo = mid;
  3011                 }
  3012                 mid = (lo + hi) / 2;
  3013             }
  3014             return blocks[mid];
  3015         }
  3016 
  3017         /**
  3018          * Returns the UnicodeBlock with the given name. Block
  3019          * names are determined by The Unicode Standard. The file
  3020          * Blocks-&lt;version&gt;.txt defines blocks for a particular
  3021          * version of the standard. The {@link Character} class specifies
  3022          * the version of the standard that it supports.
  3023          * <p>
  3024          * This method accepts block names in the following forms:
  3025          * <ol>
  3026          * <li> Canonical block names as defined by the Unicode Standard.
  3027          * For example, the standard defines a "Basic Latin" block. Therefore, this
  3028          * method accepts "Basic Latin" as a valid block name. The documentation of
  3029          * each UnicodeBlock provides the canonical name.
  3030          * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
  3031          * is a valid block name for the "Basic Latin" block.
  3032          * <li>The text representation of each constant UnicodeBlock identifier.
  3033          * For example, this method will return the {@link #BASIC_LATIN} block if
  3034          * provided with the "BASIC_LATIN" name. This form replaces all spaces and
  3035          * hyphens in the canonical name with underscores.
  3036          * </ol>
  3037          * Finally, character case is ignored for all of the valid block name forms.
  3038          * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
  3039          * The en_US locale's case mapping rules are used to provide case-insensitive
  3040          * string comparisons for block name validation.
  3041          * <p>
  3042          * If the Unicode Standard changes block names, both the previous and
  3043          * current names will be accepted.
  3044          *
  3045          * @param blockName A {@code UnicodeBlock} name.
  3046          * @return The {@code UnicodeBlock} instance identified
  3047          *         by {@code blockName}
  3048          * @throws IllegalArgumentException if {@code blockName} is an
  3049          *         invalid name; the exception message includes the rejected name
  3050          * @throws NullPointerException if {@code blockName} is null
  3051          * @since 1.5
  3052          */
  3053         public static final UnicodeBlock forName(String blockName) {
  3054             UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
  3055             if (block == null) {
  3056                 throw new IllegalArgumentException("Not a valid block name: " + blockName);
  3057             }
  3058             return block;
  3059         }
  3060     }
  3061 
  3062 
  3063     /**
  3064      * A family of character subsets representing the character scripts
  3065      * defined in the <a href="http://www.unicode.org/reports/tr24/">
  3066      * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
  3067      * character is assigned to a single Unicode script, either a specific
  3068      * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
  3069      * one of the following three special values,
  3070      * {@link Character.UnicodeScript#INHERITED Inherited},
  3071      * {@link Character.UnicodeScript#COMMON Common} or
  3072      * {@link Character.UnicodeScript#UNKNOWN Unknown}.
  3073      *
  3074      * @since 1.7
  3075      */
  3076     public static enum UnicodeScript {
  3077         /**
  3078          * Unicode script "Common".
  3079          */
  3080         COMMON,
  3081 
  3082         /**
  3083          * Unicode script "Latin".
  3084          */
  3085         LATIN,
  3086 
  3087         /**
  3088          * Unicode script "Greek".
  3089          */
  3090         GREEK,
  3091 
  3092         /**
  3093          * Unicode script "Cyrillic".
  3094          */
  3095         CYRILLIC,
  3096 
  3097         /**
  3098          * Unicode script "Armenian".
  3099          */
  3100         ARMENIAN,
  3101 
  3102         /**
  3103          * Unicode script "Hebrew".
  3104          */
  3105         HEBREW,
  3106 
  3107         /**
  3108          * Unicode script "Arabic".
  3109          */
  3110         ARABIC,
  3111 
  3112         /**
  3113          * Unicode script "Syriac".
  3114          */
  3115         SYRIAC,
  3116 
  3117         /**
  3118          * Unicode script "Thaana".
  3119          */
  3120         THAANA,
  3121 
  3122         /**
  3123          * Unicode script "Devanagari".
  3124          */
  3125         DEVANAGARI,
  3126 
  3127         /**
  3128          * Unicode script "Bengali".
  3129          */
  3130         BENGALI,
  3131 
  3132         /**
  3133          * Unicode script "Gurmukhi".
  3134          */
  3135         GURMUKHI,
  3136 
  3137         /**
  3138          * Unicode script "Gujarati".
  3139          */
  3140         GUJARATI,
  3141 
  3142         /**
  3143          * Unicode script "Oriya".
  3144          */
  3145         ORIYA,
  3146 
  3147         /**
  3148          * Unicode script "Tamil".
  3149          */
  3150         TAMIL,
  3151 
  3152         /**
  3153          * Unicode script "Telugu".
  3154          */
  3155         TELUGU,
  3156 
  3157         /**
  3158          * Unicode script "Kannada".
  3159          */
  3160         KANNADA,
  3161 
  3162         /**
  3163          * Unicode script "Malayalam".
  3164          */
  3165         MALAYALAM,
  3166 
  3167         /**
  3168          * Unicode script "Sinhala".
  3169          */
  3170         SINHALA,
  3171 
  3172         /**
  3173          * Unicode script "Thai".
  3174          */
  3175         THAI,
  3176 
  3177         /**
  3178          * Unicode script "Lao".
  3179          */
  3180         LAO,
  3181 
  3182         /**
  3183          * Unicode script "Tibetan".
  3184          */
  3185         TIBETAN,
  3186 
  3187         /**
  3188          * Unicode script "Myanmar".
  3189          */
  3190         MYANMAR,
  3191 
  3192         /**
  3193          * Unicode script "Georgian".
  3194          */
  3195         GEORGIAN,
  3196 
  3197         /**
  3198          * Unicode script "Hangul".
  3199          */
  3200         HANGUL,
  3201 
  3202         /**
  3203          * Unicode script "Ethiopic".
  3204          */
  3205         ETHIOPIC,
  3206 
  3207         /**
  3208          * Unicode script "Cherokee".
  3209          */
  3210         CHEROKEE,
  3211 
  3212         /**
  3213          * Unicode script "Canadian_Aboriginal".
  3214          */
  3215         CANADIAN_ABORIGINAL,
  3216 
  3217         /**
  3218          * Unicode script "Ogham".
  3219          */
  3220         OGHAM,
  3221 
  3222         /**
  3223          * Unicode script "Runic".
  3224          */
  3225         RUNIC,
  3226 
  3227         /**
  3228          * Unicode script "Khmer".
  3229          */
  3230         KHMER,
  3231 
  3232         /**
  3233          * Unicode script "Mongolian".
  3234          */
  3235         MONGOLIAN,
  3236 
  3237         /**
  3238          * Unicode script "Hiragana".
  3239          */
  3240         HIRAGANA,
  3241 
  3242         /**
  3243          * Unicode script "Katakana".
  3244          */
  3245         KATAKANA,
  3246 
  3247         /**
  3248          * Unicode script "Bopomofo".
  3249          */
  3250         BOPOMOFO,
  3251 
  3252         /**
  3253          * Unicode script "Han".
  3254          */
  3255         HAN,
  3256 
  3257         /**
  3258          * Unicode script "Yi".
  3259          */
  3260         YI,
  3261 
  3262         /**
  3263          * Unicode script "Old_Italic".
  3264          */
  3265         OLD_ITALIC,
  3266 
  3267         /**
  3268          * Unicode script "Gothic".
  3269          */
  3270         GOTHIC,
  3271 
  3272         /**
  3273          * Unicode script "Deseret".
  3274          */
  3275         DESERET,
  3276 
  3277         /**
  3278          * Unicode script "Inherited".
  3279          */
  3280         INHERITED,
  3281 
  3282         /**
  3283          * Unicode script "Tagalog".
  3284          */
  3285         TAGALOG,
  3286 
  3287         /**
  3288          * Unicode script "Hanunoo".
  3289          */
  3290         HANUNOO,
  3291 
  3292         /**
  3293          * Unicode script "Buhid".
  3294          */
  3295         BUHID,
  3296 
  3297         /**
  3298          * Unicode script "Tagbanwa".
  3299          */
  3300         TAGBANWA,
  3301 
  3302         /**
  3303          * Unicode script "Limbu".
  3304          */
  3305         LIMBU,
  3306 
  3307         /**
  3308          * Unicode script "Tai_Le".
  3309          */
  3310         TAI_LE,
  3311 
  3312         /**
  3313          * Unicode script "Linear_B".
  3314          */
  3315         LINEAR_B,
  3316 
  3317         /**
  3318          * Unicode script "Ugaritic".
  3319          */
  3320         UGARITIC,
  3321 
  3322         /**
  3323          * Unicode script "Shavian".
  3324          */
  3325         SHAVIAN,
  3326 
  3327         /**
  3328          * Unicode script "Osmanya".
  3329          */
  3330         OSMANYA,
  3331 
  3332         /**
  3333          * Unicode script "Cypriot".
  3334          */
  3335         CYPRIOT,
  3336 
  3337         /**
  3338          * Unicode script "Braille".
  3339          */
  3340         BRAILLE,
  3341 
  3342         /**
  3343          * Unicode script "Buginese".
  3344          */
  3345         BUGINESE,
  3346 
  3347         /**
  3348          * Unicode script "Coptic".
  3349          */
  3350         COPTIC,
  3351 
  3352         /**
  3353          * Unicode script "New_Tai_Lue".
  3354          */
  3355         NEW_TAI_LUE,
  3356 
  3357         /**
  3358          * Unicode script "Glagolitic".
  3359          */
  3360         GLAGOLITIC,
  3361 
  3362         /**
  3363          * Unicode script "Tifinagh".
  3364          */
  3365         TIFINAGH,
  3366 
  3367         /**
  3368          * Unicode script "Syloti_Nagri".
  3369          */
  3370         SYLOTI_NAGRI,
  3371 
  3372         /**
  3373          * Unicode script "Old_Persian".
  3374          */
  3375         OLD_PERSIAN,
  3376 
  3377         /**
  3378          * Unicode script "Kharoshthi".
  3379          */
  3380         KHAROSHTHI,
  3381 
  3382         /**
  3383          * Unicode script "Balinese".
  3384          */
  3385         BALINESE,
  3386 
  3387         /**
  3388          * Unicode script "Cuneiform".
  3389          */
  3390         CUNEIFORM,
  3391 
  3392         /**
  3393          * Unicode script "Phoenician".
  3394          */
  3395         PHOENICIAN,
  3396 
  3397         /**
  3398          * Unicode script "Phags_Pa".
  3399          */
  3400         PHAGS_PA,
  3401 
  3402         /**
  3403          * Unicode script "Nko".
  3404          */
  3405         NKO,
  3406 
  3407         /**
  3408          * Unicode script "Sundanese".
  3409          */
  3410         SUNDANESE,
  3411 
  3412         /**
  3413          * Unicode script "Batak".
  3414          */
  3415         BATAK,
  3416 
  3417         /**
  3418          * Unicode script "Lepcha".
  3419          */
  3420         LEPCHA,
  3421 
  3422         /**
  3423          * Unicode script "Ol_Chiki".
  3424          */
  3425         OL_CHIKI,
  3426 
  3427         /**
  3428          * Unicode script "Vai".
  3429          */
  3430         VAI,
  3431 
  3432         /**
  3433          * Unicode script "Saurashtra".
  3434          */
  3435         SAURASHTRA,
  3436 
  3437         /**
  3438          * Unicode script "Kayah_Li".
  3439          */
  3440         KAYAH_LI,
  3441 
  3442         /**
  3443          * Unicode script "Rejang".
  3444          */
  3445         REJANG,
  3446 
  3447         /**
  3448          * Unicode script "Lycian".
  3449          */
  3450         LYCIAN,
  3451 
  3452         /**
  3453          * Unicode script "Carian".
  3454          */
  3455         CARIAN,
  3456 
  3457         /**
  3458          * Unicode script "Lydian".
  3459          */
  3460         LYDIAN,
  3461 
  3462         /**
  3463          * Unicode script "Cham".
  3464          */
  3465         CHAM,
  3466 
  3467         /**
  3468          * Unicode script "Tai_Tham".
  3469          */
  3470         TAI_THAM,
  3471 
  3472         /**
  3473          * Unicode script "Tai_Viet".
  3474          */
  3475         TAI_VIET,
  3476 
  3477         /**
  3478          * Unicode script "Avestan".
  3479          */
  3480         AVESTAN,
  3481 
  3482         /**
  3483          * Unicode script "Egyptian_Hieroglyphs".
  3484          */
  3485         EGYPTIAN_HIEROGLYPHS,
  3486 
  3487         /**
  3488          * Unicode script "Samaritan".
  3489          */
  3490         SAMARITAN,
  3491 
  3492         /**
  3493          * Unicode script "Mandaic".
  3494          */
  3495         MANDAIC,
  3496 
  3497         /**
  3498          * Unicode script "Lisu".
  3499          */
  3500         LISU,
  3501 
  3502         /**
  3503          * Unicode script "Bamum".
  3504          */
  3505         BAMUM,
  3506 
  3507         /**
  3508          * Unicode script "Javanese".
  3509          */
  3510         JAVANESE,
  3511 
  3512         /**
  3513          * Unicode script "Meetei_Mayek".
  3514          */
  3515         MEETEI_MAYEK,
  3516 
  3517         /**
  3518          * Unicode script "Imperial_Aramaic".
  3519          */
  3520         IMPERIAL_ARAMAIC,
  3521 
  3522         /**
  3523          * Unicode script "Old_South_Arabian".
  3524          */
  3525         OLD_SOUTH_ARABIAN,
  3526 
  3527         /**
  3528          * Unicode script "Inscriptional_Parthian".
  3529          */
  3530         INSCRIPTIONAL_PARTHIAN,
  3531 
  3532         /**
  3533          * Unicode script "Inscriptional_Pahlavi".
  3534          */
  3535         INSCRIPTIONAL_PAHLAVI,
  3536 
  3537         /**
  3538          * Unicode script "Old_Turkic".
  3539          */
  3540         OLD_TURKIC,
  3541 
  3542         /**
  3543          * Unicode script "Brahmi".
  3544          */
  3545         BRAHMI,
  3546 
  3547         /**
  3548          * Unicode script "Kaithi".
  3549          */
  3550         KAITHI,
  3551 
  3552         /**
  3553          * Unicode script "Unknown".
  3554          */
  3555         UNKNOWN;
  3556 
  3557         private static final int[] scriptStarts = {
  3558             0x0000,   // 0000..0040; COMMON
  3559             0x0041,   // 0041..005A; LATIN
  3560             0x005B,   // 005B..0060; COMMON
  3561             0x0061,   // 0061..007A; LATIN
  3562             0x007B,   // 007B..00A9; COMMON
  3563             0x00AA,   // 00AA..00AA; LATIN
  3564             0x00AB,   // 00AB..00B9; COMMON
  3565             0x00BA,   // 00BA..00BA; LATIN
  3566             0x00BB,   // 00BB..00BF; COMMON
  3567             0x00C0,   // 00C0..00D6; LATIN
  3568             0x00D7,   // 00D7..00D7; COMMON
  3569             0x00D8,   // 00D8..00F6; LATIN
  3570             0x00F7,   // 00F7..00F7; COMMON
  3571             0x00F8,   // 00F8..02B8; LATIN
  3572             0x02B9,   // 02B9..02DF; COMMON
  3573             0x02E0,   // 02E0..02E4; LATIN
  3574             0x02E5,   // 02E5..02E9; COMMON
  3575             0x02EA,   // 02EA..02EB; BOPOMOFO
  3576             0x02EC,   // 02EC..02FF; COMMON
  3577             0x0300,   // 0300..036F; INHERITED
  3578             0x0370,   // 0370..0373; GREEK
  3579             0x0374,   // 0374..0374; COMMON
  3580             0x0375,   // 0375..037D; GREEK
  3581             0x037E,   // 037E..0383; COMMON
  3582             0x0384,   // 0384..0384; GREEK
  3583             0x0385,   // 0385..0385; COMMON
  3584             0x0386,   // 0386..0386; GREEK
  3585             0x0387,   // 0387..0387; COMMON
  3586             0x0388,   // 0388..03E1; GREEK
  3587             0x03E2,   // 03E2..03EF; COPTIC
  3588             0x03F0,   // 03F0..03FF; GREEK
  3589             0x0400,   // 0400..0484; CYRILLIC
  3590             0x0485,   // 0485..0486; INHERITED
  3591             0x0487,   // 0487..0530; CYRILLIC
  3592             0x0531,   // 0531..0588; ARMENIAN
  3593             0x0589,   // 0589..0589; COMMON
  3594             0x058A,   // 058A..0590; ARMENIAN
  3595             0x0591,   // 0591..05FF; HEBREW
  3596             0x0600,   // 0600..060B; ARABIC
  3597             0x060C,   // 060C..060C; COMMON
  3598             0x060D,   // 060D..061A; ARABIC
  3599             0x061B,   // 061B..061D; COMMON
  3600             0x061E,   // 061E..061E; ARABIC
  3601             0x061F,   // 061F..061F; COMMON
  3602             0x0620,   // 0620..063F; ARABIC
  3603             0x0640,   // 0640..0640; COMMON
  3604             0x0641,   // 0641..064A; ARABIC
  3605             0x064B,   // 064B..0655; INHERITED
  3606             0x0656,   // 0656..065E; ARABIC
  3607             0x065F,   // 065F..065F; INHERITED
  3608             0x0660,   // 0660..0669; COMMON
  3609             0x066A,   // 066A..066F; ARABIC
  3610             0x0670,   // 0670..0670; INHERITED
  3611             0x0671,   // 0671..06DC; ARABIC
  3612             0x06DD,   // 06DD..06DD; COMMON
  3613             0x06DE,   // 06DE..06FF; ARABIC
  3614             0x0700,   // 0700..074F; SYRIAC
  3615             0x0750,   // 0750..077F; ARABIC
  3616             0x0780,   // 0780..07BF; THAANA
  3617             0x07C0,   // 07C0..07FF; NKO
  3618             0x0800,   // 0800..083F; SAMARITAN
  3619             0x0840,   // 0840..08FF; MANDAIC
  3620             0x0900,   // 0900..0950; DEVANAGARI
  3621             0x0951,   // 0951..0952; INHERITED
  3622             0x0953,   // 0953..0963; DEVANAGARI
  3623             0x0964,   // 0964..0965; COMMON
  3624             0x0966,   // 0966..096F; DEVANAGARI
  3625             0x0970,   // 0970..0970; COMMON
  3626             0x0971,   // 0971..0980; DEVANAGARI
  3627             0x0981,   // 0981..0A00; BENGALI
  3628             0x0A01,   // 0A01..0A80; GURMUKHI
  3629             0x0A81,   // 0A81..0B00; GUJARATI
  3630             0x0B01,   // 0B01..0B81; ORIYA
  3631             0x0B82,   // 0B82..0C00; TAMIL
  3632             0x0C01,   // 0C01..0C81; TELUGU
  3633             0x0C82,   // 0C82..0CF0; KANNADA
  3634             0x0D02,   // 0D02..0D81; MALAYALAM
  3635             0x0D82,   // 0D82..0E00; SINHALA
  3636             0x0E01,   // 0E01..0E3E; THAI
  3637             0x0E3F,   // 0E3F..0E3F; COMMON
  3638             0x0E40,   // 0E40..0E80; THAI
  3639             0x0E81,   // 0E81..0EFF; LAO
  3640             0x0F00,   // 0F00..0FD4; TIBETAN
  3641             0x0FD5,   // 0FD5..0FD8; COMMON
  3642             0x0FD9,   // 0FD9..0FFF; TIBETAN
  3643             0x1000,   // 1000..109F; MYANMAR
  3644             0x10A0,   // 10A0..10FA; GEORGIAN
  3645             0x10FB,   // 10FB..10FB; COMMON
  3646             0x10FC,   // 10FC..10FF; GEORGIAN
  3647             0x1100,   // 1100..11FF; HANGUL
  3648             0x1200,   // 1200..139F; ETHIOPIC
  3649             0x13A0,   // 13A0..13FF; CHEROKEE
  3650             0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
  3651             0x1680,   // 1680..169F; OGHAM
  3652             0x16A0,   // 16A0..16EA; RUNIC
  3653             0x16EB,   // 16EB..16ED; COMMON
  3654             0x16EE,   // 16EE..16FF; RUNIC
  3655             0x1700,   // 1700..171F; TAGALOG
  3656             0x1720,   // 1720..1734; HANUNOO
  3657             0x1735,   // 1735..173F; COMMON
  3658             0x1740,   // 1740..175F; BUHID
  3659             0x1760,   // 1760..177F; TAGBANWA
  3660             0x1780,   // 1780..17FF; KHMER
  3661             0x1800,   // 1800..1801; MONGOLIAN
  3662             0x1802,   // 1802..1803; COMMON
  3663             0x1804,   // 1804..1804; MONGOLIAN
  3664             0x1805,   // 1805..1805; COMMON
  3665             0x1806,   // 1806..18AF; MONGOLIAN
  3666             0x18B0,   // 18B0..18FF; CANADIAN_ABORIGINAL
  3667             0x1900,   // 1900..194F; LIMBU
  3668             0x1950,   // 1950..197F; TAI_LE
  3669             0x1980,   // 1980..19DF; NEW_TAI_LUE
  3670             0x19E0,   // 19E0..19FF; KHMER
  3671             0x1A00,   // 1A00..1A1F; BUGINESE
  3672             0x1A20,   // 1A20..1AFF; TAI_THAM
  3673             0x1B00,   // 1B00..1B7F; BALINESE
  3674             0x1B80,   // 1B80..1BBF; SUNDANESE
  3675             0x1BC0,   // 1BC0..1BFF; BATAK
  3676             0x1C00,   // 1C00..1C4F; LEPCHA
  3677             0x1C50,   // 1C50..1CCF; OL_CHIKI
  3678             0x1CD0,   // 1CD0..1CD2; INHERITED
  3679             0x1CD3,   // 1CD3..1CD3; COMMON
  3680             0x1CD4,   // 1CD4..1CE0; INHERITED
  3681             0x1CE1,   // 1CE1..1CE1; COMMON
  3682             0x1CE2,   // 1CE2..1CE8; INHERITED
  3683             0x1CE9,   // 1CE9..1CEC; COMMON
  3684             0x1CED,   // 1CED..1CED; INHERITED
  3685             0x1CEE,   // 1CEE..1CFF; COMMON
  3686             0x1D00,   // 1D00..1D25; LATIN
  3687             0x1D26,   // 1D26..1D2A; GREEK
  3688             0x1D2B,   // 1D2B..1D2B; CYRILLIC
  3689             0x1D2C,   // 1D2C..1D5C; LATIN
  3690             0x1D5D,   // 1D5D..1D61; GREEK
  3691             0x1D62,   // 1D62..1D65; LATIN
  3692             0x1D66,   // 1D66..1D6A; GREEK
  3693             0x1D6B,   // 1D6B..1D77; LATIN
  3694             0x1D78,   // 1D78..1D78; CYRILLIC
  3695             0x1D79,   // 1D79..1DBE; LATIN
  3696             0x1DBF,   // 1DBF..1DBF; GREEK
  3697             0x1DC0,   // 1DC0..1DFF; INHERITED
  3698             0x1E00,   // 1E00..1EFF; LATIN
  3699             0x1F00,   // 1F00..1FFF; GREEK
  3700             0x2000,   // 2000..200B; COMMON
  3701             0x200C,   // 200C..200D; INHERITED
  3702             0x200E,   // 200E..2070; COMMON
  3703             0x2071,   // 2071..2073; LATIN
  3704             0x2074,   // 2074..207E; COMMON
  3705             0x207F,   // 207F..207F; LATIN
  3706             0x2080,   // 2080..208F; COMMON
  3707             0x2090,   // 2090..209F; LATIN
  3708             0x20A0,   // 20A0..20CF; COMMON
  3709             0x20D0,   // 20D0..20FF; INHERITED
  3710             0x2100,   // 2100..2125; COMMON
  3711             0x2126,   // 2126..2126; GREEK
  3712             0x2127,   // 2127..2129; COMMON
  3713             0x212A,   // 212A..212B; LATIN
  3714             0x212C,   // 212C..2131; COMMON
  3715             0x2132,   // 2132..2132; LATIN
  3716             0x2133,   // 2133..214D; COMMON
  3717             0x214E,   // 214E..214E; LATIN
  3718             0x214F,   // 214F..215F; COMMON
  3719             0x2160,   // 2160..2188; LATIN
  3720             0x2189,   // 2189..27FF; COMMON
  3721             0x2800,   // 2800..28FF; BRAILLE
  3722             0x2900,   // 2900..2BFF; COMMON
  3723             0x2C00,   // 2C00..2C5F; GLAGOLITIC
  3724             0x2C60,   // 2C60..2C7F; LATIN
  3725             0x2C80,   // 2C80..2CFF; COPTIC
  3726             0x2D00,   // 2D00..2D2F; GEORGIAN
  3727             0x2D30,   // 2D30..2D7F; TIFINAGH
  3728             0x2D80,   // 2D80..2DDF; ETHIOPIC
  3729             0x2DE0,   // 2DE0..2DFF; CYRILLIC
  3730             0x2E00,   // 2E00..2E7F; COMMON
  3731             0x2E80,   // 2E80..2FEF; HAN
  3732             0x2FF0,   // 2FF0..3004; COMMON
  3733             0x3005,   // 3005..3005; HAN
  3734             0x3006,   // 3006..3006; COMMON
  3735             0x3007,   // 3007..3007; HAN
  3736             0x3008,   // 3008..3020; COMMON
  3737             0x3021,   // 3021..3029; HAN
  3738             0x302A,   // 302A..302D; INHERITED
  3739             0x302E,   // 302E..302F; HANGUL
  3740             0x3030,   // 3030..3037; COMMON
  3741             0x3038,   // 3038..303B; HAN
  3742             0x303C,   // 303C..3040; COMMON
  3743             0x3041,   // 3041..3098; HIRAGANA
  3744             0x3099,   // 3099..309A; INHERITED
  3745             0x309B,   // 309B..309C; COMMON
  3746             0x309D,   // 309D..309F; HIRAGANA
  3747             0x30A0,   // 30A0..30A0; COMMON
  3748             0x30A1,   // 30A1..30FA; KATAKANA
  3749             0x30FB,   // 30FB..30FC; COMMON
  3750             0x30FD,   // 30FD..3104; KATAKANA
  3751             0x3105,   // 3105..3130; BOPOMOFO
  3752             0x3131,   // 3131..318F; HANGUL
  3753             0x3190,   // 3190..319F; COMMON
  3754             0x31A0,   // 31A0..31BF; BOPOMOFO
  3755             0x31C0,   // 31C0..31EF; COMMON
  3756             0x31F0,   // 31F0..31FF; KATAKANA
  3757             0x3200,   // 3200..321F; HANGUL
  3758             0x3220,   // 3220..325F; COMMON
  3759             0x3260,   // 3260..327E; HANGUL
  3760             0x327F,   // 327F..32CF; COMMON
  3761             0x32D0,   // 32D0..3357; KATAKANA
  3762             0x3358,   // 3358..33FF; COMMON
  3763             0x3400,   // 3400..4DBF; HAN
  3764             0x4DC0,   // 4DC0..4DFF; COMMON
  3765             0x4E00,   // 4E00..9FFF; HAN
  3766             0xA000,   // A000..A4CF; YI
  3767             0xA4D0,   // A4D0..A4FF; LISU
  3768             0xA500,   // A500..A63F; VAI
  3769             0xA640,   // A640..A69F; CYRILLIC
  3770             0xA6A0,   // A6A0..A6FF; BAMUM
  3771             0xA700,   // A700..A721; COMMON
  3772             0xA722,   // A722..A787; LATIN
  3773             0xA788,   // A788..A78A; COMMON
  3774             0xA78B,   // A78B..A7FF; LATIN
  3775             0xA800,   // A800..A82F; SYLOTI_NAGRI
  3776             0xA830,   // A830..A83F; COMMON
  3777             0xA840,   // A840..A87F; PHAGS_PA
  3778             0xA880,   // A880..A8DF; SAURASHTRA
  3779             0xA8E0,   // A8E0..A8FF; DEVANAGARI
  3780             0xA900,   // A900..A92F; KAYAH_LI
  3781             0xA930,   // A930..A95F; REJANG
  3782             0xA960,   // A960..A97F; HANGUL
  3783             0xA980,   // A980..A9FF; JAVANESE
  3784             0xAA00,   // AA00..AA5F; CHAM
  3785             0xAA60,   // AA60..AA7F; MYANMAR
  3786             0xAA80,   // AA80..AB00; TAI_VIET
  3787             0xAB01,   // AB01..ABBF; ETHIOPIC
  3788             0xABC0,   // ABC0..ABFF; MEETEI_MAYEK
  3789             0xAC00,   // AC00..D7FB; HANGUL
  3790             0xD7FC,   // D7FC..F8FF; UNKNOWN
  3791             0xF900,   // F900..FAFF; HAN
  3792             0xFB00,   // FB00..FB12; LATIN
  3793             0xFB13,   // FB13..FB1C; ARMENIAN
  3794             0xFB1D,   // FB1D..FB4F; HEBREW
  3795             0xFB50,   // FB50..FD3D; ARABIC
  3796             0xFD3E,   // FD3E..FD4F; COMMON
  3797             0xFD50,   // FD50..FDFC; ARABIC
  3798             0xFDFD,   // FDFD..FDFF; COMMON
  3799             0xFE00,   // FE00..FE0F; INHERITED
  3800             0xFE10,   // FE10..FE1F; COMMON
  3801             0xFE20,   // FE20..FE2F; INHERITED
  3802             0xFE30,   // FE30..FE6F; COMMON
  3803             0xFE70,   // FE70..FEFE; ARABIC
  3804             0xFEFF,   // FEFF..FF20; COMMON
  3805             0xFF21,   // FF21..FF3A; LATIN
  3806             0xFF3B,   // FF3B..FF40; COMMON
  3807             0xFF41,   // FF41..FF5A; LATIN
  3808             0xFF5B,   // FF5B..FF65; COMMON
  3809             0xFF66,   // FF66..FF6F; KATAKANA
  3810             0xFF70,   // FF70..FF70; COMMON
  3811             0xFF71,   // FF71..FF9D; KATAKANA
  3812             0xFF9E,   // FF9E..FF9F; COMMON
  3813             0xFFA0,   // FFA0..FFDF; HANGUL
  3814             0xFFE0,   // FFE0..FFFF; COMMON
  3815             0x10000,  // 10000..100FF; LINEAR_B
  3816             0x10100,  // 10100..1013F; COMMON
  3817             0x10140,  // 10140..1018F; GREEK
  3818             0x10190,  // 10190..101FC; COMMON
  3819             0x101FD,  // 101FD..1027F; INHERITED
  3820             0x10280,  // 10280..1029F; LYCIAN
  3821             0x102A0,  // 102A0..102FF; CARIAN
  3822             0x10300,  // 10300..1032F; OLD_ITALIC
  3823             0x10330,  // 10330..1037F; GOTHIC
  3824             0x10380,  // 10380..1039F; UGARITIC
  3825             0x103A0,  // 103A0..103FF; OLD_PERSIAN
  3826             0x10400,  // 10400..1044F; DESERET
  3827             0x10450,  // 10450..1047F; SHAVIAN
  3828             0x10480,  // 10480..107FF; OSMANYA
  3829             0x10800,  // 10800..1083F; CYPRIOT
  3830             0x10840,  // 10840..108FF; IMPERIAL_ARAMAIC
  3831             0x10900,  // 10900..1091F; PHOENICIAN
  3832             0x10920,  // 10920..109FF; LYDIAN
  3833             0x10A00,  // 10A00..10A5F; KHAROSHTHI
  3834             0x10A60,  // 10A60..10AFF; OLD_SOUTH_ARABIAN
  3835             0x10B00,  // 10B00..10B3F; AVESTAN
  3836             0x10B40,  // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
  3837             0x10B60,  // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
  3838             0x10C00,  // 10C00..10E5F; OLD_TURKIC
  3839             0x10E60,  // 10E60..10FFF; ARABIC
  3840             0x11000,  // 11000..1107F; BRAHMI
  3841             0x11080,  // 11080..11FFF; KAITHI
  3842             0x12000,  // 12000..12FFF; CUNEIFORM
  3843             0x13000,  // 13000..167FF; EGYPTIAN_HIEROGLYPHS
  3844             0x16800,  // 16800..16A38; BAMUM
  3845             0x1B000,  // 1B000..1B000; KATAKANA
  3846             0x1B001,  // 1B001..1CFFF; HIRAGANA
  3847             0x1D000,  // 1D000..1D166; COMMON
  3848             0x1D167,  // 1D167..1D169; INHERITED
  3849             0x1D16A,  // 1D16A..1D17A; COMMON
  3850             0x1D17B,  // 1D17B..1D182; INHERITED
  3851             0x1D183,  // 1D183..1D184; COMMON
  3852             0x1D185,  // 1D185..1D18B; INHERITED
  3853             0x1D18C,  // 1D18C..1D1A9; COMMON
  3854             0x1D1AA,  // 1D1AA..1D1AD; INHERITED
  3855             0x1D1AE,  // 1D1AE..1D1FF; COMMON
  3856             0x1D200,  // 1D200..1D2FF; GREEK
  3857             0x1D300,  // 1D300..1F1FF; COMMON
  3858             0x1F200,  // 1F200..1F200; HIRAGANA
  3859             0x1F201,  // 1F210..1FFFF; COMMON
  3860             0x20000,  // 20000..E0000; HAN
  3861             0xE0001,  // E0001..E00FF; COMMON
  3862             0xE0100,  // E0100..E01EF; INHERITED
  3863             0xE01F0   // E01F0..10FFFF; UNKNOWN
  3864 
  3865         };
  3866 
  3867         private static final UnicodeScript[] scripts = {
  3868             COMMON,
  3869             LATIN,
  3870             COMMON,
  3871             LATIN,
  3872             COMMON,
  3873             LATIN,
  3874             COMMON,
  3875             LATIN,
  3876             COMMON,
  3877             LATIN,
  3878             COMMON,
  3879             LATIN,
  3880             COMMON,
  3881             LATIN,
  3882             COMMON,
  3883             LATIN,
  3884             COMMON,
  3885             BOPOMOFO,
  3886             COMMON,
  3887             INHERITED,
  3888             GREEK,
  3889             COMMON,
  3890             GREEK,
  3891             COMMON,
  3892             GREEK,
  3893             COMMON,
  3894             GREEK,
  3895             COMMON,
  3896             GREEK,
  3897             COPTIC,
  3898             GREEK,
  3899             CYRILLIC,
  3900             INHERITED,
  3901             CYRILLIC,
  3902             ARMENIAN,
  3903             COMMON,
  3904             ARMENIAN,
  3905             HEBREW,
  3906             ARABIC,
  3907             COMMON,
  3908             ARABIC,
  3909             COMMON,
  3910             ARABIC,
  3911             COMMON,
  3912             ARABIC,
  3913             COMMON,
  3914             ARABIC,
  3915             INHERITED,
  3916             ARABIC,
  3917             INHERITED,
  3918             COMMON,
  3919             ARABIC,
  3920             INHERITED,
  3921             ARABIC,
  3922             COMMON,
  3923             ARABIC,
  3924             SYRIAC,
  3925             ARABIC,
  3926             THAANA,
  3927             NKO,
  3928             SAMARITAN,
  3929             MANDAIC,
  3930             DEVANAGARI,
  3931             INHERITED,
  3932             DEVANAGARI,
  3933             COMMON,
  3934             DEVANAGARI,
  3935             COMMON,
  3936             DEVANAGARI,
  3937             BENGALI,
  3938             GURMUKHI,
  3939             GUJARATI,
  3940             ORIYA,
  3941             TAMIL,
  3942             TELUGU,
  3943             KANNADA,
  3944             MALAYALAM,
  3945             SINHALA,
  3946             THAI,
  3947             COMMON,
  3948             THAI,
  3949             LAO,
  3950             TIBETAN,
  3951             COMMON,
  3952             TIBETAN,
  3953             MYANMAR,
  3954             GEORGIAN,
  3955             COMMON,
  3956             GEORGIAN,
  3957             HANGUL,
  3958             ETHIOPIC,
  3959             CHEROKEE,
  3960             CANADIAN_ABORIGINAL,
  3961             OGHAM,
  3962             RUNIC,
  3963             COMMON,
  3964             RUNIC,
  3965             TAGALOG,
  3966             HANUNOO,
  3967             COMMON,
  3968             BUHID,
  3969             TAGBANWA,
  3970             KHMER,
  3971             MONGOLIAN,
  3972             COMMON,
  3973             MONGOLIAN,
  3974             COMMON,
  3975             MONGOLIAN,
  3976             CANADIAN_ABORIGINAL,
  3977             LIMBU,
  3978             TAI_LE,
  3979             NEW_TAI_LUE,
  3980             KHMER,
  3981             BUGINESE,
  3982             TAI_THAM,
  3983             BALINESE,
  3984             SUNDANESE,
  3985             BATAK,
  3986             LEPCHA,
  3987             OL_CHIKI,
  3988             INHERITED,
  3989             COMMON,
  3990             INHERITED,
  3991             COMMON,
  3992             INHERITED,
  3993             COMMON,
  3994             INHERITED,
  3995             COMMON,
  3996             LATIN,
  3997             GREEK,
  3998             CYRILLIC,
  3999             LATIN,
  4000             GREEK,
  4001             LATIN,
  4002             GREEK,
  4003             LATIN,
  4004             CYRILLIC,
  4005             LATIN,
  4006             GREEK,
  4007             INHERITED,
  4008             LATIN,
  4009             GREEK,
  4010             COMMON,
  4011             INHERITED,
  4012             COMMON,
  4013             LATIN,
  4014             COMMON,
  4015             LATIN,
  4016             COMMON,
  4017             LATIN,
  4018             COMMON,
  4019             INHERITED,
  4020             COMMON,
  4021             GREEK,
  4022             COMMON,
  4023             LATIN,
  4024             COMMON,
  4025             LATIN,
  4026             COMMON,
  4027             LATIN,
  4028             COMMON,
  4029             LATIN,
  4030             COMMON,
  4031             BRAILLE,
  4032             COMMON,
  4033             GLAGOLITIC,
  4034             LATIN,
  4035             COPTIC,
  4036             GEORGIAN,
  4037             TIFINAGH,
  4038             ETHIOPIC,
  4039             CYRILLIC,
  4040             COMMON,
  4041             HAN,
  4042             COMMON,
  4043             HAN,
  4044             COMMON,
  4045             HAN,
  4046             COMMON,
  4047             HAN,
  4048             INHERITED,
  4049             HANGUL,
  4050             COMMON,
  4051             HAN,
  4052             COMMON,
  4053             HIRAGANA,
  4054             INHERITED,
  4055             COMMON,
  4056             HIRAGANA,
  4057             COMMON,
  4058             KATAKANA,
  4059             COMMON,
  4060             KATAKANA,
  4061             BOPOMOFO,
  4062             HANGUL,
  4063             COMMON,
  4064             BOPOMOFO,
  4065             COMMON,
  4066             KATAKANA,
  4067             HANGUL,
  4068             COMMON,
  4069             HANGUL,
  4070             COMMON,
  4071             KATAKANA,
  4072             COMMON,
  4073             HAN,
  4074             COMMON,
  4075             HAN,
  4076             YI,
  4077             LISU,
  4078             VAI,
  4079             CYRILLIC,
  4080             BAMUM,
  4081             COMMON,
  4082             LATIN,
  4083             COMMON,
  4084             LATIN,
  4085             SYLOTI_NAGRI,
  4086             COMMON,
  4087             PHAGS_PA,
  4088             SAURASHTRA,
  4089             DEVANAGARI,
  4090             KAYAH_LI,
  4091             REJANG,
  4092             HANGUL,
  4093             JAVANESE,
  4094             CHAM,
  4095             MYANMAR,
  4096             TAI_VIET,
  4097             ETHIOPIC,
  4098             MEETEI_MAYEK,
  4099             HANGUL,
  4100             UNKNOWN,
  4101             HAN,
  4102             LATIN,
  4103             ARMENIAN,
  4104             HEBREW,
  4105             ARABIC,
  4106             COMMON,
  4107             ARABIC,
  4108             COMMON,
  4109             INHERITED,
  4110             COMMON,
  4111             INHERITED,
  4112             COMMON,
  4113             ARABIC,
  4114             COMMON,
  4115             LATIN,
  4116             COMMON,
  4117             LATIN,
  4118             COMMON,
  4119             KATAKANA,
  4120             COMMON,
  4121             KATAKANA,
  4122             COMMON,
  4123             HANGUL,
  4124             COMMON,
  4125             LINEAR_B,
  4126             COMMON,
  4127             GREEK,
  4128             COMMON,
  4129             INHERITED,
  4130             LYCIAN,
  4131             CARIAN,
  4132             OLD_ITALIC,
  4133             GOTHIC,
  4134             UGARITIC,
  4135             OLD_PERSIAN,
  4136             DESERET,
  4137             SHAVIAN,
  4138             OSMANYA,
  4139             CYPRIOT,
  4140             IMPERIAL_ARAMAIC,
  4141             PHOENICIAN,
  4142             LYDIAN,
  4143             KHAROSHTHI,
  4144             OLD_SOUTH_ARABIAN,
  4145             AVESTAN,
  4146             INSCRIPTIONAL_PARTHIAN,
  4147             INSCRIPTIONAL_PAHLAVI,
  4148             OLD_TURKIC,
  4149             ARABIC,
  4150             BRAHMI,
  4151             KAITHI,
  4152             CUNEIFORM,
  4153             EGYPTIAN_HIEROGLYPHS,
  4154             BAMUM,
  4155             KATAKANA,
  4156             HIRAGANA,
  4157             COMMON,
  4158             INHERITED,
  4159             COMMON,
  4160             INHERITED,
  4161             COMMON,
  4162             INHERITED,
  4163             COMMON,
  4164             INHERITED,
  4165             COMMON,
  4166             GREEK,
  4167             COMMON,
  4168             HIRAGANA,
  4169             COMMON,
  4170             HAN,
  4171             COMMON,
  4172             INHERITED,
  4173             UNKNOWN
  4174         };
  4175 
  4176         private static HashMap<String, Character.UnicodeScript> aliases;
  4177         static {
  4178             aliases = new HashMap<>(128);
  4179             aliases.put("ARAB", ARABIC);
  4180             aliases.put("ARMI", IMPERIAL_ARAMAIC);
  4181             aliases.put("ARMN", ARMENIAN);
  4182             aliases.put("AVST", AVESTAN);
  4183             aliases.put("BALI", BALINESE);
  4184             aliases.put("BAMU", BAMUM);
  4185             aliases.put("BATK", BATAK);
  4186             aliases.put("BENG", BENGALI);
  4187             aliases.put("BOPO", BOPOMOFO);
  4188             aliases.put("BRAI", BRAILLE);
  4189             aliases.put("BRAH", BRAHMI);
  4190             aliases.put("BUGI", BUGINESE);
  4191             aliases.put("BUHD", BUHID);
  4192             aliases.put("CANS", CANADIAN_ABORIGINAL);
  4193             aliases.put("CARI", CARIAN);
  4194             aliases.put("CHAM", CHAM);
  4195             aliases.put("CHER", CHEROKEE);
  4196             aliases.put("COPT", COPTIC);
  4197             aliases.put("CPRT", CYPRIOT);
  4198             aliases.put("CYRL", CYRILLIC);
  4199             aliases.put("DEVA", DEVANAGARI);
  4200             aliases.put("DSRT", DESERET);
  4201             aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
  4202             aliases.put("ETHI", ETHIOPIC);
  4203             aliases.put("GEOR", GEORGIAN);
  4204             aliases.put("GLAG", GLAGOLITIC);
  4205             aliases.put("GOTH", GOTHIC);
  4206             aliases.put("GREK", GREEK);
  4207             aliases.put("GUJR", GUJARATI);
  4208             aliases.put("GURU", GURMUKHI);
  4209             aliases.put("HANG", HANGUL);
  4210             aliases.put("HANI", HAN);
  4211             aliases.put("HANO", HANUNOO);
  4212             aliases.put("HEBR", HEBREW);
  4213             aliases.put("HIRA", HIRAGANA);
  4214             // it appears we don't have the KATAKANA_OR_HIRAGANA
  4215             //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
  4216             aliases.put("ITAL", OLD_ITALIC);
  4217             aliases.put("JAVA", JAVANESE);
  4218             aliases.put("KALI", KAYAH_LI);
  4219             aliases.put("KANA", KATAKANA);
  4220             aliases.put("KHAR", KHAROSHTHI);
  4221             aliases.put("KHMR", KHMER);
  4222             aliases.put("KNDA", KANNADA);
  4223             aliases.put("KTHI", KAITHI);
  4224             aliases.put("LANA", TAI_THAM);
  4225             aliases.put("LAOO", LAO);
  4226             aliases.put("LATN", LATIN);
  4227             aliases.put("LEPC", LEPCHA);
  4228             aliases.put("LIMB", LIMBU);
  4229             aliases.put("LINB", LINEAR_B);
  4230             aliases.put("LISU", LISU);
  4231             aliases.put("LYCI", LYCIAN);
  4232             aliases.put("LYDI", LYDIAN);
  4233             aliases.put("MAND", MANDAIC);
  4234             aliases.put("MLYM", MALAYALAM);
  4235             aliases.put("MONG", MONGOLIAN);
  4236             aliases.put("MTEI", MEETEI_MAYEK);
  4237             aliases.put("MYMR", MYANMAR);
  4238             aliases.put("NKOO", NKO);
  4239             aliases.put("OGAM", OGHAM);
  4240             aliases.put("OLCK", OL_CHIKI);
  4241             aliases.put("ORKH", OLD_TURKIC);
  4242             aliases.put("ORYA", ORIYA);
  4243             aliases.put("OSMA", OSMANYA);
  4244             aliases.put("PHAG", PHAGS_PA);
  4245             aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
  4246             aliases.put("PHNX", PHOENICIAN);
  4247             aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
  4248             aliases.put("RJNG", REJANG);
  4249             aliases.put("RUNR", RUNIC);
  4250             aliases.put("SAMR", SAMARITAN);
  4251             aliases.put("SARB", OLD_SOUTH_ARABIAN);
  4252             aliases.put("SAUR", SAURASHTRA);
  4253             aliases.put("SHAW", SHAVIAN);
  4254             aliases.put("SINH", SINHALA);
  4255             aliases.put("SUND", SUNDANESE);
  4256             aliases.put("SYLO", SYLOTI_NAGRI);
  4257             aliases.put("SYRC", SYRIAC);
  4258             aliases.put("TAGB", TAGBANWA);
  4259             aliases.put("TALE", TAI_LE);
  4260             aliases.put("TALU", NEW_TAI_LUE);
  4261             aliases.put("TAML", TAMIL);
  4262             aliases.put("TAVT", TAI_VIET);
  4263             aliases.put("TELU", TELUGU);
  4264             aliases.put("TFNG", TIFINAGH);
  4265             aliases.put("TGLG", TAGALOG);
  4266             aliases.put("THAA", THAANA);
  4267             aliases.put("THAI", THAI);
  4268             aliases.put("TIBT", TIBETAN);
  4269             aliases.put("UGAR", UGARITIC);
  4270             aliases.put("VAII", VAI);
  4271             aliases.put("XPEO", OLD_PERSIAN);
  4272             aliases.put("XSUX", CUNEIFORM);
  4273             aliases.put("YIII", YI);
  4274             aliases.put("ZINH", INHERITED);
  4275             aliases.put("ZYYY", COMMON);
  4276             aliases.put("ZZZZ", UNKNOWN);
  4277         }
  4278 
  4279         /**
  4280          * Returns the enum constant representing the Unicode script of which
  4281          * the given character (Unicode code point) is assigned to.
  4282          *
  4283          * @param   codePoint the character (Unicode code point) in question.
  4284          * @return  The {@code UnicodeScript} constant representing the
  4285          *          Unicode script of which this character is assigned to.
  4286          *
  4287          * @exception IllegalArgumentException if the specified
  4288          * {@code codePoint} is an invalid Unicode code point.
  4289          * @see Character#isValidCodePoint(int)
  4290          *
  4291          */
  4292         public static UnicodeScript of(int codePoint) {
  4293             if (!isValidCodePoint(codePoint))
  4294                 throw new IllegalArgumentException();
  4295             int type = getType(codePoint);
  4296             // leave SURROGATE and PRIVATE_USE for table lookup
  4297             if (type == UNASSIGNED)
  4298                 return UNKNOWN;
  4299             int index = Arrays.binarySearch(scriptStarts, codePoint);
  4300             if (index < 0)
  4301                 index = -index - 2;
  4302             return scripts[index];
  4303         }
  4304 
  4305         /**
  4306          * Returns the UnicodeScript constant with the given Unicode script
  4307          * name or the script name alias. Script names and their aliases are
  4308          * determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt
  4309          * and PropertyValueAliases&lt;version&gt;.txt define script names
  4310          * and the script name aliases for a particular version of the
  4311          * standard. The {@link Character} class specifies the version of
  4312          * the standard that it supports.
  4313          * <p>
  4314          * Character case is ignored for all of the valid script names.
  4315          * The en_US locale's case mapping rules are used to provide
  4316          * case-insensitive string comparisons for script name validation.
  4317          * <p>
  4318          *
  4319          * @param scriptName A {@code UnicodeScript} name.
  4320          * @return The {@code UnicodeScript} constant identified
  4321          *         by {@code scriptName}
  4322          * @throws IllegalArgumentException if {@code scriptName} is an
  4323          *         invalid name
  4324          * @throws NullPointerException if {@code scriptName} is null
  4325          */
  4326         public static final UnicodeScript forName(String scriptName) {
  4327             scriptName = scriptName.toUpperCase(Locale.ENGLISH);
  4328                                  //.replace(' ', '_'));
  4329             UnicodeScript sc = aliases.get(scriptName);
  4330             if (sc != null)
  4331                 return sc;
  4332             return valueOf(scriptName);
  4333         }
  4334     }
  4335 
  4336     /**
  4337      * The value of the {@code Character}.
  4338      *
  4339      * @serial
  4340      */
  4341     private final char value;
  4342 
  4343     /** use serialVersionUID from JDK 1.0.2 for interoperability */
  4344     private static final long serialVersionUID = 3786198910865385080L;
  4345 
    /**
     * Constructs a newly allocated {@code Character} object that
     * represents the specified {@code char} value.
     * <p>
     * Each call creates a new object; {@link #valueOf(char)} can hand out
     * cached instances instead.
     *
     * @param  value   the value to be represented by the
     *                  {@code Character} object.
     */
    public Character(char value) {
        this.value = value;
    }
  4356 
  4357     private static class CharacterCache {
  4358         private CharacterCache(){}
  4359 
  4360         static final Character cache[] = new Character[127 + 1];
  4361 
  4362         static {
  4363             for (int i = 0; i < cache.length; i++)
  4364                 cache[i] = new Character((char)i);
  4365         }
  4366     }
  4367 
  4368     /**
  4369      * Returns a <tt>Character</tt> instance representing the specified
  4370      * <tt>char</tt> value.
  4371      * If a new <tt>Character</tt> instance is not required, this method
  4372      * should generally be used in preference to the constructor
  4373      * {@link #Character(char)}, as this method is likely to yield
  4374      * significantly better space and time performance by caching
  4375      * frequently requested values.
  4376      *
  4377      * This method will always cache values in the range {@code
  4378      * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
  4379      * cache other values outside of this range.
  4380      *
  4381      * @param  c a char value.
  4382      * @return a <tt>Character</tt> instance representing <tt>c</tt>.
  4383      * @since  1.5
  4384      */
  4385     public static Character valueOf(char c) {
  4386         if (c <= 127) { // must cache
  4387             return CharacterCache.cache[(int)c];
  4388         }
  4389         return new Character(c);
  4390     }
  4391 
    /**
     * Returns the value of this {@code Character} object.
     *
     * @return  the primitive {@code char} value represented by
     *          this object.
     */
    public char charValue() {
        return value;
    }
  4400 
  4401     /**
  4402      * Returns a hash code for this {@code Character}; equal to the result
  4403      * of invoking {@code charValue()}.
  4404      *
  4405      * @return a hash code value for this {@code Character}
  4406      */
  4407     public int hashCode() {
  4408         return (int)value;
  4409     }
  4410 
  4411     /**
  4412      * Compares this object against the specified object.
  4413      * The result is {@code true} if and only if the argument is not
  4414      * {@code null} and is a {@code Character} object that
  4415      * represents the same {@code char} value as this object.
  4416      *
  4417      * @param   obj   the object to compare with.
  4418      * @return  {@code true} if the objects are the same;
  4419      *          {@code false} otherwise.
  4420      */
  4421     public boolean equals(Object obj) {
  4422         if (obj instanceof Character) {
  4423             return value == ((Character)obj).charValue();
  4424         }
  4425         return false;
  4426     }
  4427 
  4428     /**
  4429      * Returns a {@code String} object representing this
  4430      * {@code Character}'s value.  The result is a string of
  4431      * length 1 whose sole component is the primitive
  4432      * {@code char} value represented by this
  4433      * {@code Character} object.
  4434      *
  4435      * @return  a string representation of this object.
  4436      */
  4437     public String toString() {
  4438         char buf[] = {value};
  4439         return String.valueOf(buf);
  4440     }
  4441 
    /**
     * Returns a {@code String} object representing the
     * specified {@code char}.  The result is a string of length
     * 1 consisting solely of the specified {@code char}.
     * <p>
     * This is a static convenience that delegates to
     * {@link String#valueOf(char)}.
     *
     * @param c the {@code char} to be converted
     * @return the string representation of the specified {@code char}
     * @since 1.4
     */
    public static String toString(char c) {
        return String.valueOf(c);
    }
  4454 
  4455     /**
  4456      * Determines whether the specified code point is a valid
  4457      * <a href="http://www.unicode.org/glossary/#code_point">
  4458      * Unicode code point value</a>.
  4459      *
  4460      * @param  codePoint the Unicode code point to be tested
  4461      * @return {@code true} if the specified code point value is between
  4462      *         {@link #MIN_CODE_POINT} and
  4463      *         {@link #MAX_CODE_POINT} inclusive;
  4464      *         {@code false} otherwise.
  4465      * @since  1.5
  4466      */
  4467     public static boolean isValidCodePoint(int codePoint) {
  4468         // Optimized form of:
  4469         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
  4470         int plane = codePoint >>> 16;
  4471         return plane < ((MAX_CODE_POINT + 1) >>> 16);
  4472     }
  4473 
  4474     /**
  4475      * Determines whether the specified character (Unicode code point)
  4476      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
  4477      * Such code points can be represented using a single {@code char}.
  4478      *
  4479      * @param  codePoint the character (Unicode code point) to be tested
  4480      * @return {@code true} if the specified code point is between
  4481      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
  4482      *         {@code false} otherwise.
  4483      * @since  1.7
  4484      */
  4485     public static boolean isBmpCodePoint(int codePoint) {
  4486         return codePoint >>> 16 == 0;
  4487         // Optimized form of:
  4488         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
  4489         // We consistently use logical shift (>>>) to facilitate
  4490         // additional runtime optimizations.
  4491     }
  4492 
  4493     /**
  4494      * Determines whether the specified character (Unicode code point)
  4495      * is in the <a href="#supplementary">supplementary character</a> range.
  4496      *
  4497      * @param  codePoint the character (Unicode code point) to be tested
  4498      * @return {@code true} if the specified code point is between
  4499      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
  4500      *         {@link #MAX_CODE_POINT} inclusive;
  4501      *         {@code false} otherwise.
  4502      * @since  1.5
  4503      */
  4504     public static boolean isSupplementaryCodePoint(int codePoint) {
  4505         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
  4506             && codePoint <  MAX_CODE_POINT + 1;
  4507     }
  4508 
    /**
     * Determines if the given {@code char} value is a
     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
     * Unicode high-surrogate code unit</a>
     * (also known as <i>leading-surrogate code unit</i>).
     *
     * <p>Such values do not represent characters by themselves,
     * but are used in the representation of
     * <a href="#supplementary">supplementary characters</a>
     * in the UTF-16 encoding.
     *
     * @param  ch the {@code char} value to be tested.
     * @return {@code true} if the {@code char} value is between
     *         {@link #MIN_HIGH_SURROGATE} and
     *         {@link #MAX_HIGH_SURROGATE} inclusive;
     *         {@code false} otherwise.
     * @see    Character#isLowSurrogate(char)
     * @see    Character.UnicodeBlock#of(int)
     * @since  1.5
     */
    public static boolean isHighSurrogate(char ch) {
        // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
        // (hence the exclusive "< MAX + 1" bound instead of "<= MAX").
        return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
    }
  4533 
    /**
     * Determines if the given {@code char} value is a
     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
     * Unicode low-surrogate code unit</a>
     * (also known as <i>trailing-surrogate code unit</i>).
     *
     * <p>Such values do not represent characters by themselves,
     * but are used in the representation of
     * <a href="#supplementary">supplementary characters</a>
     * in the UTF-16 encoding.
     *
     * @param  ch the {@code char} value to be tested.
     * @return {@code true} if the {@code char} value is between
     *         {@link #MIN_LOW_SURROGATE} and
     *         {@link #MAX_LOW_SURROGATE} inclusive;
     *         {@code false} otherwise.
     * @see    Character#isHighSurrogate(char)
     * @since  1.5
     */
    public static boolean isLowSurrogate(char ch) {
        // Inclusive upper bound expressed as an exclusive "< MAX + 1"
        // comparison, the same shape isHighSurrogate uses.
        return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
    }
  4556 
    /**
     * Determines if the given {@code char} value is a Unicode
     * <i>surrogate code unit</i>.
     *
     * <p>Such values do not represent characters by themselves,
     * but are used in the representation of
     * <a href="#supplementary">supplementary characters</a>
     * in the UTF-16 encoding.
     *
     * <p>A char value is a surrogate code unit if and only if it is either
     * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
     * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
     *
     * @param  ch the {@code char} value to be tested.
     * @return {@code true} if the {@code char} value is between
     *         {@link #MIN_SURROGATE} and
     *         {@link #MAX_SURROGATE} inclusive;
     *         {@code false} otherwise.
     * @since  1.7
     */
    public static boolean isSurrogate(char ch) {
        // One range test over the whole surrogate range rather than
        // calling isHighSurrogate and isLowSurrogate separately.
        return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
    }
  4580 
    /**
     * Determines whether the specified pair of {@code char}
     * values is a valid
     * <a href="http://www.unicode.org/glossary/#surrogate_pair">
     * Unicode surrogate pair</a>.
     *
     * <p>This method is equivalent to the expression:
     * <blockquote><pre>{@code
     * isHighSurrogate(high) && isLowSurrogate(low)
     * }</pre></blockquote>
     *
     * @param  high the high-surrogate code value to be tested
     * @param  low the low-surrogate code value to be tested
     * @return {@code true} if the specified high and
     * low-surrogate code values represent a valid surrogate pair;
     * {@code false} otherwise.
     * @since  1.5
     */
    public static boolean isSurrogatePair(char high, char low) {
        return isHighSurrogate(high) && isLowSurrogate(low);
    }
  4602 
  4603     /**
  4604      * Determines the number of {@code char} values needed to
  4605      * represent the specified character (Unicode code point). If the
  4606      * specified character is equal to or greater than 0x10000, then
  4607      * the method returns 2. Otherwise, the method returns 1.
  4608      *
  4609      * <p>This method doesn't validate the specified character to be a
  4610      * valid Unicode code point. The caller must validate the
  4611      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
  4612      * if necessary.
  4613      *
  4614      * @param   codePoint the character (Unicode code point) to be tested.
  4615      * @return  2 if the character is a valid supplementary character; 1 otherwise.
  4616      * @see     Character#isSupplementaryCodePoint(int)
  4617      * @since   1.5
  4618      */
  4619     public static int charCount(int codePoint) {
  4620         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
  4621     }
  4622 
    /**
     * Converts the specified surrogate pair to its supplementary code
     * point value. This method does not validate the specified
     * surrogate pair. The caller must validate it using {@link
     * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
     *
     * @param  high the high-surrogate code unit
     * @param  low the low-surrogate code unit
     * @return the supplementary code point composed from the
     *         specified surrogate pair.
     * @since  1.5
     */
    public static int toCodePoint(char high, char low) {
        // Optimized form of:
        // return ((high - MIN_HIGH_SURROGATE) << 10)
        //         + (low - MIN_LOW_SURROGATE)
        //         + MIN_SUPPLEMENTARY_CODE_POINT;
        // The terms that do not depend on the arguments are gathered into
        // the single parenthesized expression on the right, so only one
        // shift and two additions involve high and low.
        return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
                                       - (MIN_HIGH_SURROGATE << 10)
                                       - MIN_LOW_SURROGATE);
    }
  4644 
  4645     /**
  4646      * Returns the code point at the given index of the
  4647      * {@code CharSequence}. If the {@code char} value at
  4648      * the given index in the {@code CharSequence} is in the
  4649      * high-surrogate range, the following index is less than the
  4650      * length of the {@code CharSequence}, and the
  4651      * {@code char} value at the following index is in the
  4652      * low-surrogate range, then the supplementary code point
  4653      * corresponding to this surrogate pair is returned. Otherwise,
  4654      * the {@code char} value at the given index is returned.
  4655      *
  4656      * @param seq a sequence of {@code char} values (Unicode code
  4657      * units)
  4658      * @param index the index to the {@code char} values (Unicode
  4659      * code units) in {@code seq} to be converted
  4660      * @return the Unicode code point at the given index
  4661      * @exception NullPointerException if {@code seq} is null.
  4662      * @exception IndexOutOfBoundsException if the value
  4663      * {@code index} is negative or not less than
  4664      * {@link CharSequence#length() seq.length()}.
  4665      * @since  1.5
  4666      */
  4667     public static int codePointAt(CharSequence seq, int index) {
  4668         char c1 = seq.charAt(index++);
  4669         if (isHighSurrogate(c1)) {
  4670             if (index < seq.length()) {
  4671                 char c2 = seq.charAt(index);
  4672                 if (isLowSurrogate(c2)) {
  4673                     return toCodePoint(c1, c2);
  4674                 }
  4675             }
  4676         }
  4677         return c1;
  4678     }
  4679 
    /**
     * Returns the code point at the given index of the
     * {@code char} array. If the {@code char} value at
     * the given index in the {@code char} array is in the
     * high-surrogate range, the following index is less than the
     * length of the {@code char} array, and the
     * {@code char} value at the following index is in the
     * low-surrogate range, then the supplementary code point
     * corresponding to this surrogate pair is returned. Otherwise,
     * the {@code char} value at the given index is returned.
     *
     * @param a the {@code char} array
     * @param index the index to the {@code char} values (Unicode
     * code units) in the {@code char} array to be converted
     * @return the Unicode code point at the given index
     * @exception NullPointerException if {@code a} is null.
     * @exception IndexOutOfBoundsException if the value
     * {@code index} is negative or not less than
     * the length of the {@code char} array.
     * @since  1.5
     */
    public static int codePointAt(char[] a, int index) {
        // Delegates to the shared implementation, using the whole array
        // (limit == a.length) as the readable range.
        return codePointAtImpl(a, index, a.length);
    }
  4704 
  4705     /**
  4706      * Returns the code point at the given index of the
  4707      * {@code char} array, where only array elements with
  4708      * {@code index} less than {@code limit} can be used. If
  4709      * the {@code char} value at the given index in the
  4710      * {@code char} array is in the high-surrogate range, the
  4711      * following index is less than the {@code limit}, and the
  4712      * {@code char} value at the following index is in the
  4713      * low-surrogate range, then the supplementary code point
  4714      * corresponding to this surrogate pair is returned. Otherwise,
  4715      * the {@code char} value at the given index is returned.
  4716      *
  4717      * @param a the {@code char} array
  4718      * @param index the index to the {@code char} values (Unicode
  4719      * code units) in the {@code char} array to be converted
  4720      * @param limit the index after the last array element that
  4721      * can be used in the {@code char} array
  4722      * @return the Unicode code point at the given index
  4723      * @exception NullPointerException if {@code a} is null.
  4724      * @exception IndexOutOfBoundsException if the {@code index}
  4725      * argument is negative or not less than the {@code limit}
  4726      * argument, or if the {@code limit} argument is negative or
  4727      * greater than the length of the {@code char} array.
  4728      * @since  1.5
  4729      */
  4730     public static int codePointAt(char[] a, int index, int limit) {
  4731         if (index >= limit || limit < 0 || limit > a.length) {
  4732             throw new IndexOutOfBoundsException();
  4733         }
  4734         return codePointAtImpl(a, index, limit);
  4735     }
  4736 
  4737     // throws ArrayIndexOutofBoundsException if index out of bounds
  4738     static int codePointAtImpl(char[] a, int index, int limit) {
  4739         char c1 = a[index++];
  4740         if (isHighSurrogate(c1)) {
  4741             if (index < limit) {
  4742                 char c2 = a[index];
  4743                 if (isLowSurrogate(c2)) {
  4744                     return toCodePoint(c1, c2);
  4745                 }
  4746             }
  4747         }
  4748         return c1;
  4749     }
  4750 
  4751     /**
  4752      * Returns the code point preceding the given index of the
  4753      * {@code CharSequence}. If the {@code char} value at
  4754      * {@code (index - 1)} in the {@code CharSequence} is in
  4755      * the low-surrogate range, {@code (index - 2)} is not
  4756      * negative, and the {@code char} value at {@code (index - 2)}
  4757      * in the {@code CharSequence} is in the
  4758      * high-surrogate range, then the supplementary code point
  4759      * corresponding to this surrogate pair is returned. Otherwise,
  4760      * the {@code char} value at {@code (index - 1)} is
  4761      * returned.
  4762      *
  4763      * @param seq the {@code CharSequence} instance
  4764      * @param index the index following the code point that should be returned
  4765      * @return the Unicode code point value before the given index.
  4766      * @exception NullPointerException if {@code seq} is null.
  4767      * @exception IndexOutOfBoundsException if the {@code index}
  4768      * argument is less than 1 or greater than {@link
  4769      * CharSequence#length() seq.length()}.
  4770      * @since  1.5
  4771      */
  4772     public static int codePointBefore(CharSequence seq, int index) {
  4773         char c2 = seq.charAt(--index);
  4774         if (isLowSurrogate(c2)) {
  4775             if (index > 0) {
  4776                 char c1 = seq.charAt(--index);
  4777                 if (isHighSurrogate(c1)) {
  4778                     return toCodePoint(c1, c2);
  4779                 }
  4780             }
  4781         }
  4782         return c2;
  4783     }
  4784 
  4785     /**
  4786      * Returns the code point preceding the given index of the
  4787      * {@code char} array. If the {@code char} value at
  4788      * {@code (index - 1)} in the {@code char} array is in
  4789      * the low-surrogate range, {@code (index - 2)} is not
  4790      * negative, and the {@code char} value at {@code (index - 2)}
  4791      * in the {@code char} array is in the
  4792      * high-surrogate range, then the supplementary code point
  4793      * corresponding to this surrogate pair is returned. Otherwise,
  4794      * the {@code char} value at {@code (index - 1)} is
  4795      * returned.
  4796      *
  4797      * @param a the {@code char} array
  4798      * @param index the index following the code point that should be returned
  4799      * @return the Unicode code point value before the given index.
  4800      * @exception NullPointerException if {@code a} is null.
  4801      * @exception IndexOutOfBoundsException if the {@code index}
  4802      * argument is less than 1 or greater than the length of the
  4803      * {@code char} array
  4804      * @since  1.5
  4805      */
  4806     public static int codePointBefore(char[] a, int index) {
  4807         return codePointBeforeImpl(a, index, 0);
  4808     }
  4809 
  4810     /**
  4811      * Returns the code point preceding the given index of the
  4812      * {@code char} array, where only array elements with
  4813      * {@code index} greater than or equal to {@code start}
  4814      * can be used. If the {@code char} value at {@code (index - 1)}
  4815      * in the {@code char} array is in the
  4816      * low-surrogate range, {@code (index - 2)} is not less than
  4817      * {@code start}, and the {@code char} value at
  4818      * {@code (index - 2)} in the {@code char} array is in
  4819      * the high-surrogate range, then the supplementary code point
  4820      * corresponding to this surrogate pair is returned. Otherwise,
  4821      * the {@code char} value at {@code (index - 1)} is
  4822      * returned.
  4823      *
  4824      * @param a the {@code char} array
  4825      * @param index the index following the code point that should be returned
  4826      * @param start the index of the first array element in the
  4827      * {@code char} array
  4828      * @return the Unicode code point value before the given index.
  4829      * @exception NullPointerException if {@code a} is null.
  4830      * @exception IndexOutOfBoundsException if the {@code index}
  4831      * argument is not greater than the {@code start} argument or
  4832      * is greater than the length of the {@code char} array, or
  4833      * if the {@code start} argument is negative or not less than
  4834      * the length of the {@code char} array.
  4835      * @since  1.5
  4836      */
  4837     public static int codePointBefore(char[] a, int index, int start) {
  4838         if (index <= start || start < 0 || start >= a.length) {
  4839             throw new IndexOutOfBoundsException();
  4840         }
  4841         return codePointBeforeImpl(a, index, start);
  4842     }
  4843 
  4844     // throws ArrayIndexOutofBoundsException if index-1 out of bounds
  4845     static int codePointBeforeImpl(char[] a, int index, int start) {
  4846         char c2 = a[--index];
  4847         if (isLowSurrogate(c2)) {
  4848             if (index > start) {
  4849                 char c1 = a[--index];
  4850                 if (isHighSurrogate(c1)) {
  4851                     return toCodePoint(c1, c2);
  4852                 }
  4853             }
  4854         }
  4855         return c2;
  4856     }
  4857 
  4858     /**
  4859      * Returns the leading surrogate (a
  4860      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
  4861      * high surrogate code unit</a>) of the
  4862      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
  4863      * surrogate pair</a>
  4864      * representing the specified supplementary character (Unicode
  4865      * code point) in the UTF-16 encoding.  If the specified character
  4866      * is not a
  4867      * <a href="Character.html#supplementary">supplementary character</a>,
  4868      * an unspecified {@code char} is returned.
  4869      *
  4870      * <p>If
  4871      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
  4872      * is {@code true}, then
  4873      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
  4874      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
  4875      * are also always {@code true}.
  4876      *
  4877      * @param   codePoint a supplementary character (Unicode code point)
  4878      * @return  the leading surrogate code unit used to represent the
  4879      *          character in the UTF-16 encoding
  4880      * @since   1.7
  4881      */
  4882     public static char highSurrogate(int codePoint) {
  4883         return (char) ((codePoint >>> 10)
  4884             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
  4885     }
  4886 
  4887     /**
  4888      * Returns the trailing surrogate (a
  4889      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
  4890      * low surrogate code unit</a>) of the
  4891      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
  4892      * surrogate pair</a>
  4893      * representing the specified supplementary character (Unicode
  4894      * code point) in the UTF-16 encoding.  If the specified character
  4895      * is not a
  4896      * <a href="Character.html#supplementary">supplementary character</a>,
  4897      * an unspecified {@code char} is returned.
  4898      *
  4899      * <p>If
  4900      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
  4901      * is {@code true}, then
  4902      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
  4903      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
  4904      * are also always {@code true}.
  4905      *
  4906      * @param   codePoint a supplementary character (Unicode code point)
  4907      * @return  the trailing surrogate code unit used to represent the
  4908      *          character in the UTF-16 encoding
  4909      * @since   1.7
  4910      */
  4911     public static char lowSurrogate(int codePoint) {
  4912         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
  4913     }
  4914 
  4915     /**
  4916      * Converts the specified character (Unicode code point) to its
  4917      * UTF-16 representation. If the specified code point is a BMP
  4918      * (Basic Multilingual Plane or Plane 0) value, the same value is
  4919      * stored in {@code dst[dstIndex]}, and 1 is returned. If the
  4920      * specified code point is a supplementary character, its
  4921      * surrogate values are stored in {@code dst[dstIndex]}
  4922      * (high-surrogate) and {@code dst[dstIndex+1]}
  4923      * (low-surrogate), and 2 is returned.
  4924      *
  4925      * @param  codePoint the character (Unicode code point) to be converted.
  4926      * @param  dst an array of {@code char} in which the
  4927      * {@code codePoint}'s UTF-16 value is stored.
  4928      * @param dstIndex the start index into the {@code dst}
  4929      * array where the converted value is stored.
  4930      * @return 1 if the code point is a BMP code point, 2 if the
  4931      * code point is a supplementary code point.
  4932      * @exception IllegalArgumentException if the specified
  4933      * {@code codePoint} is not a valid Unicode code point.
  4934      * @exception NullPointerException if the specified {@code dst} is null.
  4935      * @exception IndexOutOfBoundsException if {@code dstIndex}
  4936      * is negative or not less than {@code dst.length}, or if
  4937      * {@code dst} at {@code dstIndex} doesn't have enough
  4938      * array element(s) to store the resulting {@code char}
  4939      * value(s). (If {@code dstIndex} is equal to
  4940      * {@code dst.length-1} and the specified
  4941      * {@code codePoint} is a supplementary character, the
  4942      * high-surrogate value is not stored in
  4943      * {@code dst[dstIndex]}.)
  4944      * @since  1.5
  4945      */
  4946     public static int toChars(int codePoint, char[] dst, int dstIndex) {
  4947         if (isBmpCodePoint(codePoint)) {
  4948             dst[dstIndex] = (char) codePoint;
  4949             return 1;
  4950         } else if (isValidCodePoint(codePoint)) {
  4951             toSurrogates(codePoint, dst, dstIndex);
  4952             return 2;
  4953         } else {
  4954             throw new IllegalArgumentException();
  4955         }
  4956     }
  4957 
  4958     /**
  4959      * Converts the specified character (Unicode code point) to its
  4960      * UTF-16 representation stored in a {@code char} array. If
  4961      * the specified code point is a BMP (Basic Multilingual Plane or
  4962      * Plane 0) value, the resulting {@code char} array has
  4963      * the same value as {@code codePoint}. If the specified code
  4964      * point is a supplementary code point, the resulting
  4965      * {@code char} array has the corresponding surrogate pair.
  4966      *
  4967      * @param  codePoint a Unicode code point
  4968      * @return a {@code char} array having
  4969      *         {@code codePoint}'s UTF-16 representation.
  4970      * @exception IllegalArgumentException if the specified
  4971      * {@code codePoint} is not a valid Unicode code point.
  4972      * @since  1.5
  4973      */
  4974     public static char[] toChars(int codePoint) {
  4975         if (isBmpCodePoint(codePoint)) {
  4976             return new char[] { (char) codePoint };
  4977         } else if (isValidCodePoint(codePoint)) {
  4978             char[] result = new char[2];
  4979             toSurrogates(codePoint, result, 0);
  4980             return result;
  4981         } else {
  4982             throw new IllegalArgumentException();
  4983         }
  4984     }
  4985 
  4986     static void toSurrogates(int codePoint, char[] dst, int index) {
  4987         // We write elements "backwards" to guarantee all-or-nothing
  4988         dst[index+1] = lowSurrogate(codePoint);
  4989         dst[index] = highSurrogate(codePoint);
  4990     }
  4991 
  4992     /**
  4993      * Returns the number of Unicode code points in the text range of
  4994      * the specified char sequence. The text range begins at the
  4995      * specified {@code beginIndex} and extends to the
  4996      * {@code char} at index {@code endIndex - 1}. Thus the
  4997      * length (in {@code char}s) of the text range is
  4998      * {@code endIndex-beginIndex}. Unpaired surrogates within
  4999      * the text range count as one code point each.
  5000      *
  5001      * @param seq the char sequence
  5002      * @param beginIndex the index to the first {@code char} of
  5003      * the text range.
  5004      * @param endIndex the index after the last {@code char} of
  5005      * the text range.
  5006      * @return the number of Unicode code points in the specified text
  5007      * range
  5008      * @exception NullPointerException if {@code seq} is null.
  5009      * @exception IndexOutOfBoundsException if the
  5010      * {@code beginIndex} is negative, or {@code endIndex}
  5011      * is larger than the length of the given sequence, or
  5012      * {@code beginIndex} is larger than {@code endIndex}.
  5013      * @since  1.5
  5014      */
  5015     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
  5016         int length = seq.length();
  5017         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
  5018             throw new IndexOutOfBoundsException();
  5019         }
  5020         int n = endIndex - beginIndex;
  5021         for (int i = beginIndex; i < endIndex; ) {
  5022             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
  5023                 isLowSurrogate(seq.charAt(i))) {
  5024                 n--;
  5025                 i++;
  5026             }
  5027         }
  5028         return n;
  5029     }
  5030 
  5031     /**
  5032      * Returns the number of Unicode code points in a subarray of the
  5033      * {@code char} array argument. The {@code offset}
  5034      * argument is the index of the first {@code char} of the
  5035      * subarray and the {@code count} argument specifies the
  5036      * length of the subarray in {@code char}s. Unpaired
  5037      * surrogates within the subarray count as one code point each.
  5038      *
  5039      * @param a the {@code char} array
  5040      * @param offset the index of the first {@code char} in the
  5041      * given {@code char} array
  5042      * @param count the length of the subarray in {@code char}s
  5043      * @return the number of Unicode code points in the specified subarray
  5044      * @exception NullPointerException if {@code a} is null.
  5045      * @exception IndexOutOfBoundsException if {@code offset} or
  5046      * {@code count} is negative, or if {@code offset +
  5047      * count} is larger than the length of the given array.
  5048      * @since  1.5
  5049      */
  5050     public static int codePointCount(char[] a, int offset, int count) {
  5051         if (count > a.length - offset || offset < 0 || count < 0) {
  5052             throw new IndexOutOfBoundsException();
  5053         }
  5054         return codePointCountImpl(a, offset, count);
  5055     }
  5056 
  5057     static int codePointCountImpl(char[] a, int offset, int count) {
  5058         int endIndex = offset + count;
  5059         int n = count;
  5060         for (int i = offset; i < endIndex; ) {
  5061             if (isHighSurrogate(a[i++]) && i < endIndex &&
  5062                 isLowSurrogate(a[i])) {
  5063                 n--;
  5064                 i++;
  5065             }
  5066         }
  5067         return n;
  5068     }
  5069 
  5070     /**
  5071      * Returns the index within the given char sequence that is offset
  5072      * from the given {@code index} by {@code codePointOffset}
  5073      * code points. Unpaired surrogates within the text range given by
  5074      * {@code index} and {@code codePointOffset} count as
  5075      * one code point each.
  5076      *
  5077      * @param seq the char sequence
  5078      * @param index the index to be offset
  5079      * @param codePointOffset the offset in code points
  5080      * @return the index within the char sequence
  5081      * @exception NullPointerException if {@code seq} is null.
  5082      * @exception IndexOutOfBoundsException if {@code index}
  5083      *   is negative or larger than the length of the char sequence,
  5084      *   or if {@code codePointOffset} is positive and the
  5085      *   subsequence starting with {@code index} has fewer than
  5086      *   {@code codePointOffset} code points, or if
  5087      *   {@code codePointOffset} is negative and the subsequence
  5088      *   before {@code index} has fewer than the absolute value
  5089      *   of {@code codePointOffset} code points.
  5090      * @since 1.5
  5091      */
  5092     public static int offsetByCodePoints(CharSequence seq, int index,
  5093                                          int codePointOffset) {
  5094         int length = seq.length();
  5095         if (index < 0 || index > length) {
  5096             throw new IndexOutOfBoundsException();
  5097         }
  5098 
  5099         int x = index;
  5100         if (codePointOffset >= 0) {
  5101             int i;
  5102             for (i = 0; x < length && i < codePointOffset; i++) {
  5103                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
  5104                     isLowSurrogate(seq.charAt(x))) {
  5105                     x++;
  5106                 }
  5107             }
  5108             if (i < codePointOffset) {
  5109                 throw new IndexOutOfBoundsException();
  5110             }
  5111         } else {
  5112             int i;
  5113             for (i = codePointOffset; x > 0 && i < 0; i++) {
  5114                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
  5115                     isHighSurrogate(seq.charAt(x-1))) {
  5116                     x--;
  5117                 }
  5118             }
  5119             if (i < 0) {
  5120                 throw new IndexOutOfBoundsException();
  5121             }
  5122         }
  5123         return x;
  5124     }
  5125 
  5126     /**
  5127      * Returns the index within the given {@code char} subarray
  5128      * that is offset from the given {@code index} by
  5129      * {@code codePointOffset} code points. The
  5130      * {@code start} and {@code count} arguments specify a
  5131      * subarray of the {@code char} array. Unpaired surrogates
  5132      * within the text range given by {@code index} and
  5133      * {@code codePointOffset} count as one code point each.
  5134      *
  5135      * @param a the {@code char} array
  5136      * @param start the index of the first {@code char} of the
  5137      * subarray
  5138      * @param count the length of the subarray in {@code char}s
  5139      * @param index the index to be offset
  5140      * @param codePointOffset the offset in code points
  5141      * @return the index within the subarray
  5142      * @exception NullPointerException if {@code a} is null.
  5143      * @exception IndexOutOfBoundsException
  5144      *   if {@code start} or {@code count} is negative,
  5145      *   or if {@code start + count} is larger than the length of
  5146      *   the given array,
  5147      *   or if {@code index} is less than {@code start} or
  5148      *   larger than {@code start + count},
  5149      *   or if {@code codePointOffset} is positive and the text range
  5150      *   starting with {@code index} and ending with {@code start + count - 1}
  5151      *   has fewer than {@code codePointOffset} code
  5152      *   points,
  5153      *   or if {@code codePointOffset} is negative and the text range
  5154      *   starting with {@code start} and ending with {@code index - 1}
  5155      *   has fewer than the absolute value of
  5156      *   {@code codePointOffset} code points.
  5157      * @since 1.5
  5158      */
  5159     public static int offsetByCodePoints(char[] a, int start, int count,
  5160                                          int index, int codePointOffset) {
  5161         if (count > a.length-start || start < 0 || count < 0
  5162             || index < start || index > start+count) {
  5163             throw new IndexOutOfBoundsException();
  5164         }
  5165         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
  5166     }
  5167 
  5168     static int offsetByCodePointsImpl(char[]a, int start, int count,
  5169                                       int index, int codePointOffset) {
  5170         int x = index;
  5171         if (codePointOffset >= 0) {
  5172             int limit = start + count;
  5173             int i;
  5174             for (i = 0; x < limit && i < codePointOffset; i++) {
  5175                 if (isHighSurrogate(a[x++]) && x < limit &&
  5176                     isLowSurrogate(a[x])) {
  5177                     x++;
  5178                 }
  5179             }
  5180             if (i < codePointOffset) {
  5181                 throw new IndexOutOfBoundsException();
  5182             }
  5183         } else {
  5184             int i;
  5185             for (i = codePointOffset; x > start && i < 0; i++) {
  5186                 if (isLowSurrogate(a[--x]) && x > start &&
  5187                     isHighSurrogate(a[x-1])) {
  5188                     x--;
  5189                 }
  5190             }
  5191             if (i < 0) {
  5192                 throw new IndexOutOfBoundsException();
  5193             }
  5194         }
  5195         return x;
  5196     }
  5197 
  5198     /**
  5199      * Determines if the specified character is a lowercase character.
  5200      * <p>
  5201      * A character is lowercase if its general category type, provided
  5202      * by {@code Character.getType(ch)}, is
  5203      * {@code LOWERCASE_LETTER}, or it has contributory property
  5204      * Other_Lowercase as defined by the Unicode Standard.
  5205      * <p>
  5206      * The following are examples of lowercase characters:
  5207      * <p><blockquote><pre>
  5208      * a b c d e f g h i j k l m n o p q r s t u v w x y z
  5209      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
  5210      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
  5211      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
  5212      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
  5213      * </pre></blockquote>
  5214      * <p> Many other Unicode characters are lowercase too.
  5215      *
  5216      * <p><b>Note:</b> This method cannot handle <a
  5217      * href="#supplementary"> supplementary characters</a>. To support
  5218      * all Unicode characters, including supplementary characters, use
  5219      * the {@link #isLowerCase(int)} method.
  5220      *
  5221      * @param   ch   the character to be tested.
  5222      * @return  {@code true} if the character is lowercase;
  5223      *          {@code false} otherwise.
  5224      * @see     Character#isUpperCase(char)
  5225      * @see     Character#isTitleCase(char)
  5226      * @see     Character#toLowerCase(char)
  5227      * @see     Character#getType(char)
  5228      */
  5229     public static boolean isLowerCase(char ch) {
  5230         return isLowerCase((int)ch);
  5231     }
  5232 
  5233     /**
  5234      * Determines if the specified character (Unicode code point) is a
  5235      * lowercase character.
  5236      * <p>
  5237      * A character is lowercase if its general category type, provided
  5238      * by {@link Character#getType getType(codePoint)}, is
  5239      * {@code LOWERCASE_LETTER}, or it has contributory property
  5240      * Other_Lowercase as defined by the Unicode Standard.
  5241      * <p>
  5242      * The following are examples of lowercase characters:
  5243      * <p><blockquote><pre>
  5244      * a b c d e f g h i j k l m n o p q r s t u v w x y z
  5245      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
  5246      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
  5247      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
  5248      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
  5249      * </pre></blockquote>
  5250      * <p> Many other Unicode characters are lowercase too.
  5251      *
  5252      * @param   codePoint the character (Unicode code point) to be tested.
  5253      * @return  {@code true} if the character is lowercase;
  5254      *          {@code false} otherwise.
  5255      * @see     Character#isUpperCase(int)
  5256      * @see     Character#isTitleCase(int)
  5257      * @see     Character#toLowerCase(int)
  5258      * @see     Character#getType(int)
  5259      * @since   1.5
  5260      */
  5261     public static boolean isLowerCase(int codePoint) {
  5262         return getType(codePoint) == Character.LOWERCASE_LETTER ||
  5263                CharacterData.of(codePoint).isOtherLowercase(codePoint);
  5264     }
  5265 
  5266     /**
  5267      * Determines if the specified character is an uppercase character.
  5268      * <p>
  5269      * A character is uppercase if its general category type, provided by
  5270      * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
  5271      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
  5272      * <p>
  5273      * The following are examples of uppercase characters:
  5274      * <p><blockquote><pre>
  5275      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
  5276      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
  5277      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
  5278      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
  5279      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
  5280      * </pre></blockquote>
  5281      * <p> Many other Unicode characters are uppercase too.<p>
  5282      *
  5283      * <p><b>Note:</b> This method cannot handle <a
  5284      * href="#supplementary"> supplementary characters</a>. To support
  5285      * all Unicode characters, including supplementary characters, use
  5286      * the {@link #isUpperCase(int)} method.
  5287      *
  5288      * @param   ch   the character to be tested.
  5289      * @return  {@code true} if the character is uppercase;
  5290      *          {@code false} otherwise.
  5291      * @see     Character#isLowerCase(char)
  5292      * @see     Character#isTitleCase(char)
  5293      * @see     Character#toUpperCase(char)
  5294      * @see     Character#getType(char)
  5295      * @since   1.0
  5296      */
  5297     public static boolean isUpperCase(char ch) {
  5298         return isUpperCase((int)ch);
  5299     }
  5300 
  5301     /**
  5302      * Determines if the specified character (Unicode code point) is an uppercase character.
  5303      * <p>
  5304      * A character is uppercase if its general category type, provided by
  5305      * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
  5306      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
  5307      * <p>
  5308      * The following are examples of uppercase characters:
  5309      * <p><blockquote><pre>
  5310      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
  5311      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
  5312      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
  5313      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
  5314      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
  5315      * </pre></blockquote>
     * <p> Many other Unicode characters are uppercase too.
  5317      *
  5318      * @param   codePoint the character (Unicode code point) to be tested.
  5319      * @return  {@code true} if the character is uppercase;
  5320      *          {@code false} otherwise.
  5321      * @see     Character#isLowerCase(int)
  5322      * @see     Character#isTitleCase(int)
  5323      * @see     Character#toUpperCase(int)
  5324      * @see     Character#getType(int)
  5325      * @since   1.5
  5326      */
  5327     public static boolean isUpperCase(int codePoint) {
  5328         return getType(codePoint) == Character.UPPERCASE_LETTER ||
  5329                CharacterData.of(codePoint).isOtherUppercase(codePoint);
  5330     }
  5331 
  5332     /**
  5333      * Determines if the specified character is a titlecase character.
  5334      * <p>
  5335      * A character is a titlecase character if its general
  5336      * category type, provided by {@code Character.getType(ch)},
  5337      * is {@code TITLECASE_LETTER}.
  5338      * <p>
  5339      * Some characters look like pairs of Latin letters. For example, there
  5340      * is an uppercase letter that looks like "LJ" and has a corresponding
  5341      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
  5342      * is the appropriate form to use when rendering a word in lowercase
  5343      * with initial capitals, as for a book title.
  5344      * <p>
  5345      * These are some of the Unicode characters for which this method returns
  5346      * {@code true}:
  5347      * <ul>
  5348      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
  5349      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
  5350      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
  5351      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
  5352      * </ul>
     * <p> Many other Unicode characters are titlecase too.
  5354      *
  5355      * <p><b>Note:</b> This method cannot handle <a
  5356      * href="#supplementary"> supplementary characters</a>. To support
  5357      * all Unicode characters, including supplementary characters, use
  5358      * the {@link #isTitleCase(int)} method.
  5359      *
  5360      * @param   ch   the character to be tested.
  5361      * @return  {@code true} if the character is titlecase;
  5362      *          {@code false} otherwise.
  5363      * @see     Character#isLowerCase(char)
  5364      * @see     Character#isUpperCase(char)
  5365      * @see     Character#toTitleCase(char)
  5366      * @see     Character#getType(char)
  5367      * @since   1.0.2
  5368      */
  5369     public static boolean isTitleCase(char ch) {
  5370         return isTitleCase((int)ch);
  5371     }
  5372 
  5373     /**
  5374      * Determines if the specified character (Unicode code point) is a titlecase character.
  5375      * <p>
  5376      * A character is a titlecase character if its general
  5377      * category type, provided by {@link Character#getType(int) getType(codePoint)},
  5378      * is {@code TITLECASE_LETTER}.
  5379      * <p>
  5380      * Some characters look like pairs of Latin letters. For example, there
  5381      * is an uppercase letter that looks like "LJ" and has a corresponding
  5382      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
  5383      * is the appropriate form to use when rendering a word in lowercase
  5384      * with initial capitals, as for a book title.
  5385      * <p>
  5386      * These are some of the Unicode characters for which this method returns
  5387      * {@code true}:
  5388      * <ul>
  5389      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
  5390      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
  5391      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
  5392      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
  5393      * </ul>
     * <p> Many other Unicode characters are titlecase too.
  5395      *
  5396      * @param   codePoint the character (Unicode code point) to be tested.
  5397      * @return  {@code true} if the character is titlecase;
  5398      *          {@code false} otherwise.
  5399      * @see     Character#isLowerCase(int)
  5400      * @see     Character#isUpperCase(int)
  5401      * @see     Character#toTitleCase(int)
  5402      * @see     Character#getType(int)
  5403      * @since   1.5
  5404      */
  5405     public static boolean isTitleCase(int codePoint) {
  5406         return getType(codePoint) == Character.TITLECASE_LETTER;
  5407     }
  5408 
  5409     /**
  5410      * Determines if the specified character is a digit.
  5411      * <p>
  5412      * A character is a digit if its general category type, provided
  5413      * by {@code Character.getType(ch)}, is
  5414      * {@code DECIMAL_DIGIT_NUMBER}.
  5415      * <p>
  5416      * Some Unicode character ranges that contain digits:
  5417      * <ul>
  5418      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
  5419      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
  5420      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
  5421      *     Arabic-Indic digits
  5422      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
  5423      *     Extended Arabic-Indic digits
  5424      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
  5425      *     Devanagari digits
  5426      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
  5427      *     Fullwidth digits
  5428      * </ul>
  5429      *
  5430      * Many other character ranges contain digits as well.
  5431      *
  5432      * <p><b>Note:</b> This method cannot handle <a
  5433      * href="#supplementary"> supplementary characters</a>. To support
  5434      * all Unicode characters, including supplementary characters, use
  5435      * the {@link #isDigit(int)} method.
  5436      *
  5437      * @param   ch   the character to be tested.
  5438      * @return  {@code true} if the character is a digit;
  5439      *          {@code false} otherwise.
  5440      * @see     Character#digit(char, int)
  5441      * @see     Character#forDigit(int, int)
  5442      * @see     Character#getType(char)
  5443      */
  5444     public static boolean isDigit(char ch) {
  5445         return isDigit((int)ch);
  5446     }
  5447 
  5448     /**
  5449      * Determines if the specified character (Unicode code point) is a digit.
  5450      * <p>
  5451      * A character is a digit if its general category type, provided
  5452      * by {@link Character#getType(int) getType(codePoint)}, is
  5453      * {@code DECIMAL_DIGIT_NUMBER}.
  5454      * <p>
  5455      * Some Unicode character ranges that contain digits:
  5456      * <ul>
  5457      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
  5458      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
  5459      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
  5460      *     Arabic-Indic digits
  5461      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
  5462      *     Extended Arabic-Indic digits
  5463      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
  5464      *     Devanagari digits
  5465      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
  5466      *     Fullwidth digits
  5467      * </ul>
  5468      *
  5469      * Many other character ranges contain digits as well.
  5470      *
  5471      * @param   codePoint the character (Unicode code point) to be tested.
  5472      * @return  {@code true} if the character is a digit;
  5473      *          {@code false} otherwise.
  5474      * @see     Character#forDigit(int, int)
  5475      * @see     Character#getType(int)
  5476      * @since   1.5
  5477      */
  5478     public static boolean isDigit(int codePoint) {
  5479         return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
  5480     }
  5481 
  5482     /**
  5483      * Determines if a character is defined in Unicode.
  5484      * <p>
  5485      * A character is defined if at least one of the following is true:
  5486      * <ul>
  5487      * <li>It has an entry in the UnicodeData file.
  5488      * <li>It has a value in a range defined by the UnicodeData file.
  5489      * </ul>
  5490      *
  5491      * <p><b>Note:</b> This method cannot handle <a
  5492      * href="#supplementary"> supplementary characters</a>. To support
  5493      * all Unicode characters, including supplementary characters, use
  5494      * the {@link #isDefined(int)} method.
  5495      *
  5496      * @param   ch   the character to be tested
  5497      * @return  {@code true} if the character has a defined meaning
  5498      *          in Unicode; {@code false} otherwise.
  5499      * @see     Character#isDigit(char)
  5500      * @see     Character#isLetter(char)
  5501      * @see     Character#isLetterOrDigit(char)
  5502      * @see     Character#isLowerCase(char)
  5503      * @see     Character#isTitleCase(char)
  5504      * @see     Character#isUpperCase(char)
  5505      * @since   1.0.2
  5506      */
  5507     public static boolean isDefined(char ch) {
  5508         return isDefined((int)ch);
  5509     }
  5510 
  5511     /**
  5512      * Determines if a character (Unicode code point) is defined in Unicode.
  5513      * <p>
  5514      * A character is defined if at least one of the following is true:
  5515      * <ul>
  5516      * <li>It has an entry in the UnicodeData file.
  5517      * <li>It has a value in a range defined by the UnicodeData file.
  5518      * </ul>
  5519      *
  5520      * @param   codePoint the character (Unicode code point) to be tested.
  5521      * @return  {@code true} if the character has a defined meaning
  5522      *          in Unicode; {@code false} otherwise.
  5523      * @see     Character#isDigit(int)
  5524      * @see     Character#isLetter(int)
  5525      * @see     Character#isLetterOrDigit(int)
  5526      * @see     Character#isLowerCase(int)
  5527      * @see     Character#isTitleCase(int)
  5528      * @see     Character#isUpperCase(int)
  5529      * @since   1.5
  5530      */
  5531     public static boolean isDefined(int codePoint) {
  5532         return getType(codePoint) != Character.UNASSIGNED;
  5533     }
  5534 
  5535     /**
  5536      * Determines if the specified character is a letter.
  5537      * <p>
  5538      * A character is considered to be a letter if its general
  5539      * category type, provided by {@code Character.getType(ch)},
  5540      * is any of the following:
  5541      * <ul>
  5542      * <li> {@code UPPERCASE_LETTER}
  5543      * <li> {@code LOWERCASE_LETTER}
  5544      * <li> {@code TITLECASE_LETTER}
  5545      * <li> {@code MODIFIER_LETTER}
  5546      * <li> {@code OTHER_LETTER}
  5547      * </ul>
  5548      *
  5549      * Not all letters have case. Many characters are
  5550      * letters but are neither uppercase nor lowercase nor titlecase.
  5551      *
  5552      * <p><b>Note:</b> This method cannot handle <a
  5553      * href="#supplementary"> supplementary characters</a>. To support
  5554      * all Unicode characters, including supplementary characters, use
  5555      * the {@link #isLetter(int)} method.
  5556      *
  5557      * @param   ch   the character to be tested.
  5558      * @return  {@code true} if the character is a letter;
  5559      *          {@code false} otherwise.
  5560      * @see     Character#isDigit(char)
  5561      * @see     Character#isJavaIdentifierStart(char)
  5562      * @see     Character#isJavaLetter(char)
  5563      * @see     Character#isJavaLetterOrDigit(char)
  5564      * @see     Character#isLetterOrDigit(char)
  5565      * @see     Character#isLowerCase(char)
  5566      * @see     Character#isTitleCase(char)
  5567      * @see     Character#isUnicodeIdentifierStart(char)
  5568      * @see     Character#isUpperCase(char)
  5569      */
  5570     public static boolean isLetter(char ch) {
  5571         return isLetter((int)ch);
  5572     }
  5573 
  5574     /**
  5575      * Determines if the specified character (Unicode code point) is a letter.
  5576      * <p>
  5577      * A character is considered to be a letter if its general
  5578      * category type, provided by {@link Character#getType(int) getType(codePoint)},
  5579      * is any of the following:
  5580      * <ul>
  5581      * <li> {@code UPPERCASE_LETTER}
  5582      * <li> {@code LOWERCASE_LETTER}
  5583      * <li> {@code TITLECASE_LETTER}
  5584      * <li> {@code MODIFIER_LETTER}
  5585      * <li> {@code OTHER_LETTER}
  5586      * </ul>
  5587      *
  5588      * Not all letters have case. Many characters are
  5589      * letters but are neither uppercase nor lowercase nor titlecase.
  5590      *
  5591      * @param   codePoint the character (Unicode code point) to be tested.
  5592      * @return  {@code true} if the character is a letter;
  5593      *          {@code false} otherwise.
  5594      * @see     Character#isDigit(int)
  5595      * @see     Character#isJavaIdentifierStart(int)
  5596      * @see     Character#isLetterOrDigit(int)
  5597      * @see     Character#isLowerCase(int)
  5598      * @see     Character#isTitleCase(int)
  5599      * @see     Character#isUnicodeIdentifierStart(int)
  5600      * @see     Character#isUpperCase(int)
  5601      * @since   1.5
  5602      */
  5603     public static boolean isLetter(int codePoint) {
  5604         return ((((1 << Character.UPPERCASE_LETTER) |
  5605             (1 << Character.LOWERCASE_LETTER) |
  5606             (1 << Character.TITLECASE_LETTER) |
  5607             (1 << Character.MODIFIER_LETTER) |
  5608             (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
  5609             != 0;
  5610     }
  5611 
  5612     /**
  5613      * Determines if the specified character is a letter or digit.
  5614      * <p>
  5615      * A character is considered to be a letter or digit if either
  5616      * {@code Character.isLetter(char ch)} or
  5617      * {@code Character.isDigit(char ch)} returns
  5618      * {@code true} for the character.
  5619      *
  5620      * <p><b>Note:</b> This method cannot handle <a
  5621      * href="#supplementary"> supplementary characters</a>. To support
  5622      * all Unicode characters, including supplementary characters, use
  5623      * the {@link #isLetterOrDigit(int)} method.
  5624      *
  5625      * @param   ch   the character to be tested.
  5626      * @return  {@code true} if the character is a letter or digit;
  5627      *          {@code false} otherwise.
  5628      * @see     Character#isDigit(char)
  5629      * @see     Character#isJavaIdentifierPart(char)
  5630      * @see     Character#isJavaLetter(char)
  5631      * @see     Character#isJavaLetterOrDigit(char)
  5632      * @see     Character#isLetter(char)
  5633      * @see     Character#isUnicodeIdentifierPart(char)
  5634      * @since   1.0.2
  5635      */
  5636     public static boolean isLetterOrDigit(char ch) {
  5637         return isLetterOrDigit((int)ch);
  5638     }
  5639 
  5640     /**
  5641      * Determines if the specified character (Unicode code point) is a letter or digit.
  5642      * <p>
  5643      * A character is considered to be a letter or digit if either
  5644      * {@link #isLetter(int) isLetter(codePoint)} or
  5645      * {@link #isDigit(int) isDigit(codePoint)} returns
  5646      * {@code true} for the character.
  5647      *
  5648      * @param   codePoint the character (Unicode code point) to be tested.
  5649      * @return  {@code true} if the character is a letter or digit;
  5650      *          {@code false} otherwise.
  5651      * @see     Character#isDigit(int)
  5652      * @see     Character#isJavaIdentifierPart(int)
  5653      * @see     Character#isLetter(int)
  5654      * @see     Character#isUnicodeIdentifierPart(int)
  5655      * @since   1.5
  5656      */
  5657     public static boolean isLetterOrDigit(int codePoint) {
  5658         return ((((1 << Character.UPPERCASE_LETTER) |
  5659             (1 << Character.LOWERCASE_LETTER) |
  5660             (1 << Character.TITLECASE_LETTER) |
  5661             (1 << Character.MODIFIER_LETTER) |
  5662             (1 << Character.OTHER_LETTER) |
  5663             (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
  5664             != 0;
  5665     }
  5666 
  5667     /**
  5668      * Determines if the specified character is permissible as the first
  5669      * character in a Java identifier.
  5670      * <p>
  5671      * A character may start a Java identifier if and only if
  5672      * one of the following is true:
  5673      * <ul>
  5674      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
  5675      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
  5676      * <li> {@code ch} is a currency symbol (such as {@code '$'})
  5677      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
  5678      * </ul>
  5679      *
  5680      * @param   ch the character to be tested.
  5681      * @return  {@code true} if the character may start a Java
  5682      *          identifier; {@code false} otherwise.
  5683      * @see     Character#isJavaLetterOrDigit(char)
  5684      * @see     Character#isJavaIdentifierStart(char)
  5685      * @see     Character#isJavaIdentifierPart(char)
  5686      * @see     Character#isLetter(char)
  5687      * @see     Character#isLetterOrDigit(char)
  5688      * @see     Character#isUnicodeIdentifierStart(char)
     * @since   1.0.2
  5690      * @deprecated Replaced by isJavaIdentifierStart(char).
  5691      */
  5692     @Deprecated
  5693     public static boolean isJavaLetter(char ch) {
  5694         return isJavaIdentifierStart(ch);
  5695     }
  5696 
  5697     /**
  5698      * Determines if the specified character may be part of a Java
  5699      * identifier as other than the first character.
  5700      * <p>
  5701      * A character may be part of a Java identifier if and only if any
  5702      * of the following are true:
  5703      * <ul>
  5704      * <li>  it is a letter
  5705      * <li>  it is a currency symbol (such as {@code '$'})
  5706      * <li>  it is a connecting punctuation character (such as {@code '_'})
  5707      * <li>  it is a digit
  5708      * <li>  it is a numeric letter (such as a Roman numeral character)
  5709      * <li>  it is a combining mark
  5710      * <li>  it is a non-spacing mark
  5711      * <li> {@code isIdentifierIgnorable} returns
  5712      * {@code true} for the character.
  5713      * </ul>
  5714      *
  5715      * @param   ch the character to be tested.
  5716      * @return  {@code true} if the character may be part of a
  5717      *          Java identifier; {@code false} otherwise.
  5718      * @see     Character#isJavaLetter(char)
  5719      * @see     Character#isJavaIdentifierStart(char)
  5720      * @see     Character#isJavaIdentifierPart(char)
  5721      * @see     Character#isLetter(char)
  5722      * @see     Character#isLetterOrDigit(char)
  5723      * @see     Character#isUnicodeIdentifierPart(char)
  5724      * @see     Character#isIdentifierIgnorable(char)
     * @since   1.0.2
  5726      * @deprecated Replaced by isJavaIdentifierPart(char).
  5727      */
  5728     @Deprecated
  5729     public static boolean isJavaLetterOrDigit(char ch) {
  5730         return isJavaIdentifierPart(ch);
  5731     }
  5732 
  5733     /**
     * Determines if the specified character (Unicode code point) is alphabetic.
  5735      * <p>
  5736      * A character is considered to be alphabetic if its general category type,
  5737      * provided by {@link Character#getType(int) getType(codePoint)}, is any of
  5738      * the following:
  5739      * <ul>
  5740      * <li> <code>UPPERCASE_LETTER</code>
  5741      * <li> <code>LOWERCASE_LETTER</code>
  5742      * <li> <code>TITLECASE_LETTER</code>
  5743      * <li> <code>MODIFIER_LETTER</code>
  5744      * <li> <code>OTHER_LETTER</code>
  5745      * <li> <code>LETTER_NUMBER</code>
  5746      * </ul>
  5747      * or it has contributory property Other_Alphabetic as defined by the
  5748      * Unicode Standard.
  5749      *
  5750      * @param   codePoint the character (Unicode code point) to be tested.
     * @return  <code>true</code> if the character is a Unicode alphabetic
     *          character, <code>false</code> otherwise.
  5753      * @since   1.7
  5754      */
  5755     public static boolean isAlphabetic(int codePoint) {
  5756         return (((((1 << Character.UPPERCASE_LETTER) |
  5757             (1 << Character.LOWERCASE_LETTER) |
  5758             (1 << Character.TITLECASE_LETTER) |
  5759             (1 << Character.MODIFIER_LETTER) |
  5760             (1 << Character.OTHER_LETTER) |
  5761             (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) ||
  5762             CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
  5763     }
  5764 
  5765     /**
  5766      * Determines if the specified character (Unicode code point) is a CJKV
  5767      * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
  5768      * the Unicode Standard.
  5769      *
  5770      * @param   codePoint the character (Unicode code point) to be tested.
  5771      * @return  <code>true</code> if the character is a Unicode ideograph
  5772      *          character, <code>false</code> otherwise.
  5773      * @since   1.7
  5774      */
  5775     public static boolean isIdeographic(int codePoint) {
  5776         return CharacterData.of(codePoint).isIdeographic(codePoint);
  5777     }
  5778 
  5779     /**
  5780      * Determines if the specified character is
  5781      * permissible as the first character in a Java identifier.
  5782      * <p>
  5783      * A character may start a Java identifier if and only if
  5784      * one of the following conditions is true:
  5785      * <ul>
  5786      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
  5787      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
  5788      * <li> {@code ch} is a currency symbol (such as {@code '$'})
  5789      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
  5790      * </ul>
  5791      *
  5792      * <p><b>Note:</b> This method cannot handle <a
  5793      * href="#supplementary"> supplementary characters</a>. To support
  5794      * all Unicode characters, including supplementary characters, use
  5795      * the {@link #isJavaIdentifierStart(int)} method.
  5796      *
  5797      * @param   ch the character to be tested.
  5798      * @return  {@code true} if the character may start a Java identifier;
  5799      *          {@code false} otherwise.
  5800      * @see     Character#isJavaIdentifierPart(char)
  5801      * @see     Character#isLetter(char)
  5802      * @see     Character#isUnicodeIdentifierStart(char)
  5803      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
  5804      * @since   1.1
  5805      */
  5806     public static boolean isJavaIdentifierStart(char ch) {
  5807         return isJavaIdentifierStart((int)ch);
  5808     }
  5809 
  5810     /**
  5811      * Determines if the character (Unicode code point) is
  5812      * permissible as the first character in a Java identifier.
  5813      * <p>
  5814      * A character may start a Java identifier if and only if
  5815      * one of the following conditions is true:
  5816      * <ul>
  5817      * <li> {@link #isLetter(int) isLetter(codePoint)}
  5818      *      returns {@code true}
  5819      * <li> {@link #getType(int) getType(codePoint)}
  5820      *      returns {@code LETTER_NUMBER}
  5821      * <li> the referenced character is a currency symbol (such as {@code '$'})
  5822      * <li> the referenced character is a connecting punctuation character
  5823      *      (such as {@code '_'}).
  5824      * </ul>
  5825      *
  5826      * @param   codePoint the character (Unicode code point) to be tested.
  5827      * @return  {@code true} if the character may start a Java identifier;
  5828      *          {@code false} otherwise.
  5829      * @see     Character#isJavaIdentifierPart(int)
  5830      * @see     Character#isLetter(int)
  5831      * @see     Character#isUnicodeIdentifierStart(int)
  5832      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
  5833      * @since   1.5
  5834      */
  5835     public static boolean isJavaIdentifierStart(int codePoint) {
  5836         return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
  5837     }
  5838 
  5839     /**
  5840      * Determines if the specified character may be part of a Java
  5841      * identifier as other than the first character.
  5842      * <p>
  5843      * A character may be part of a Java identifier if any of the following
  5844      * are true:
  5845      * <ul>
  5846      * <li>  it is a letter
  5847      * <li>  it is a currency symbol (such as {@code '$'})
  5848      * <li>  it is a connecting punctuation character (such as {@code '_'})
  5849      * <li>  it is a digit
  5850      * <li>  it is a numeric letter (such as a Roman numeral character)
  5851      * <li>  it is a combining mark
  5852      * <li>  it is a non-spacing mark
  5853      * <li> {@code isIdentifierIgnorable} returns
  5854      * {@code true} for the character
  5855      * </ul>
  5856      *
  5857      * <p><b>Note:</b> This method cannot handle <a
  5858      * href="#supplementary"> supplementary characters</a>. To support
  5859      * all Unicode characters, including supplementary characters, use
  5860      * the {@link #isJavaIdentifierPart(int)} method.
  5861      *
  5862      * @param   ch      the character to be tested.
  5863      * @return {@code true} if the character may be part of a
  5864      *          Java identifier; {@code false} otherwise.
  5865      * @see     Character#isIdentifierIgnorable(char)
  5866      * @see     Character#isJavaIdentifierStart(char)
  5867      * @see     Character#isLetterOrDigit(char)
  5868      * @see     Character#isUnicodeIdentifierPart(char)
  5869      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
  5870      * @since   1.1
  5871      */
  5872     public static boolean isJavaIdentifierPart(char ch) {
  5873         return isJavaIdentifierPart((int)ch);
  5874     }
  5875 
  5876     /**
  5877      * Determines if the character (Unicode code point) may be part of a Java
  5878      * identifier as other than the first character.
  5879      * <p>
  5880      * A character may be part of a Java identifier if any of the following
  5881      * are true:
  5882      * <ul>
  5883      * <li>  it is a letter
  5884      * <li>  it is a currency symbol (such as {@code '$'})
  5885      * <li>  it is a connecting punctuation character (such as {@code '_'})
  5886      * <li>  it is a digit
  5887      * <li>  it is a numeric letter (such as a Roman numeral character)
  5888      * <li>  it is a combining mark
  5889      * <li>  it is a non-spacing mark
  5890      * <li> {@link #isIdentifierIgnorable(int)
  5891      * isIdentifierIgnorable(codePoint)} returns {@code true} for
  5892      * the character
  5893      * </ul>
  5894      *
  5895      * @param   codePoint the character (Unicode code point) to be tested.
  5896      * @return {@code true} if the character may be part of a
  5897      *          Java identifier; {@code false} otherwise.
  5898      * @see     Character#isIdentifierIgnorable(int)
  5899      * @see     Character#isJavaIdentifierStart(int)
  5900      * @see     Character#isLetterOrDigit(int)
  5901      * @see     Character#isUnicodeIdentifierPart(int)
  5902      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
  5903      * @since   1.5
  5904      */
  5905     public static boolean isJavaIdentifierPart(int codePoint) {
  5906         return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
  5907     }
  5908 
  5909     /**
  5910      * Determines if the specified character is permissible as the
  5911      * first character in a Unicode identifier.
  5912      * <p>
  5913      * A character may start a Unicode identifier if and only if
  5914      * one of the following conditions is true:
  5915      * <ul>
  5916      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
  5917      * <li> {@link #getType(char) getType(ch)} returns
  5918      *      {@code LETTER_NUMBER}.
  5919      * </ul>
  5920      *
  5921      * <p><b>Note:</b> This method cannot handle <a
  5922      * href="#supplementary"> supplementary characters</a>. To support
  5923      * all Unicode characters, including supplementary characters, use
  5924      * the {@link #isUnicodeIdentifierStart(int)} method.
  5925      *
  5926      * @param   ch      the character to be tested.
  5927      * @return  {@code true} if the character may start a Unicode
  5928      *          identifier; {@code false} otherwise.
  5929      * @see     Character#isJavaIdentifierStart(char)
  5930      * @see     Character#isLetter(char)
  5931      * @see     Character#isUnicodeIdentifierPart(char)
  5932      * @since   1.1
  5933      */
  5934     public static boolean isUnicodeIdentifierStart(char ch) {
  5935         return isUnicodeIdentifierStart((int)ch);
  5936     }
  5937 
  5938     /**
  5939      * Determines if the specified character (Unicode code point) is permissible as the
  5940      * first character in a Unicode identifier.
  5941      * <p>
  5942      * A character may start a Unicode identifier if and only if
  5943      * one of the following conditions is true:
  5944      * <ul>
  5945      * <li> {@link #isLetter(int) isLetter(codePoint)}
  5946      *      returns {@code true}
  5947      * <li> {@link #getType(int) getType(codePoint)}
  5948      *      returns {@code LETTER_NUMBER}.
  5949      * </ul>
  5950      * @param   codePoint the character (Unicode code point) to be tested.
  5951      * @return  {@code true} if the character may start a Unicode
  5952      *          identifier; {@code false} otherwise.
  5953      * @see     Character#isJavaIdentifierStart(int)
  5954      * @see     Character#isLetter(int)
  5955      * @see     Character#isUnicodeIdentifierPart(int)
  5956      * @since   1.5
  5957      */
  5958     public static boolean isUnicodeIdentifierStart(int codePoint) {
  5959         return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
  5960     }
  5961 
  5962     /**
  5963      * Determines if the specified character may be part of a Unicode
  5964      * identifier as other than the first character.
  5965      * <p>
  5966      * A character may be part of a Unicode identifier if and only if
  5967      * one of the following statements is true:
  5968      * <ul>
  5969      * <li>  it is a letter
  5970      * <li>  it is a connecting punctuation character (such as {@code '_'})
  5971      * <li>  it is a digit
  5972      * <li>  it is a numeric letter (such as a Roman numeral character)
  5973      * <li>  it is a combining mark
  5974      * <li>  it is a non-spacing mark
  5975      * <li> {@code isIdentifierIgnorable} returns
  5976      * {@code true} for this character.
  5977      * </ul>
  5978      *
  5979      * <p><b>Note:</b> This method cannot handle <a
  5980      * href="#supplementary"> supplementary characters</a>. To support
  5981      * all Unicode characters, including supplementary characters, use
  5982      * the {@link #isUnicodeIdentifierPart(int)} method.
  5983      *
  5984      * @param   ch      the character to be tested.
  5985      * @return  {@code true} if the character may be part of a
  5986      *          Unicode identifier; {@code false} otherwise.
  5987      * @see     Character#isIdentifierIgnorable(char)
  5988      * @see     Character#isJavaIdentifierPart(char)
  5989      * @see     Character#isLetterOrDigit(char)
  5990      * @see     Character#isUnicodeIdentifierStart(char)
  5991      * @since   1.1
  5992      */
  5993     public static boolean isUnicodeIdentifierPart(char ch) {
  5994         return isUnicodeIdentifierPart((int)ch);
  5995     }
  5996 
  5997     /**
  5998      * Determines if the specified character (Unicode code point) may be part of a Unicode
  5999      * identifier as other than the first character.
  6000      * <p>
  6001      * A character may be part of a Unicode identifier if and only if
  6002      * one of the following statements is true:
  6003      * <ul>
  6004      * <li>  it is a letter
  6005      * <li>  it is a connecting punctuation character (such as {@code '_'})
  6006      * <li>  it is a digit
  6007      * <li>  it is a numeric letter (such as a Roman numeral character)
  6008      * <li>  it is a combining mark
  6009      * <li>  it is a non-spacing mark
  6010      * <li> {@code isIdentifierIgnorable} returns
  6011      * {@code true} for this character.
  6012      * </ul>
  6013      * @param   codePoint the character (Unicode code point) to be tested.
  6014      * @return  {@code true} if the character may be part of a
  6015      *          Unicode identifier; {@code false} otherwise.
  6016      * @see     Character#isIdentifierIgnorable(int)
  6017      * @see     Character#isJavaIdentifierPart(int)
  6018      * @see     Character#isLetterOrDigit(int)
  6019      * @see     Character#isUnicodeIdentifierStart(int)
  6020      * @since   1.5
  6021      */
  6022     public static boolean isUnicodeIdentifierPart(int codePoint) {
  6023         return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
  6024     }
  6025 
  6026     /**
  6027      * Determines if the specified character should be regarded as
  6028      * an ignorable character in a Java identifier or a Unicode identifier.
  6029      * <p>
  6030      * The following Unicode characters are ignorable in a Java identifier
  6031      * or a Unicode identifier:
  6032      * <ul>
  6033      * <li>ISO control characters that are not whitespace
  6034      * <ul>
  6035      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
  6036      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
  6037      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
  6038      * </ul>
  6039      *
  6040      * <li>all characters that have the {@code FORMAT} general
  6041      * category value
  6042      * </ul>
  6043      *
  6044      * <p><b>Note:</b> This method cannot handle <a
  6045      * href="#supplementary"> supplementary characters</a>. To support
  6046      * all Unicode characters, including supplementary characters, use
  6047      * the {@link #isIdentifierIgnorable(int)} method.
  6048      *
  6049      * @param   ch      the character to be tested.
  6050      * @return  {@code true} if the character is an ignorable control
  6051      *          character that may be part of a Java or Unicode identifier;
  6052      *           {@code false} otherwise.
  6053      * @see     Character#isJavaIdentifierPart(char)
  6054      * @see     Character#isUnicodeIdentifierPart(char)
  6055      * @since   1.1
  6056      */
  6057     public static boolean isIdentifierIgnorable(char ch) {
  6058         return isIdentifierIgnorable((int)ch);
  6059     }
  6060 
  6061     /**
  6062      * Determines if the specified character (Unicode code point) should be regarded as
  6063      * an ignorable character in a Java identifier or a Unicode identifier.
  6064      * <p>
  6065      * The following Unicode characters are ignorable in a Java identifier
  6066      * or a Unicode identifier:
  6067      * <ul>
  6068      * <li>ISO control characters that are not whitespace
  6069      * <ul>
  6070      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
  6071      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
  6072      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
  6073      * </ul>
  6074      *
  6075      * <li>all characters that have the {@code FORMAT} general
  6076      * category value
  6077      * </ul>
  6078      *
  6079      * @param   codePoint the character (Unicode code point) to be tested.
  6080      * @return  {@code true} if the character is an ignorable control
  6081      *          character that may be part of a Java or Unicode identifier;
  6082      *          {@code false} otherwise.
  6083      * @see     Character#isJavaIdentifierPart(int)
  6084      * @see     Character#isUnicodeIdentifierPart(int)
  6085      * @since   1.5
  6086      */
  6087     public static boolean isIdentifierIgnorable(int codePoint) {
  6088         return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
  6089     }
  6090 
  6091     /**
  6092      * Converts the character argument to lowercase using case
  6093      * mapping information from the UnicodeData file.
  6094      * <p>
  6095      * Note that
  6096      * {@code Character.isLowerCase(Character.toLowerCase(ch))}
  6097      * does not always return {@code true} for some ranges of
  6098      * characters, particularly those that are symbols or ideographs.
  6099      *
  6100      * <p>In general, {@link String#toLowerCase()} should be used to map
  6101      * characters to lowercase. {@code String} case mapping methods
  6102      * have several benefits over {@code Character} case mapping methods.
  6103      * {@code String} case mapping methods can perform locale-sensitive
  6104      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
  6105      * the {@code Character} case mapping methods cannot.
  6106      *
  6107      * <p><b>Note:</b> This method cannot handle <a
  6108      * href="#supplementary"> supplementary characters</a>. To support
  6109      * all Unicode characters, including supplementary characters, use
  6110      * the {@link #toLowerCase(int)} method.
  6111      *
  6112      * @param   ch   the character to be converted.
  6113      * @return  the lowercase equivalent of the character, if any;
  6114      *          otherwise, the character itself.
  6115      * @see     Character#isLowerCase(char)
  6116      * @see     String#toLowerCase()
  6117      */
  6118     public static char toLowerCase(char ch) {
  6119         return (char)toLowerCase((int)ch);
  6120     }
  6121 
  6122     /**
  6123      * Converts the character (Unicode code point) argument to
  6124      * lowercase using case mapping information from the UnicodeData
  6125      * file.
  6126      *
  6127      * <p> Note that
  6128      * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
  6129      * does not always return {@code true} for some ranges of
  6130      * characters, particularly those that are symbols or ideographs.
  6131      *
  6132      * <p>In general, {@link String#toLowerCase()} should be used to map
  6133      * characters to lowercase. {@code String} case mapping methods
  6134      * have several benefits over {@code Character} case mapping methods.
  6135      * {@code String} case mapping methods can perform locale-sensitive
  6136      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
  6137      * the {@code Character} case mapping methods cannot.
  6138      *
  6139      * @param   codePoint   the character (Unicode code point) to be converted.
  6140      * @return  the lowercase equivalent of the character (Unicode code
  6141      *          point), if any; otherwise, the character itself.
  6142      * @see     Character#isLowerCase(int)
  6143      * @see     String#toLowerCase()
  6144      *
  6145      * @since   1.5
  6146      */
  6147     public static int toLowerCase(int codePoint) {
  6148         return CharacterData.of(codePoint).toLowerCase(codePoint);
  6149     }
  6150 
  6151     /**
  6152      * Converts the character argument to uppercase using case mapping
  6153      * information from the UnicodeData file.
  6154      * <p>
  6155      * Note that
  6156      * {@code Character.isUpperCase(Character.toUpperCase(ch))}
  6157      * does not always return {@code true} for some ranges of
  6158      * characters, particularly those that are symbols or ideographs.
  6159      *
  6160      * <p>In general, {@link String#toUpperCase()} should be used to map
  6161      * characters to uppercase. {@code String} case mapping methods
  6162      * have several benefits over {@code Character} case mapping methods.
  6163      * {@code String} case mapping methods can perform locale-sensitive
  6164      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
  6165      * the {@code Character} case mapping methods cannot.
  6166      *
  6167      * <p><b>Note:</b> This method cannot handle <a
  6168      * href="#supplementary"> supplementary characters</a>. To support
  6169      * all Unicode characters, including supplementary characters, use
  6170      * the {@link #toUpperCase(int)} method.
  6171      *
  6172      * @param   ch   the character to be converted.
  6173      * @return  the uppercase equivalent of the character, if any;
  6174      *          otherwise, the character itself.
  6175      * @see     Character#isUpperCase(char)
  6176      * @see     String#toUpperCase()
  6177      */
  6178     public static char toUpperCase(char ch) {
  6179         return (char)toUpperCase((int)ch);
  6180     }
  6181 
  6182     /**
  6183      * Converts the character (Unicode code point) argument to
  6184      * uppercase using case mapping information from the UnicodeData
  6185      * file.
  6186      *
  6187      * <p>Note that
  6188      * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
  6189      * does not always return {@code true} for some ranges of
  6190      * characters, particularly those that are symbols or ideographs.
  6191      *
  6192      * <p>In general, {@link String#toUpperCase()} should be used to map
  6193      * characters to uppercase. {@code String} case mapping methods
  6194      * have several benefits over {@code Character} case mapping methods.
  6195      * {@code String} case mapping methods can perform locale-sensitive
  6196      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
  6197      * the {@code Character} case mapping methods cannot.
  6198      *
  6199      * @param   codePoint   the character (Unicode code point) to be converted.
  6200      * @return  the uppercase equivalent of the character, if any;
  6201      *          otherwise, the character itself.
  6202      * @see     Character#isUpperCase(int)
  6203      * @see     String#toUpperCase()
  6204      *
  6205      * @since   1.5
  6206      */
  6207     public static int toUpperCase(int codePoint) {
  6208         return CharacterData.of(codePoint).toUpperCase(codePoint);
  6209     }
  6210 
  6211     /**
  6212      * Converts the character argument to titlecase using case mapping
  6213      * information from the UnicodeData file. If a character has no
  6214      * explicit titlecase mapping and is not itself a titlecase char
  6215      * according to UnicodeData, then the uppercase mapping is
  6216      * returned as an equivalent titlecase mapping. If the
  6217      * {@code char} argument is already a titlecase
  6218      * {@code char}, the same {@code char} value will be
  6219      * returned.
  6220      * <p>
  6221      * Note that
  6222      * {@code Character.isTitleCase(Character.toTitleCase(ch))}
  6223      * does not always return {@code true} for some ranges of
  6224      * characters.
  6225      *
  6226      * <p><b>Note:</b> This method cannot handle <a
  6227      * href="#supplementary"> supplementary characters</a>. To support
  6228      * all Unicode characters, including supplementary characters, use
  6229      * the {@link #toTitleCase(int)} method.
  6230      *
  6231      * @param   ch   the character to be converted.
  6232      * @return  the titlecase equivalent of the character, if any;
  6233      *          otherwise, the character itself.
  6234      * @see     Character#isTitleCase(char)
  6235      * @see     Character#toLowerCase(char)
  6236      * @see     Character#toUpperCase(char)
  6237      * @since   1.0.2
  6238      */
  6239     public static char toTitleCase(char ch) {
  6240         return (char)toTitleCase((int)ch);
  6241     }
  6242 
  6243     /**
  6244      * Converts the character (Unicode code point) argument to titlecase using case mapping
  6245      * information from the UnicodeData file. If a character has no
  6246      * explicit titlecase mapping and is not itself a titlecase char
  6247      * according to UnicodeData, then the uppercase mapping is
  6248      * returned as an equivalent titlecase mapping. If the
  6249      * character argument is already a titlecase
  6250      * character, the same character value will be
  6251      * returned.
  6252      *
  6253      * <p>Note that
  6254      * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
  6255      * does not always return {@code true} for some ranges of
  6256      * characters.
  6257      *
  6258      * @param   codePoint   the character (Unicode code point) to be converted.
  6259      * @return  the titlecase equivalent of the character, if any;
  6260      *          otherwise, the character itself.
  6261      * @see     Character#isTitleCase(int)
  6262      * @see     Character#toLowerCase(int)
  6263      * @see     Character#toUpperCase(int)
  6264      * @since   1.5
  6265      */
  6266     public static int toTitleCase(int codePoint) {
  6267         return CharacterData.of(codePoint).toTitleCase(codePoint);
  6268     }
  6269 
  6270     /**
  6271      * Returns the numeric value of the character {@code ch} in the
  6272      * specified radix.
  6273      * <p>
  6274      * If the radix is not in the range {@code MIN_RADIX} &le;
  6275      * {@code radix} &le; {@code MAX_RADIX} or if the
  6276      * value of {@code ch} is not a valid digit in the specified
  6277      * radix, {@code -1} is returned. A character is a valid digit
  6278      * if at least one of the following is true:
  6279      * <ul>
  6280      * <li>The method {@code isDigit} is {@code true} of the character
  6281      *     and the Unicode decimal digit value of the character (or its
  6282      *     single-character decomposition) is less than the specified radix.
  6283      *     In this case the decimal digit value is returned.
  6284      * <li>The character is one of the uppercase Latin letters
  6285      *     {@code 'A'} through {@code 'Z'} and its code is less than
  6286      *     {@code radix + 'A' - 10}.
  6287      *     In this case, {@code ch - 'A' + 10}
  6288      *     is returned.
  6289      * <li>The character is one of the lowercase Latin letters
  6290      *     {@code 'a'} through {@code 'z'} and its code is less than
  6291      *     {@code radix + 'a' - 10}.
  6292      *     In this case, {@code ch - 'a' + 10}
  6293      *     is returned.
  6294      * <li>The character is one of the fullwidth uppercase Latin letters A
  6295      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
  6296      *     and its code is less than
  6297      *     {@code radix + '\u005CuFF21' - 10}.
  6298      *     In this case, {@code ch - '\u005CuFF21' + 10}
  6299      *     is returned.
  6300      * <li>The character is one of the fullwidth lowercase Latin letters a
  6301      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
  6302      *     and its code is less than
  6303      *     {@code radix + '\u005CuFF41' - 10}.
  6304      *     In this case, {@code ch - '\u005CuFF41' + 10}
  6305      *     is returned.
  6306      * </ul>
  6307      *
  6308      * <p><b>Note:</b> This method cannot handle <a
  6309      * href="#supplementary"> supplementary characters</a>. To support
  6310      * all Unicode characters, including supplementary characters, use
  6311      * the {@link #digit(int, int)} method.
  6312      *
  6313      * @param   ch      the character to be converted.
  6314      * @param   radix   the radix.
  6315      * @return  the numeric value represented by the character in the
  6316      *          specified radix.
  6317      * @see     Character#forDigit(int, int)
  6318      * @see     Character#isDigit(char)
  6319      */
  6320     public static int digit(char ch, int radix) {
  6321         return digit((int)ch, radix);
  6322     }
  6323 
  6324     /**
  6325      * Returns the numeric value of the specified character (Unicode
  6326      * code point) in the specified radix.
  6327      *
  6328      * <p>If the radix is not in the range {@code MIN_RADIX} &le;
  6329      * {@code radix} &le; {@code MAX_RADIX} or if the
  6330      * character is not a valid digit in the specified
  6331      * radix, {@code -1} is returned. A character is a valid digit
  6332      * if at least one of the following is true:
  6333      * <ul>
  6334      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
  6335      *     and the Unicode decimal digit value of the character (or its
  6336      *     single-character decomposition) is less than the specified radix.
  6337      *     In this case the decimal digit value is returned.
  6338      * <li>The character is one of the uppercase Latin letters
  6339      *     {@code 'A'} through {@code 'Z'} and its code is less than
  6340      *     {@code radix + 'A' - 10}.
  6341      *     In this case, {@code codePoint - 'A' + 10}
  6342      *     is returned.
  6343      * <li>The character is one of the lowercase Latin letters
  6344      *     {@code 'a'} through {@code 'z'} and its code is less than
  6345      *     {@code radix + 'a' - 10}.
  6346      *     In this case, {@code codePoint - 'a' + 10}
  6347      *     is returned.
  6348      * <li>The character is one of the fullwidth uppercase Latin letters A
  6349      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
  6350      *     and its code is less than
  6351      *     {@code radix + '\u005CuFF21' - 10}.
  6352      *     In this case,
  6353      *     {@code codePoint - '\u005CuFF21' + 10}
  6354      *     is returned.
  6355      * <li>The character is one of the fullwidth lowercase Latin letters a
  6356      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
  6357      *     and its code is less than
  6358      *     {@code radix + '\u005CuFF41' - 10}.
  6359      *     In this case,
  6360      *     {@code codePoint - '\u005CuFF41' + 10}
  6361      *     is returned.
  6362      * </ul>
  6363      *
  6364      * @param   codePoint the character (Unicode code point) to be converted.
  6365      * @param   radix   the radix.
  6366      * @return  the numeric value represented by the character in the
  6367      *          specified radix.
  6368      * @see     Character#forDigit(int, int)
  6369      * @see     Character#isDigit(int)
  6370      * @since   1.5
  6371      */
  6372     public static int digit(int codePoint, int radix) {
  6373         return CharacterData.of(codePoint).digit(codePoint, radix);
  6374     }
  6375 
  6376     /**
  6377      * Returns the {@code int} value that the specified Unicode
  6378      * character represents. For example, the character
  6379      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
  6380      * an int with a value of 50.
  6381      * <p>
  6382      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
  6383      * {@code '\u005Cu005A'}), lowercase
  6384      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
  6385      * full width variant ({@code '\u005CuFF21'} through
  6386      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
  6387      * {@code '\u005CuFF5A'}) forms have numeric values from 10
  6388      * through 35. This is independent of the Unicode specification,
  6389      * which does not assign numeric values to these {@code char}
  6390      * values.
  6391      * <p>
  6392      * If the character does not have a numeric value, then -1 is returned.
  6393      * If the character has a numeric value that cannot be represented as a
  6394      * nonnegative integer (for example, a fractional value), then -2
  6395      * is returned.
  6396      *
  6397      * <p><b>Note:</b> This method cannot handle <a
  6398      * href="#supplementary"> supplementary characters</a>. To support
  6399      * all Unicode characters, including supplementary characters, use
  6400      * the {@link #getNumericValue(int)} method.
  6401      *
  6402      * @param   ch      the character to be converted.
  6403      * @return  the numeric value of the character, as a nonnegative {@code int}
  6404      *           value; -2 if the character has a numeric value that is not a
  6405      *          nonnegative integer; -1 if the character has no numeric value.
  6406      * @see     Character#forDigit(int, int)
  6407      * @see     Character#isDigit(char)
  6408      * @since   1.1
  6409      */
  6410     public static int getNumericValue(char ch) {
  6411         return getNumericValue((int)ch);
  6412     }
  6413 
  6414     /**
  6415      * Returns the {@code int} value that the specified
  6416      * character (Unicode code point) represents. For example, the character
  6417      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
  6418      * an {@code int} with a value of 50.
  6419      * <p>
  6420      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
  6421      * {@code '\u005Cu005A'}), lowercase
  6422      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
  6423      * full width variant ({@code '\u005CuFF21'} through
  6424      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
  6425      * {@code '\u005CuFF5A'}) forms have numeric values from 10
  6426      * through 35. This is independent of the Unicode specification,
  6427      * which does not assign numeric values to these {@code char}
  6428      * values.
  6429      * <p>
  6430      * If the character does not have a numeric value, then -1 is returned.
  6431      * If the character has a numeric value that cannot be represented as a
  6432      * nonnegative integer (for example, a fractional value), then -2
  6433      * is returned.
  6434      *
  6435      * @param   codePoint the character (Unicode code point) to be converted.
  6436      * @return  the numeric value of the character, as a nonnegative {@code int}
  6437      *          value; -2 if the character has a numeric value that is not a
  6438      *          nonnegative integer; -1 if the character has no numeric value.
  6439      * @see     Character#forDigit(int, int)
  6440      * @see     Character#isDigit(int)
  6441      * @since   1.5
  6442      */
  6443     public static int getNumericValue(int codePoint) {
  6444         return CharacterData.of(codePoint).getNumericValue(codePoint);
  6445     }
  6446 
  6447     /**
  6448      * Determines if the specified character is ISO-LATIN-1 white space.
  6449      * This method returns {@code true} for the following five
  6450      * characters only:
  6451      * <table>
  6452      * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>
  6453      *     <td>{@code HORIZONTAL TABULATION}</td></tr>
  6454      * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>
  6455      *     <td>{@code NEW LINE}</td></tr>
  6456      * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>
  6457      *     <td>{@code FORM FEED}</td></tr>
  6458      * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>
  6459      *     <td>{@code CARRIAGE RETURN}</td></tr>
  6460      * <tr><td>{@code ' '}</td>             <td>{@code U+0020}</td>
  6461      *     <td>{@code SPACE}</td></tr>
  6462      * </table>
  6463      *
  6464      * @param      ch   the character to be tested.
  6465      * @return     {@code true} if the character is ISO-LATIN-1 white
  6466      *             space; {@code false} otherwise.
  6467      * @see        Character#isSpaceChar(char)
  6468      * @see        Character#isWhitespace(char)
  6469      * @deprecated Replaced by isWhitespace(char).
  6470      */
  6471     @Deprecated
  6472     public static boolean isSpace(char ch) {
  6473         return (ch <= 0x0020) &&
  6474             (((((1L << 0x0009) |
  6475             (1L << 0x000A) |
  6476             (1L << 0x000C) |
  6477             (1L << 0x000D) |
  6478             (1L << 0x0020)) >> ch) & 1L) != 0);
  6479     }
  6480 
  6481 
  6482     /**
  6483      * Determines if the specified character is a Unicode space character.
  6484      * A character is considered to be a space character if and only if
  6485      * it is specified to be a space character by the Unicode Standard. This
  6486      * method returns true if the character's general category type is any of
  6487      * the following:
  6488      * <ul>
  6489      * <li> {@code SPACE_SEPARATOR}
  6490      * <li> {@code LINE_SEPARATOR}
  6491      * <li> {@code PARAGRAPH_SEPARATOR}
  6492      * </ul>
  6493      *
  6494      * <p><b>Note:</b> This method cannot handle <a
  6495      * href="#supplementary"> supplementary characters</a>. To support
  6496      * all Unicode characters, including supplementary characters, use
  6497      * the {@link #isSpaceChar(int)} method.
  6498      *
  6499      * @param   ch      the character to be tested.
  6500      * @return  {@code true} if the character is a space character;
  6501      *          {@code false} otherwise.
  6502      * @see     Character#isWhitespace(char)
  6503      * @since   1.1
  6504      */
  6505     public static boolean isSpaceChar(char ch) {
  6506         return isSpaceChar((int)ch);
  6507     }
  6508 
  6509     /**
  6510      * Determines if the specified character (Unicode code point) is a
  6511      * Unicode space character.  A character is considered to be a
  6512      * space character if and only if it is specified to be a space
  6513      * character by the Unicode Standard. This method returns true if
  6514      * the character's general category type is any of the following:
  6515      *
  6516      * <ul>
  6517      * <li> {@link #SPACE_SEPARATOR}
  6518      * <li> {@link #LINE_SEPARATOR}
  6519      * <li> {@link #PARAGRAPH_SEPARATOR}
  6520      * </ul>
  6521      *
  6522      * @param   codePoint the character (Unicode code point) to be tested.
  6523      * @return  {@code true} if the character is a space character;
  6524      *          {@code false} otherwise.
  6525      * @see     Character#isWhitespace(int)
  6526      * @since   1.5
  6527      */
  6528     public static boolean isSpaceChar(int codePoint) {
  6529         return ((((1 << Character.SPACE_SEPARATOR) |
  6530                   (1 << Character.LINE_SEPARATOR) |
  6531                   (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
  6532             != 0;
  6533     }
  6534 
  6535     /**
  6536      * Determines if the specified character is white space according to Java.
  6537      * A character is a Java whitespace character if and only if it satisfies
  6538      * one of the following criteria:
  6539      * <ul>
  6540      * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
  6541      *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
  6542      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
  6543      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
  6544      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
  6545      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
  6546      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
  6547      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
  6548      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
  6549      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
  6550      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
  6551      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
  6552      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
  6553      * </ul>
  6554      *
  6555      * <p><b>Note:</b> This method cannot handle <a
  6556      * href="#supplementary"> supplementary characters</a>. To support
  6557      * all Unicode characters, including supplementary characters, use
  6558      * the {@link #isWhitespace(int)} method.
  6559      *
  6560      * @param   ch the character to be tested.
  6561      * @return  {@code true} if the character is a Java whitespace
  6562      *          character; {@code false} otherwise.
  6563      * @see     Character#isSpaceChar(char)
  6564      * @since   1.1
  6565      */
  6566     public static boolean isWhitespace(char ch) {
  6567         return isWhitespace((int)ch);
  6568     }
  6569 
  6570     /**
  6571      * Determines if the specified character (Unicode code point) is
  6572      * white space according to Java.  A character is a Java
  6573      * whitespace character if and only if it satisfies one of the
  6574      * following criteria:
  6575      * <ul>
  6576      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
  6577      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
  6578      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
  6579      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
  6580      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
  6581      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
  6582      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
  6583      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
  6584      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
  6585      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
  6586      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
  6587      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
  6588      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
  6589      * </ul>
  6590      *
  6591      *
  6592      * @param   codePoint the character (Unicode code point) to be tested.
  6593      * @return  {@code true} if the character is a Java whitespace
  6594      *          character; {@code false} otherwise.
  6595      * @see     Character#isSpaceChar(int)
  6596      * @since   1.5
  6597      */
  6598     public static boolean isWhitespace(int codePoint) {
  6599         return CharacterData.of(codePoint).isWhitespace(codePoint);
  6600     }
  6601 
  6602     /**
  6603      * Determines if the specified character is an ISO control
  6604      * character.  A character is considered to be an ISO control
  6605      * character if its code is in the range {@code '\u005Cu0000'}
  6606      * through {@code '\u005Cu001F'} or in the range
  6607      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
  6608      *
  6609      * <p><b>Note:</b> This method cannot handle <a
  6610      * href="#supplementary"> supplementary characters</a>. To support
  6611      * all Unicode characters, including supplementary characters, use
  6612      * the {@link #isISOControl(int)} method.
  6613      *
  6614      * @param   ch      the character to be tested.
  6615      * @return  {@code true} if the character is an ISO control character;
  6616      *          {@code false} otherwise.
  6617      *
  6618      * @see     Character#isSpaceChar(char)
  6619      * @see     Character#isWhitespace(char)
  6620      * @since   1.1
  6621      */
  6622     public static boolean isISOControl(char ch) {
  6623         return isISOControl((int)ch);
  6624     }
  6625 
  6626     /**
  6627      * Determines if the referenced character (Unicode code point) is an ISO control
  6628      * character.  A character is considered to be an ISO control
  6629      * character if its code is in the range {@code '\u005Cu0000'}
  6630      * through {@code '\u005Cu001F'} or in the range
  6631      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
  6632      *
  6633      * @param   codePoint the character (Unicode code point) to be tested.
  6634      * @return  {@code true} if the character is an ISO control character;
  6635      *          {@code false} otherwise.
  6636      * @see     Character#isSpaceChar(int)
  6637      * @see     Character#isWhitespace(int)
  6638      * @since   1.5
  6639      */
  6640     public static boolean isISOControl(int codePoint) {
  6641         // Optimized form of:
  6642         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
  6643         //     (codePoint >= 0x7F && codePoint <= 0x9F);
  6644         return codePoint <= 0x9F &&
  6645             (codePoint >= 0x7F || (codePoint >>> 5 == 0));
  6646     }
  6647 
  6648     /**
  6649      * Returns a value indicating a character's general category.
  6650      *
  6651      * <p><b>Note:</b> This method cannot handle <a
  6652      * href="#supplementary"> supplementary characters</a>. To support
  6653      * all Unicode characters, including supplementary characters, use
  6654      * the {@link #getType(int)} method.
  6655      *
  6656      * @param   ch      the character to be tested.
  6657      * @return  a value of type {@code int} representing the
  6658      *          character's general category.
  6659      * @see     Character#COMBINING_SPACING_MARK
  6660      * @see     Character#CONNECTOR_PUNCTUATION
  6661      * @see     Character#CONTROL
  6662      * @see     Character#CURRENCY_SYMBOL
  6663      * @see     Character#DASH_PUNCTUATION
  6664      * @see     Character#DECIMAL_DIGIT_NUMBER
  6665      * @see     Character#ENCLOSING_MARK
  6666      * @see     Character#END_PUNCTUATION
  6667      * @see     Character#FINAL_QUOTE_PUNCTUATION
  6668      * @see     Character#FORMAT
  6669      * @see     Character#INITIAL_QUOTE_PUNCTUATION
  6670      * @see     Character#LETTER_NUMBER
  6671      * @see     Character#LINE_SEPARATOR
  6672      * @see     Character#LOWERCASE_LETTER
  6673      * @see     Character#MATH_SYMBOL
  6674      * @see     Character#MODIFIER_LETTER
  6675      * @see     Character#MODIFIER_SYMBOL
  6676      * @see     Character#NON_SPACING_MARK
  6677      * @see     Character#OTHER_LETTER
  6678      * @see     Character#OTHER_NUMBER
  6679      * @see     Character#OTHER_PUNCTUATION
  6680      * @see     Character#OTHER_SYMBOL
  6681      * @see     Character#PARAGRAPH_SEPARATOR
  6682      * @see     Character#PRIVATE_USE
  6683      * @see     Character#SPACE_SEPARATOR
  6684      * @see     Character#START_PUNCTUATION
  6685      * @see     Character#SURROGATE
  6686      * @see     Character#TITLECASE_LETTER
  6687      * @see     Character#UNASSIGNED
  6688      * @see     Character#UPPERCASE_LETTER
  6689      * @since   1.1
  6690      */
  6691     public static int getType(char ch) {
  6692         return getType((int)ch);
  6693     }
  6694 
  6695     /**
  6696      * Returns a value indicating a character's general category.
  6697      *
  6698      * @param   codePoint the character (Unicode code point) to be tested.
  6699      * @return  a value of type {@code int} representing the
  6700      *          character's general category.
  6701      * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
  6702      * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
  6703      * @see     Character#CONTROL CONTROL
  6704      * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
  6705      * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
  6706      * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
  6707      * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
  6708      * @see     Character#END_PUNCTUATION END_PUNCTUATION
  6709      * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
  6710      * @see     Character#FORMAT FORMAT
  6711      * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
  6712      * @see     Character#LETTER_NUMBER LETTER_NUMBER
  6713      * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
  6714      * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
  6715      * @see     Character#MATH_SYMBOL MATH_SYMBOL
  6716      * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
  6717      * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
  6718      * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
  6719      * @see     Character#OTHER_LETTER OTHER_LETTER
  6720      * @see     Character#OTHER_NUMBER OTHER_NUMBER
  6721      * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
  6722      * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
  6723      * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
  6724      * @see     Character#PRIVATE_USE PRIVATE_USE
  6725      * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
  6726      * @see     Character#START_PUNCTUATION START_PUNCTUATION
  6727      * @see     Character#SURROGATE SURROGATE
  6728      * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
  6729      * @see     Character#UNASSIGNED UNASSIGNED
  6730      * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
  6731      * @since   1.5
  6732      */
  6733     public static int getType(int codePoint) {
  6734         return CharacterData.of(codePoint).getType(codePoint);
  6735     }
  6736 
  6737     /**
  6738      * Determines the character representation for a specific digit in
  6739      * the specified radix. If the value of {@code radix} is not a
  6740      * valid radix, or the value of {@code digit} is not a valid
  6741      * digit in the specified radix, the null character
  6742      * ({@code '\u005Cu0000'}) is returned.
  6743      * <p>
  6744      * The {@code radix} argument is valid if it is greater than or
  6745      * equal to {@code MIN_RADIX} and less than or equal to
  6746      * {@code MAX_RADIX}. The {@code digit} argument is valid if
  6747      * {@code 0 <= digit < radix}.
  6748      * <p>
  6749      * If the digit is less than 10, then
  6750      * {@code '0' + digit} is returned. Otherwise, the value
  6751      * {@code 'a' + digit - 10} is returned.
  6752      *
  6753      * @param   digit   the number to convert to a character.
  6754      * @param   radix   the radix.
  6755      * @return  the {@code char} representation of the specified digit
  6756      *          in the specified radix.
  6757      * @see     Character#MIN_RADIX
  6758      * @see     Character#MAX_RADIX
  6759      * @see     Character#digit(char, int)
  6760      */
  6761     public static char forDigit(int digit, int radix) {
  6762         if ((digit >= radix) || (digit < 0)) {
  6763             return '\0';
  6764         }
  6765         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
  6766             return '\0';
  6767         }
  6768         if (digit < 10) {
  6769             return (char)('0' + digit);
  6770         }
  6771         return (char)('a' - 10 + digit);
  6772     }
  6773 
  6774     /**
  6775      * Returns the Unicode directionality property for the given
  6776      * character.  Character directionality is used to calculate the
  6777      * visual ordering of text. The directionality value of undefined
  6778      * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
  6779      *
  6780      * <p><b>Note:</b> This method cannot handle <a
  6781      * href="#supplementary"> supplementary characters</a>. To support
  6782      * all Unicode characters, including supplementary characters, use
  6783      * the {@link #getDirectionality(int)} method.
  6784      *
  6785      * @param  ch {@code char} for which the directionality property
  6786      *            is requested.
  6787      * @return the directionality property of the {@code char} value.
  6788      *
  6789      * @see Character#DIRECTIONALITY_UNDEFINED
  6790      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
  6791      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
  6792      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
  6793      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
  6794      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
  6795      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
  6796      * @see Character#DIRECTIONALITY_ARABIC_NUMBER
  6797      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
  6798      * @see Character#DIRECTIONALITY_NONSPACING_MARK
  6799      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
  6800      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
  6801      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
  6802      * @see Character#DIRECTIONALITY_WHITESPACE
  6803      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
  6804      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
  6805      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
  6806      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
  6807      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
  6808      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
  6809      * @since 1.4
  6810      */
  6811     public static byte getDirectionality(char ch) {
  6812         return getDirectionality((int)ch);
  6813     }
  6814 
  6815     /**
  6816      * Returns the Unicode directionality property for the given
  6817      * character (Unicode code point).  Character directionality is
  6818      * used to calculate the visual ordering of text. The
  6819      * directionality value of undefined character is {@link
  6820      * #DIRECTIONALITY_UNDEFINED}.
  6821      *
  6822      * @param   codePoint the character (Unicode code point) for which
  6823      *          the directionality property is requested.
  6824      * @return the directionality property of the character.
  6825      *
  6826      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
  6827      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
  6828      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
  6829      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
  6830      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
  6831      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
  6832      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
  6833      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
  6834      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
  6835      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
  6836      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
  6837      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
  6838      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
  6839      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
  6840      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
  6841      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
  6842      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
  6843      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
  6844      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
  6845      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
  6846      * @since    1.5
  6847      */
  6848     public static byte getDirectionality(int codePoint) {
  6849         return CharacterData.of(codePoint).getDirectionality(codePoint);
  6850     }
  6851 
  6852     /**
  6853      * Determines whether the character is mirrored according to the
  6854      * Unicode specification.  Mirrored characters should have their
  6855      * glyphs horizontally mirrored when displayed in text that is
  6856      * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
  6857      * PARENTHESIS is semantically defined to be an <i>opening
  6858      * parenthesis</i>.  This will appear as a "(" in text that is
  6859      * left-to-right but as a ")" in text that is right-to-left.
  6860      *
  6861      * <p><b>Note:</b> This method cannot handle <a
  6862      * href="#supplementary"> supplementary characters</a>. To support
  6863      * all Unicode characters, including supplementary characters, use
  6864      * the {@link #isMirrored(int)} method.
  6865      *
  6866      * @param  ch {@code char} for which the mirrored property is requested
  6867      * @return {@code true} if the char is mirrored, {@code false}
  6868      *         if the {@code char} is not mirrored or is not defined.
  6869      * @since 1.4
  6870      */
  6871     public static boolean isMirrored(char ch) {
  6872         return isMirrored((int)ch);
  6873     }
  6874 
  6875     /**
  6876      * Determines whether the specified character (Unicode code point)
  6877      * is mirrored according to the Unicode specification.  Mirrored
  6878      * characters should have their glyphs horizontally mirrored when
  6879      * displayed in text that is right-to-left.  For example,
  6880      * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
  6881      * defined to be an <i>opening parenthesis</i>.  This will appear
  6882      * as a "(" in text that is left-to-right but as a ")" in text
  6883      * that is right-to-left.
  6884      *
  6885      * @param   codePoint the character (Unicode code point) to be tested.
  6886      * @return  {@code true} if the character is mirrored, {@code false}
  6887      *          if the character is not mirrored or is not defined.
  6888      * @since   1.5
  6889      */
  6890     public static boolean isMirrored(int codePoint) {
  6891         return CharacterData.of(codePoint).isMirrored(codePoint);
  6892     }
  6893 
  6894     /**
  6895      * Compares two {@code Character} objects numerically.
  6896      *
  6897      * @param   anotherCharacter   the {@code Character} to be compared.
  6898 
  6899      * @return  the value {@code 0} if the argument {@code Character}
  6900      *          is equal to this {@code Character}; a value less than
  6901      *          {@code 0} if this {@code Character} is numerically less
  6902      *          than the {@code Character} argument; and a value greater than
  6903      *          {@code 0} if this {@code Character} is numerically greater
  6904      *          than the {@code Character} argument (unsigned comparison).
  6905      *          Note that this is strictly a numerical comparison; it is not
  6906      *          locale-dependent.
  6907      * @since   1.2
  6908      */
  6909     public int compareTo(Character anotherCharacter) {
  6910         return compare(this.value, anotherCharacter.value);
  6911     }
  6912 
  6913     /**
  6914      * Compares two {@code char} values numerically.
  6915      * The value returned is identical to what would be returned by:
  6916      * <pre>
  6917      *    Character.valueOf(x).compareTo(Character.valueOf(y))
  6918      * </pre>
  6919      *
  6920      * @param  x the first {@code char} to compare
  6921      * @param  y the second {@code char} to compare
  6922      * @return the value {@code 0} if {@code x == y};
  6923      *         a value less than {@code 0} if {@code x < y}; and
  6924      *         a value greater than {@code 0} if {@code x > y}
  6925      * @since 1.7
  6926      */
  6927     public static int compare(char x, char y) {
  6928         return x - y;
  6929     }
  6930 
  6931     /**
  6932      * Converts the character (Unicode code point) argument to uppercase using
  6933      * information from the UnicodeData file.
  6934      * <p>
  6935      *
  6936      * @param   codePoint   the character (Unicode code point) to be converted.
  6937      * @return  either the uppercase equivalent of the character, if
  6938      *          any, or an error flag ({@code Character.ERROR})
  6939      *          that indicates that a 1:M {@code char} mapping exists.
  6940      * @see     Character#isLowerCase(char)
  6941      * @see     Character#isUpperCase(char)
  6942      * @see     Character#toLowerCase(char)
  6943      * @see     Character#toTitleCase(char)
  6944      * @since 1.4
  6945      */
  6946     static int toUpperCaseEx(int codePoint) {
  6947         assert isValidCodePoint(codePoint);
  6948         return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
  6949     }
  6950 
  6951     /**
  6952      * Converts the character (Unicode code point) argument to uppercase using case
  6953      * mapping information from the SpecialCasing file in the Unicode
  6954      * specification. If a character has no explicit uppercase
  6955      * mapping, then the {@code char} itself is returned in the
  6956      * {@code char[]}.
  6957      *
  6958      * @param   codePoint   the character (Unicode code point) to be converted.
  6959      * @return a {@code char[]} with the uppercased character.
  6960      * @since 1.4
  6961      */
  6962     static char[] toUpperCaseCharArray(int codePoint) {
  6963         // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
  6964         assert isBmpCodePoint(codePoint);
  6965         return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
  6966     }
  6967 
  6968     /**
  6969      * The number of bits used to represent a <tt>char</tt> value in unsigned
  6970      * binary form, constant {@code 16}.
  6971      *
  6972      * @since 1.5
  6973      */
  6974     public static final int SIZE = 16; // width of a char value, in bits
  6975 
  6976     /**
  6977      * Returns the value obtained by reversing the order of the bytes in the
  6978      * specified <tt>char</tt> value.
  6979      *
  6980      * @return the value obtained by reversing (or, equivalently, swapping)
  6981      *     the bytes in the specified <tt>char</tt> value.
  6982      * @since 1.5
  6983      */
  6984     public static char reverseBytes(char ch) {
  6985         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
  6986     }
  6987 
  6988     /**
  6989      * Returns the Unicode name of the specified character
  6990      * {@code codePoint}, or null if the code point is
  6991      * {@link #UNASSIGNED unassigned}.
  6992      * <p>
  6993      * Note: if the specified character is not assigned a name by
  6994      * the <i>UnicodeData</i> file (part of the Unicode Character
  6995      * Database maintained by the Unicode Consortium), the returned
  6996      * name is the same as the result of the expression:
  6997      *
  6998      * <blockquote>{@code
  6999      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
  7000      *     + " "
  7001      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
  7002      *
  7003      * }</blockquote>
  7004      *
  7005      * @param  codePoint the character (Unicode code point)
  7006      *
  7007      * @return the Unicode name of the specified character, or null if
  7008      *         the code point is unassigned.
  7009      *
  7010      * @exception IllegalArgumentException if the specified
  7011      *            {@code codePoint} is not a valid Unicode
  7012      *            code point.
  7013      *
  7014      * @since 1.7
  7015      */
  7016     public static String getName(int codePoint) {
  7017         if (!isValidCodePoint(codePoint)) {
  7018             throw new IllegalArgumentException();
  7019         }
  7020         String name = CharacterName.get(codePoint);
  7021         if (name != null)
  7022             return name;
  7023         if (getType(codePoint) == UNASSIGNED)
  7024             return null;
  7025         UnicodeBlock block = UnicodeBlock.of(codePoint);
  7026         if (block != null)
  7027             return block.toString().replace('_', ' ') + " "
  7028                    + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
  7029         // should never come here
  7030         return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
  7031     }
  7032 }