rt/emul/compact/src/main/java/java/util/regex/UnicodeProp.java
author Jaroslav Tulach <jtulach@netbeans.org>
Mon, 07 Oct 2013 16:13:27 +0200
branch jdk7-b147
changeset 1348 bca65655b36b
permissions -rw-r--r--
Adding RegEx implementation
     1 /*
     2  * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Oracle in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    22  * or visit www.oracle.com if you need additional information or have any
    23  * questions.
    24  */
    25 
    26 package java.util.regex;
    27 
    28 import java.util.HashMap;
    29 import java.util.Locale;
    30 
    31 enum UnicodeProp {
    32 
    33     ALPHABETIC {
    34         public boolean is(int ch) {
    35             return Character.isAlphabetic(ch);
    36         }
    37     },
    38 
    39     LETTER {
    40         public boolean is(int ch) {
    41             return Character.isLetter(ch);
    42         }
    43     },
    44 
    45     IDEOGRAPHIC {
    46         public boolean is(int ch) {
    47             return Character.isIdeographic(ch);
    48         }
    49     },
    50 
    51     LOWERCASE {
    52         public boolean is(int ch) {
    53             return Character.isLowerCase(ch);
    54         }
    55     },
    56 
    57     UPPERCASE {
    58         public boolean is(int ch) {
    59             return Character.isUpperCase(ch);
    60         }
    61     },
    62 
    63     TITLECASE {
    64         public boolean is(int ch) {
    65             return Character.isTitleCase(ch);
    66         }
    67     },
    68 
    69     WHITE_SPACE {
    70         // \p{Whitespace}
    71         public boolean is(int ch) {
    72             return ((((1 << Character.SPACE_SEPARATOR) |
    73                       (1 << Character.LINE_SEPARATOR) |
    74                       (1 << Character.PARAGRAPH_SEPARATOR)) >> Character.getType(ch)) & 1)
    75                    != 0 || (ch >= 0x9 && ch <= 0xd) || (ch == 0x85);
    76         }
    77     },
    78 
    79     CONTROL {
    80         // \p{gc=Control}
    81         public boolean is(int ch) {
    82             return Character.getType(ch) == Character.CONTROL;
    83         }
    84     },
    85 
    86     PUNCTUATION {
    87         // \p{gc=Punctuation}
    88         public boolean is(int ch) {
    89             return ((((1 << Character.CONNECTOR_PUNCTUATION) |
    90                       (1 << Character.DASH_PUNCTUATION) |
    91                       (1 << Character.START_PUNCTUATION) |
    92                       (1 << Character.END_PUNCTUATION) |
    93                       (1 << Character.OTHER_PUNCTUATION) |
    94                       (1 << Character.INITIAL_QUOTE_PUNCTUATION) |
    95                       (1 << Character.FINAL_QUOTE_PUNCTUATION)) >> Character.getType(ch)) & 1)
    96                    != 0;
    97         }
    98     },
    99 
   100     HEX_DIGIT {
   101         // \p{gc=Decimal_Number}
   102         // \p{Hex_Digit}    -> PropList.txt: Hex_Digit
   103         public boolean is(int ch) {
   104             return DIGIT.is(ch) ||
   105                    (ch >= 0x0030 && ch <= 0x0039) ||
   106                    (ch >= 0x0041 && ch <= 0x0046) ||
   107                    (ch >= 0x0061 && ch <= 0x0066) ||
   108                    (ch >= 0xFF10 && ch <= 0xFF19) ||
   109                    (ch >= 0xFF21 && ch <= 0xFF26) ||
   110                    (ch >= 0xFF41 && ch <= 0xFF46);
   111         }
   112     },
   113 
   114     ASSIGNED {
   115         public boolean is(int ch) {
   116             return Character.getType(ch) != Character.UNASSIGNED;
   117         }
   118     },
   119 
   120     NONCHARACTER_CODE_POINT {
   121         // PropList.txt:Noncharacter_Code_Point
   122         public boolean is(int ch) {
   123             return (ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef);
   124         }
   125     },
   126 
   127     DIGIT {
   128         // \p{gc=Decimal_Number}
   129         public boolean is(int ch) {
   130             return Character.isDigit(ch);
   131         }
   132     },
   133 
   134     ALNUM {
   135         // \p{alpha}
   136         // \p{digit}
   137         public boolean is(int ch) {
   138             return ALPHABETIC.is(ch) || DIGIT.is(ch);
   139         }
   140     },
   141 
   142     BLANK {
   143         // \p{Whitespace} --
   144         // [\N{LF} \N{VT} \N{FF} \N{CR} \N{NEL}  -> 0xa, 0xb, 0xc, 0xd, 0x85
   145         //  \p{gc=Line_Separator}
   146         //  \p{gc=Paragraph_Separator}]
   147         public boolean is(int ch) {
   148             return Character.getType(ch) == Character.SPACE_SEPARATOR ||
   149                    ch == 0x9; // \N{HT}
   150         }
   151     },
   152 
   153     GRAPH {
   154         // [^
   155         //  \p{space}
   156         //  \p{gc=Control}
   157         //  \p{gc=Surrogate}
   158         //  \p{gc=Unassigned}]
   159         public boolean is(int ch) {
   160             return ((((1 << Character.SPACE_SEPARATOR) |
   161                       (1 << Character.LINE_SEPARATOR) |
   162                       (1 << Character.PARAGRAPH_SEPARATOR) |
   163                       (1 << Character.CONTROL) |
   164                       (1 << Character.SURROGATE) |
   165                       (1 << Character.UNASSIGNED)) >> Character.getType(ch)) & 1)
   166                    == 0;
   167         }
   168     },
   169 
   170     PRINT {
   171         // \p{graph}
   172         // \p{blank}
   173         // -- \p{cntrl}
   174         public boolean is(int ch) {
   175             return (GRAPH.is(ch) || BLANK.is(ch)) && !CONTROL.is(ch);
   176         }
   177     },
   178 
   179     WORD {
   180         //  \p{alpha}
   181         //  \p{gc=Mark}
   182         //  \p{digit}
   183         //  \p{gc=Connector_Punctuation}
   184 
   185         public boolean is(int ch) {
   186             return ALPHABETIC.is(ch) ||
   187                    ((((1 << Character.NON_SPACING_MARK) |
   188                       (1 << Character.ENCLOSING_MARK) |
   189                       (1 << Character.COMBINING_SPACING_MARK) |
   190                       (1 << Character.DECIMAL_DIGIT_NUMBER) |
   191                       (1 << Character.CONNECTOR_PUNCTUATION)) >> Character.getType(ch)) & 1)
   192                    != 0;
   193         }
   194     };
   195 
   196     private final static HashMap<String, String> posix = new HashMap<>();
   197     private final static HashMap<String, String> aliases = new HashMap<>();
   198     static {
   199         posix.put("ALPHA", "ALPHABETIC");
   200         posix.put("LOWER", "LOWERCASE");
   201         posix.put("UPPER", "UPPERCASE");
   202         posix.put("SPACE", "WHITE_SPACE");
   203         posix.put("PUNCT", "PUNCTUATION");
   204         posix.put("XDIGIT","HEX_DIGIT");
   205         posix.put("ALNUM", "ALNUM");
   206         posix.put("CNTRL", "CONTROL");
   207         posix.put("DIGIT", "DIGIT");
   208         posix.put("BLANK", "BLANK");
   209         posix.put("GRAPH", "GRAPH");
   210         posix.put("PRINT", "PRINT");
   211 
   212         aliases.put("WHITESPACE", "WHITE_SPACE");
   213         aliases.put("HEXDIGIT","HEX_DIGIT");
   214         aliases.put("NONCHARACTERCODEPOINT", "NONCHARACTER_CODE_POINT");
   215     }
   216 
   217     public static UnicodeProp forName(String propName) {
   218         propName = propName.toUpperCase(Locale.ENGLISH);
   219         String alias = aliases.get(propName);
   220         if (alias != null)
   221             propName = alias;
   222         try {
   223             return valueOf (propName);
   224         } catch (IllegalArgumentException x) {}
   225         return null;
   226     }
   227 
   228     public static UnicodeProp forPOSIXName(String propName) {
   229         propName = posix.get(propName.toUpperCase(Locale.ENGLISH));
   230         if (propName == null)
   231             return null;
   232         return valueOf (propName);
   233     }
   234 
   235     public abstract boolean is(int ch);
   236 }