rt/emul/compact/src/main/java/java/util/regex/UnicodeProp.java
author Jaroslav Tulach <jtulach@netbeans.org>
Mon, 07 Oct 2013 16:13:27 +0200
branchjdk7-b147
changeset 1348 bca65655b36b
permissions -rw-r--r--
Adding RegEx implementation
jtulach@1348
     1
/*
 * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
jtulach@1348
    25
jtulach@1348
    26
package java.util.regex;
jtulach@1348
    27
jtulach@1348
    28
import java.util.HashMap;
jtulach@1348
    29
import java.util.Locale;
jtulach@1348
    30
jtulach@1348
    31
/**
 * Character predicates that can be looked up by name: Unicode binary
 * properties plus the POSIX-style classes (alnum, blank, graph, ...).
 *
 * <p>Each constant implements {@link #is(int)} for a single code point.
 * Constants are resolved from text by {@link #forName(String)} (constant
 * names and a few underscore-free aliases) and by
 * {@link #forPOSIXName(String)} (the short POSIX names). Both lookups are
 * case-insensitive and return {@code null} for an unknown name.
 *
 * <p>The line comments on the constants give each definition in Unicode
 * property notation.
 */
enum UnicodeProp {

    ALPHABETIC {
        @Override
        public boolean is(int ch) {
            return Character.isAlphabetic(ch);
        }
    },

    LETTER {
        @Override
        public boolean is(int ch) {
            return Character.isLetter(ch);
        }
    },

    IDEOGRAPHIC {
        @Override
        public boolean is(int ch) {
            return Character.isIdeographic(ch);
        }
    },

    LOWERCASE {
        @Override
        public boolean is(int ch) {
            return Character.isLowerCase(ch);
        }
    },

    UPPERCASE {
        @Override
        public boolean is(int ch) {
            return Character.isUpperCase(ch);
        }
    },

    TITLECASE {
        @Override
        public boolean is(int ch) {
            return Character.isTitleCase(ch);
        }
    },

    WHITE_SPACE {
        // \p{Whitespace}
        @Override
        public boolean is(int ch) {
            // Any separator category, plus the controls U+0009..U+000D
            // and U+0085, which carry general category Control.
            return hasCategory((1 << Character.SPACE_SEPARATOR) |
                               (1 << Character.LINE_SEPARATOR) |
                               (1 << Character.PARAGRAPH_SEPARATOR), ch)
                   || (ch >= 0x9 && ch <= 0xd) || (ch == 0x85);
        }
    },

    CONTROL {
        // \p{gc=Control}
        @Override
        public boolean is(int ch) {
            return Character.getType(ch) == Character.CONTROL;
        }
    },

    PUNCTUATION {
        // \p{gc=Punctuation}
        @Override
        public boolean is(int ch) {
            return hasCategory((1 << Character.CONNECTOR_PUNCTUATION) |
                               (1 << Character.DASH_PUNCTUATION) |
                               (1 << Character.START_PUNCTUATION) |
                               (1 << Character.END_PUNCTUATION) |
                               (1 << Character.OTHER_PUNCTUATION) |
                               (1 << Character.INITIAL_QUOTE_PUNCTUATION) |
                               (1 << Character.FINAL_QUOTE_PUNCTUATION), ch);
        }
    },

    HEX_DIGIT {
        // \p{gc=Decimal_Number}
        // \p{Hex_Digit}    -> PropList.txt: Hex_Digit
        @Override
        public boolean is(int ch) {
            return DIGIT.is(ch) ||
                   (ch >= 0x0030 && ch <= 0x0039) ||   // 0-9
                   (ch >= 0x0041 && ch <= 0x0046) ||   // A-F
                   (ch >= 0x0061 && ch <= 0x0066) ||   // a-f
                   (ch >= 0xFF10 && ch <= 0xFF19) ||   // fullwidth 0-9
                   (ch >= 0xFF21 && ch <= 0xFF26) ||   // fullwidth A-F
                   (ch >= 0xFF41 && ch <= 0xFF46);     // fullwidth a-f
        }
    },

    ASSIGNED {
        @Override
        public boolean is(int ch) {
            return Character.getType(ch) != Character.UNASSIGNED;
        }
    },

    NONCHARACTER_CODE_POINT {
        // PropList.txt:Noncharacter_Code_Point
        @Override
        public boolean is(int ch) {
            // Low 16 bits equal to FFFE or FFFF, or the block U+FDD0..U+FDEF.
            return (ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef);
        }
    },

    DIGIT {
        // \p{gc=Decimal_Number}
        @Override
        public boolean is(int ch) {
            return Character.isDigit(ch);
        }
    },

    ALNUM {
        // \p{alpha}
        // \p{digit}
        @Override
        public boolean is(int ch) {
            return ALPHABETIC.is(ch) || DIGIT.is(ch);
        }
    },

    BLANK {
        // \p{Whitespace} --
        // [\N{LF} \N{VT} \N{FF} \N{CR} \N{NEL}  -> 0xa, 0xb, 0xc, 0xd, 0x85
        //  \p{gc=Line_Separator}
        //  \p{gc=Paragraph_Separator}]
        @Override
        public boolean is(int ch) {
            return Character.getType(ch) == Character.SPACE_SEPARATOR ||
                   ch == 0x9; // \N{HT}
        }
    },

    GRAPH {
        // [^
        //  \p{space}
        //  \p{gc=Control}
        //  \p{gc=Surrogate}
        //  \p{gc=Unassigned}]
        @Override
        public boolean is(int ch) {
            return !hasCategory((1 << Character.SPACE_SEPARATOR) |
                                (1 << Character.LINE_SEPARATOR) |
                                (1 << Character.PARAGRAPH_SEPARATOR) |
                                (1 << Character.CONTROL) |
                                (1 << Character.SURROGATE) |
                                (1 << Character.UNASSIGNED), ch);
        }
    },

    PRINT {
        // \p{graph}
        // \p{blank}
        // -- \p{cntrl}
        @Override
        public boolean is(int ch) {
            return (GRAPH.is(ch) || BLANK.is(ch)) && !CONTROL.is(ch);
        }
    },

    WORD {
        //  \p{alpha}
        //  \p{gc=Mark}
        //  \p{digit}
        //  \p{gc=Connector_Punctuation}
        @Override
        public boolean is(int ch) {
            return ALPHABETIC.is(ch) ||
                   hasCategory((1 << Character.NON_SPACING_MARK) |
                               (1 << Character.ENCLOSING_MARK) |
                               (1 << Character.COMBINING_SPACING_MARK) |
                               (1 << Character.DECIMAL_DIGIT_NUMBER) |
                               (1 << Character.CONNECTOR_PUNCTUATION), ch);
        }
    };

    /** Upper-case POSIX class name -> name of the constant implementing it. */
    private static final HashMap<String, String> posix = new HashMap<>();

    /** Upper-case alternative spelling -> name of the constant it denotes. */
    private static final HashMap<String, String> aliases = new HashMap<>();

    /**
     * Constant name -> constant. Lets name lookups report "unknown" with a
     * plain map miss instead of catching the exception thrown by
     * {@code valueOf}.
     */
    private static final HashMap<String, UnicodeProp> byName = new HashMap<>();

    static {
        posix.put("ALPHA", "ALPHABETIC");
        posix.put("LOWER", "LOWERCASE");
        posix.put("UPPER", "UPPERCASE");
        posix.put("SPACE", "WHITE_SPACE");
        posix.put("PUNCT", "PUNCTUATION");
        posix.put("XDIGIT","HEX_DIGIT");
        posix.put("ALNUM", "ALNUM");
        posix.put("CNTRL", "CONTROL");
        posix.put("DIGIT", "DIGIT");
        posix.put("BLANK", "BLANK");
        posix.put("GRAPH", "GRAPH");
        posix.put("PRINT", "PRINT");

        aliases.put("WHITESPACE", "WHITE_SPACE");
        aliases.put("HEXDIGIT","HEX_DIGIT");
        aliases.put("NONCHARACTERCODEPOINT", "NONCHARACTER_CODE_POINT");

        for (UnicodeProp prop : values()) {
            byName.put(prop.name(), prop);
        }
    }

    /**
     * Tests whether the general category of {@code ch} is one of the
     * categories whose bit is set in {@code categoryMask}.
     *
     * @param categoryMask OR of {@code 1 << Character.<CATEGORY>} values
     * @param ch           the code point to classify
     * @return {@code true} if bit {@code Character.getType(ch)} of the mask is set
     */
    private static boolean hasCategory(int categoryMask, int ch) {
        return ((categoryMask >> Character.getType(ch)) & 1) != 0;
    }

    /**
     * Looks up a property by constant name or by one of the registered
     * aliases, ignoring case.
     *
     * @param propName the property name; must not be {@code null}
     * @return the matching constant, or {@code null} if the name is unknown
     * @throws NullPointerException if {@code propName} is {@code null}
     */
    public static UnicodeProp forName(String propName) {
        String key = propName.toUpperCase(Locale.ENGLISH);
        String canonical = aliases.get(key);
        return byName.get(canonical != null ? canonical : key);
    }

    /**
     * Looks up a property by its POSIX class name (for example
     * {@code "alpha"} or {@code "xdigit"}), ignoring case.
     *
     * @param propName the POSIX class name; must not be {@code null}
     * @return the matching constant, or {@code null} if the name is not a
     *         registered POSIX class name
     * @throws NullPointerException if {@code propName} is {@code null}
     */
    public static UnicodeProp forPOSIXName(String propName) {
        String canonical = posix.get(propName.toUpperCase(Locale.ENGLISH));
        if (canonical == null) {
            return null;
        }
        return byName.get(canonical);
    }

    /**
     * Tests whether the given code point has this property.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} has the property
     */
    public abstract boolean is(int ch);
}