1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/rt/emul/compact/src/main/java/java/util/regex/UnicodeProp.java Mon Oct 07 16:13:27 2013 +0200
1.3 @@ -0,0 +1,236 @@
1.4 +/*
1.5 + * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
1.7 + *
1.8 + * This code is free software; you can redistribute it and/or modify it
1.9 + * under the terms of the GNU General Public License version 2 only, as
1.10 + * published by the Free Software Foundation. Oracle designates this
1.11 + * particular file as subject to the "Classpath" exception as provided
1.12 + * by Oracle in the LICENSE file that accompanied this code.
1.13 + *
1.14 + * This code is distributed in the hope that it will be useful, but WITHOUT
1.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1.16 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
1.17 + * version 2 for more details (a copy is included in the LICENSE file that
1.18 + * accompanied this code).
1.19 + *
1.20 + * You should have received a copy of the GNU General Public License version
1.21 + * 2 along with this work; if not, write to the Free Software Foundation,
1.22 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
1.23 + *
1.24 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
1.25 + * or visit www.oracle.com if you need additional information or have any
1.26 + * questions.
1.27 + */
1.28 +
1.29 +package java.util.regex;
1.30 +
1.31 +import java.util.HashMap;
1.32 +import java.util.Locale;
1.33 +
1.34 +enum UnicodeProp {
1.35 +
1.36 + ALPHABETIC {
1.37 + public boolean is(int ch) {
1.38 + return Character.isAlphabetic(ch);
1.39 + }
1.40 + },
1.41 +
1.42 + LETTER {
1.43 + public boolean is(int ch) {
1.44 + return Character.isLetter(ch);
1.45 + }
1.46 + },
1.47 +
1.48 + IDEOGRAPHIC {
1.49 + public boolean is(int ch) {
1.50 + return Character.isIdeographic(ch);
1.51 + }
1.52 + },
1.53 +
1.54 + LOWERCASE {
1.55 + public boolean is(int ch) {
1.56 + return Character.isLowerCase(ch);
1.57 + }
1.58 + },
1.59 +
1.60 + UPPERCASE {
1.61 + public boolean is(int ch) {
1.62 + return Character.isUpperCase(ch);
1.63 + }
1.64 + },
1.65 +
1.66 + TITLECASE {
1.67 + public boolean is(int ch) {
1.68 + return Character.isTitleCase(ch);
1.69 + }
1.70 + },
1.71 +
1.72 + WHITE_SPACE {
1.73 + // \p{Whitespace}
1.74 + public boolean is(int ch) {
1.75 + return ((((1 << Character.SPACE_SEPARATOR) |
1.76 + (1 << Character.LINE_SEPARATOR) |
1.77 + (1 << Character.PARAGRAPH_SEPARATOR)) >> Character.getType(ch)) & 1)
1.78 + != 0 || (ch >= 0x9 && ch <= 0xd) || (ch == 0x85);
1.79 + }
1.80 + },
1.81 +
1.82 + CONTROL {
1.83 + // \p{gc=Control}
1.84 + public boolean is(int ch) {
1.85 + return Character.getType(ch) == Character.CONTROL;
1.86 + }
1.87 + },
1.88 +
1.89 + PUNCTUATION {
1.90 + // \p{gc=Punctuation}
1.91 + public boolean is(int ch) {
1.92 + return ((((1 << Character.CONNECTOR_PUNCTUATION) |
1.93 + (1 << Character.DASH_PUNCTUATION) |
1.94 + (1 << Character.START_PUNCTUATION) |
1.95 + (1 << Character.END_PUNCTUATION) |
1.96 + (1 << Character.OTHER_PUNCTUATION) |
1.97 + (1 << Character.INITIAL_QUOTE_PUNCTUATION) |
1.98 + (1 << Character.FINAL_QUOTE_PUNCTUATION)) >> Character.getType(ch)) & 1)
1.99 + != 0;
1.100 + }
1.101 + },
1.102 +
1.103 + HEX_DIGIT {
1.104 + // \p{gc=Decimal_Number}
1.105 + // \p{Hex_Digit} -> PropList.txt: Hex_Digit
1.106 + public boolean is(int ch) {
1.107 + return DIGIT.is(ch) ||
1.108 + (ch >= 0x0030 && ch <= 0x0039) ||
1.109 + (ch >= 0x0041 && ch <= 0x0046) ||
1.110 + (ch >= 0x0061 && ch <= 0x0066) ||
1.111 + (ch >= 0xFF10 && ch <= 0xFF19) ||
1.112 + (ch >= 0xFF21 && ch <= 0xFF26) ||
1.113 + (ch >= 0xFF41 && ch <= 0xFF46);
1.114 + }
1.115 + },
1.116 +
1.117 + ASSIGNED {
1.118 + public boolean is(int ch) {
1.119 + return Character.getType(ch) != Character.UNASSIGNED;
1.120 + }
1.121 + },
1.122 +
1.123 + NONCHARACTER_CODE_POINT {
1.124 + // PropList.txt:Noncharacter_Code_Point
1.125 + public boolean is(int ch) {
1.126 + return (ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef);
1.127 + }
1.128 + },
1.129 +
1.130 + DIGIT {
1.131 + // \p{gc=Decimal_Number}
1.132 + public boolean is(int ch) {
1.133 + return Character.isDigit(ch);
1.134 + }
1.135 + },
1.136 +
1.137 + ALNUM {
1.138 + // \p{alpha}
1.139 + // \p{digit}
1.140 + public boolean is(int ch) {
1.141 + return ALPHABETIC.is(ch) || DIGIT.is(ch);
1.142 + }
1.143 + },
1.144 +
1.145 + BLANK {
1.146 + // \p{Whitespace} --
1.147 + // [\N{LF} \N{VT} \N{FF} \N{CR} \N{NEL} -> 0xa, 0xb, 0xc, 0xd, 0x85
1.148 + // \p{gc=Line_Separator}
1.149 + // \p{gc=Paragraph_Separator}]
1.150 + public boolean is(int ch) {
1.151 + return Character.getType(ch) == Character.SPACE_SEPARATOR ||
1.152 + ch == 0x9; // \N{HT}
1.153 + }
1.154 + },
1.155 +
1.156 + GRAPH {
1.157 + // [^
1.158 + // \p{space}
1.159 + // \p{gc=Control}
1.160 + // \p{gc=Surrogate}
1.161 + // \p{gc=Unassigned}]
1.162 + public boolean is(int ch) {
1.163 + return ((((1 << Character.SPACE_SEPARATOR) |
1.164 + (1 << Character.LINE_SEPARATOR) |
1.165 + (1 << Character.PARAGRAPH_SEPARATOR) |
1.166 + (1 << Character.CONTROL) |
1.167 + (1 << Character.SURROGATE) |
1.168 + (1 << Character.UNASSIGNED)) >> Character.getType(ch)) & 1)
1.169 + == 0;
1.170 + }
1.171 + },
1.172 +
1.173 + PRINT {
1.174 + // \p{graph}
1.175 + // \p{blank}
1.176 + // -- \p{cntrl}
1.177 + public boolean is(int ch) {
1.178 + return (GRAPH.is(ch) || BLANK.is(ch)) && !CONTROL.is(ch);
1.179 + }
1.180 + },
1.181 +
1.182 + WORD {
1.183 + // \p{alpha}
1.184 + // \p{gc=Mark}
1.185 + // \p{digit}
1.186 + // \p{gc=Connector_Punctuation}
1.187 +
1.188 + public boolean is(int ch) {
1.189 + return ALPHABETIC.is(ch) ||
1.190 + ((((1 << Character.NON_SPACING_MARK) |
1.191 + (1 << Character.ENCLOSING_MARK) |
1.192 + (1 << Character.COMBINING_SPACING_MARK) |
1.193 + (1 << Character.DECIMAL_DIGIT_NUMBER) |
1.194 + (1 << Character.CONNECTOR_PUNCTUATION)) >> Character.getType(ch)) & 1)
1.195 + != 0;
1.196 + }
1.197 + };
1.198 +
1.199 + private final static HashMap<String, String> posix = new HashMap<>();
1.200 + private final static HashMap<String, String> aliases = new HashMap<>();
1.201 + static {
1.202 + posix.put("ALPHA", "ALPHABETIC");
1.203 + posix.put("LOWER", "LOWERCASE");
1.204 + posix.put("UPPER", "UPPERCASE");
1.205 + posix.put("SPACE", "WHITE_SPACE");
1.206 + posix.put("PUNCT", "PUNCTUATION");
1.207 + posix.put("XDIGIT","HEX_DIGIT");
1.208 + posix.put("ALNUM", "ALNUM");
1.209 + posix.put("CNTRL", "CONTROL");
1.210 + posix.put("DIGIT", "DIGIT");
1.211 + posix.put("BLANK", "BLANK");
1.212 + posix.put("GRAPH", "GRAPH");
1.213 + posix.put("PRINT", "PRINT");
1.214 +
1.215 + aliases.put("WHITESPACE", "WHITE_SPACE");
1.216 + aliases.put("HEXDIGIT","HEX_DIGIT");
1.217 + aliases.put("NONCHARACTERCODEPOINT", "NONCHARACTER_CODE_POINT");
1.218 + }
1.219 +
1.220 + public static UnicodeProp forName(String propName) {
1.221 + propName = propName.toUpperCase(Locale.ENGLISH);
1.222 + String alias = aliases.get(propName);
1.223 + if (alias != null)
1.224 + propName = alias;
1.225 + try {
1.226 + return valueOf (propName);
1.227 + } catch (IllegalArgumentException x) {}
1.228 + return null;
1.229 + }
1.230 +
1.231 + public static UnicodeProp forPOSIXName(String propName) {
1.232 + propName = posix.get(propName.toUpperCase(Locale.ENGLISH));
1.233 + if (propName == null)
1.234 + return null;
1.235 + return valueOf (propName);
1.236 + }
1.237 +
1.238 + public abstract boolean is(int ch);
1.239 +}