rt/emul/compact/src/main/java/java/util/regex/UnicodeProp.java
branch jdk7-b147
changeset 1348 bca65655b36b
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/rt/emul/compact/src/main/java/java/util/regex/UnicodeProp.java	Mon Oct 07 16:13:27 2013 +0200
     1.3 @@ -0,0 +1,236 @@
     1.4 +/*
     1.5 + * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.  Oracle designates this
    1.11 + * particular file as subject to the "Classpath" exception as provided
    1.12 + * by Oracle in the LICENSE file that accompanied this code.
    1.13 + *
    1.14 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.16 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.17 + * version 2 for more details (a copy is included in the LICENSE file that
    1.18 + * accompanied this code).
    1.19 + *
    1.20 + * You should have received a copy of the GNU General Public License version
    1.21 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.22 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.23 + *
    1.24 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    1.25 + * or visit www.oracle.com if you need additional information or have any
    1.26 + * questions.
    1.27 + */
    1.28 +
    1.29 +package java.util.regex;
    1.30 +
    1.31 +import java.util.HashMap;
    1.32 +import java.util.Locale;
    1.33 +
    1.34 +enum UnicodeProp {
    1.35 +    // Character-property predicates: each constant answers is(int) for one property; look them up with forName/forPOSIXName.
    1.36 +    ALPHABETIC {
    1.37 +        public boolean is(int ch) {
    1.38 +            return Character.isAlphabetic(ch);
    1.39 +        }
    1.40 +    },
    1.41 +
    1.42 +    LETTER {
    1.43 +        public boolean is(int ch) {
    1.44 +            return Character.isLetter(ch);
    1.45 +        }
    1.46 +    },
    1.47 +
    1.48 +    IDEOGRAPHIC {
    1.49 +        public boolean is(int ch) {
    1.50 +            return Character.isIdeographic(ch);
    1.51 +        }
    1.52 +    },
    1.53 +
    1.54 +    LOWERCASE {
    1.55 +        public boolean is(int ch) {
    1.56 +            return Character.isLowerCase(ch);
    1.57 +        }
    1.58 +    },
    1.59 +
    1.60 +    UPPERCASE {
    1.61 +        public boolean is(int ch) {
    1.62 +            return Character.isUpperCase(ch);
    1.63 +        }
    1.64 +    },
    1.65 +
    1.66 +    TITLECASE {
    1.67 +        public boolean is(int ch) {
    1.68 +            return Character.isTitleCase(ch);
    1.69 +        }
    1.70 +    },
    1.71 +
    1.72 +    WHITE_SPACE {
    1.73 +        // \p{Whitespace}: the three separator categories plus U+0009..U+000D and U+0085
    1.74 +        public boolean is(int ch) {
    1.75 +            return ((((1 << Character.SPACE_SEPARATOR) |   // mask: bit i is set <=> general category i is accepted
    1.76 +                      (1 << Character.LINE_SEPARATOR) |
    1.77 +                      (1 << Character.PARAGRAPH_SEPARATOR)) >> Character.getType(ch)) & 1)   // select the bit for ch's category
    1.78 +                   != 0 || (ch >= 0x9 && ch <= 0xd) || (ch == 0x85);
    1.79 +        }
    1.80 +    },
    1.81 +
    1.82 +    CONTROL {
    1.83 +        // \p{gc=Control}
    1.84 +        public boolean is(int ch) {
    1.85 +            return Character.getType(ch) == Character.CONTROL;
    1.86 +        }
    1.87 +    },
    1.88 +
    1.89 +    PUNCTUATION {
    1.90 +        // \p{gc=Punctuation}: all seven *_PUNCTUATION categories, tested with the same kind of category bit mask as WHITE_SPACE
    1.91 +        public boolean is(int ch) {
    1.92 +            return ((((1 << Character.CONNECTOR_PUNCTUATION) |
    1.93 +                      (1 << Character.DASH_PUNCTUATION) |
    1.94 +                      (1 << Character.START_PUNCTUATION) |
    1.95 +                      (1 << Character.END_PUNCTUATION) |
    1.96 +                      (1 << Character.OTHER_PUNCTUATION) |
    1.97 +                      (1 << Character.INITIAL_QUOTE_PUNCTUATION) |
    1.98 +                      (1 << Character.FINAL_QUOTE_PUNCTUATION)) >> Character.getType(ch)) & 1)
    1.99 +                   != 0;
   1.100 +        }
   1.101 +    },
   1.102 +
   1.103 +    HEX_DIGIT {
   1.104 +        // \p{gc=Decimal_Number}
   1.105 +        // \p{Hex_Digit}    -> PropList.txt: Hex_Digit
   1.106 +        public boolean is(int ch) {
   1.107 +            return DIGIT.is(ch) ||                      // every decimal digit is accepted as well
   1.108 +                   (ch >= 0x0030 && ch <= 0x0039) ||    // '0'..'9'
   1.109 +                   (ch >= 0x0041 && ch <= 0x0046) ||    // 'A'..'F'
   1.110 +                   (ch >= 0x0061 && ch <= 0x0066) ||    // 'a'..'f'
   1.111 +                   (ch >= 0xFF10 && ch <= 0xFF19) ||    // fullwidth '0'..'9'
   1.112 +                   (ch >= 0xFF21 && ch <= 0xFF26) ||    // fullwidth 'A'..'F'
   1.113 +                   (ch >= 0xFF41 && ch <= 0xFF46);      // fullwidth 'a'..'f'
   1.114 +        }
   1.115 +    },
   1.116 +
   1.117 +    ASSIGNED {
   1.118 +        public boolean is(int ch) {
   1.119 +            return Character.getType(ch) != Character.UNASSIGNED;
   1.120 +        }
   1.121 +    },
   1.122 +
   1.123 +    NONCHARACTER_CODE_POINT {
   1.124 +        // PropList.txt:Noncharacter_Code_Point
   1.125 +        public boolean is(int ch) {
   1.126 +            return (ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef);   // low 16 bits are FFFE/FFFF (upper bits ignored), or U+FDD0..U+FDEF
   1.127 +        }
   1.128 +    },
   1.129 +
   1.130 +    DIGIT {
   1.131 +        // \p{gc=Decimal_Number}
   1.132 +        public boolean is(int ch) {
   1.133 +            return Character.isDigit(ch);
   1.134 +        }
   1.135 +    },
   1.136 +
   1.137 +    ALNUM {
   1.138 +        // \p{alpha}
   1.139 +        // \p{digit}
   1.140 +        public boolean is(int ch) {
   1.141 +            return ALPHABETIC.is(ch) || DIGIT.is(ch);
   1.142 +        }
   1.143 +    },
   1.144 +
   1.145 +    BLANK {
   1.146 +        // \p{Whitespace} --
   1.147 +        // [\N{LF} \N{VT} \N{FF} \N{CR} \N{NEL}  -> 0xa, 0xb, 0xc, 0xd, 0x85
   1.148 +        //  \p{gc=Line_Separator}
   1.149 +        //  \p{gc=Paragraph_Separator}]
   1.150 +        public boolean is(int ch) {
   1.151 +            return Character.getType(ch) == Character.SPACE_SEPARATOR ||   // what remains of the set difference above: space separators ...
   1.152 +                   ch == 0x9; // \N{HT}
   1.153 +        }
   1.154 +    },
   1.155 +
   1.156 +    GRAPH {
   1.157 +        // [^
   1.158 +        //  \p{space}
   1.159 +        //  \p{gc=Control}
   1.160 +        //  \p{gc=Surrogate}
   1.161 +        //  \p{gc=Unassigned}]
   1.162 +        public boolean is(int ch) {
   1.163 +            return ((((1 << Character.SPACE_SEPARATOR) |   // mask of the excluded categories
   1.164 +                      (1 << Character.LINE_SEPARATOR) |
   1.165 +                      (1 << Character.PARAGRAPH_SEPARATOR) |
   1.166 +                      (1 << Character.CONTROL) |
   1.167 +                      (1 << Character.SURROGATE) |
   1.168 +                      (1 << Character.UNASSIGNED)) >> Character.getType(ch)) & 1)
   1.169 +                   == 0;   // true only when ch's category is NOT one of the excluded ones
   1.170 +        }
   1.171 +    },
   1.172 +
   1.173 +    PRINT {
   1.174 +        // \p{graph}
   1.175 +        // \p{blank}
   1.176 +        // -- \p{cntrl}
   1.177 +        public boolean is(int ch) {
   1.178 +            return (GRAPH.is(ch) || BLANK.is(ch)) && !CONTROL.is(ch);
   1.179 +        }
   1.180 +    },
   1.181 +
   1.182 +    WORD {
   1.183 +        //  \p{alpha}
   1.184 +        //  \p{gc=Mark}
   1.185 +        //  \p{digit}
   1.186 +        //  \p{gc=Connector_Punctuation}
   1.187 +
   1.188 +        public boolean is(int ch) {
   1.189 +            return ALPHABETIC.is(ch) ||
   1.190 +                   ((((1 << Character.NON_SPACING_MARK) |   // the three mark categories, decimal digits and connector punctuation
   1.191 +                      (1 << Character.ENCLOSING_MARK) |
   1.192 +                      (1 << Character.COMBINING_SPACING_MARK) |
   1.193 +                      (1 << Character.DECIMAL_DIGIT_NUMBER) |
   1.194 +                      (1 << Character.CONNECTOR_PUNCTUATION)) >> Character.getType(ch)) & 1)
   1.195 +                   != 0;
   1.196 +        }
   1.197 +    };
   1.198 +    // Name tables, keyed by upper-case name: posix maps POSIX class names, aliases maps underscore-free spellings, both to constant names.
   1.199 +    private final static HashMap<String, String> posix = new HashMap<>();
   1.200 +    private final static HashMap<String, String> aliases = new HashMap<>();
   1.201 +    static {
   1.202 +        posix.put("ALPHA", "ALPHABETIC");
   1.203 +        posix.put("LOWER", "LOWERCASE");
   1.204 +        posix.put("UPPER", "UPPERCASE");
   1.205 +        posix.put("SPACE", "WHITE_SPACE");
   1.206 +        posix.put("PUNCT", "PUNCTUATION");
   1.207 +        posix.put("XDIGIT","HEX_DIGIT");
   1.208 +        posix.put("ALNUM", "ALNUM");
   1.209 +        posix.put("CNTRL", "CONTROL");
   1.210 +        posix.put("DIGIT", "DIGIT");
   1.211 +        posix.put("BLANK", "BLANK");
   1.212 +        posix.put("GRAPH", "GRAPH");
   1.213 +        posix.put("PRINT", "PRINT");
   1.214 +
   1.215 +        aliases.put("WHITESPACE", "WHITE_SPACE");
   1.216 +        aliases.put("HEXDIGIT","HEX_DIGIT");
   1.217 +        aliases.put("NONCHARACTERCODEPOINT", "NONCHARACTER_CODE_POINT");
   1.218 +    }
   1.219 +    /** Looks up a constant by its name or by an entry of the aliases table, ignoring case; returns null when nothing matches. */
   1.220 +    public static UnicodeProp forName(String propName) {
   1.221 +        propName = propName.toUpperCase(Locale.ENGLISH);   // explicit locale, so the result does not depend on the default locale
   1.222 +        String alias = aliases.get(propName);
   1.223 +        if (alias != null)
   1.224 +            propName = alias;
   1.225 +        try {
   1.226 +            return valueOf (propName);
   1.227 +        } catch (IllegalArgumentException x) {}   // no constant with that name: fall through and report null
   1.228 +        return null;
   1.229 +    }
   1.230 +    /** Looks up a constant by a key of the posix table, ignoring case; returns null when the name is not in that table. */
   1.231 +    public static UnicodeProp forPOSIXName(String propName) {
   1.232 +        propName = posix.get(propName.toUpperCase(Locale.ENGLISH));
   1.233 +        if (propName == null)
   1.234 +            return null;
   1.235 +        return valueOf (propName);   // cannot fail: every value stored in posix is the name of a constant above
   1.236 +    }
   1.237 +    /** Tells whether the code point ch has the property this constant stands for. */
   1.238 +    public abstract boolean is(int ch);
   1.239 +}