/*
 * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
jtulach@1348
|
25 |
|
jtulach@1348
|
26 |
package java.util.regex;
|
jtulach@1348
|
27 |
|
jtulach@1348
|
28 |
import java.util.HashMap;
|
jtulach@1348
|
29 |
import java.util.Locale;
|
jtulach@1348
|
30 |
|
jtulach@1348
|
31 |
/**
 * Predicates over Unicode code points, selectable by Unicode property name
 * ({@link #forName}) or by POSIX character-class name ({@link #forPOSIXName}).
 *
 * <p>Each constant implements {@link #is(int)} for one property. Several
 * constants are defined in terms of others (for example {@code ALNUM} is
 * {@code ALPHABETIC} or {@code DIGIT}); the per-constant comments give the
 * set expression each one implements.
 */
enum UnicodeProp {

    ALPHABETIC {
        @Override
        public boolean is(int ch) {
            return Character.isAlphabetic(ch);
        }
    },

    LETTER {
        @Override
        public boolean is(int ch) {
            return Character.isLetter(ch);
        }
    },

    IDEOGRAPHIC {
        @Override
        public boolean is(int ch) {
            return Character.isIdeographic(ch);
        }
    },

    LOWERCASE {
        @Override
        public boolean is(int ch) {
            return Character.isLowerCase(ch);
        }
    },

    UPPERCASE {
        @Override
        public boolean is(int ch) {
            return Character.isUpperCase(ch);
        }
    },

    TITLECASE {
        @Override
        public boolean is(int ch) {
            return Character.isTitleCase(ch);
        }
    },

    WHITE_SPACE {
        // \p{Whitespace}:
        //   gc=Space_Separator, gc=Line_Separator, gc=Paragraph_Separator,
        //   plus U+0009..U+000D and U+0085 (NEL)
        @Override
        public boolean is(int ch) {
            return hasTypeIn(SEPARATOR_TYPES, ch)
                || (ch >= 0x9 && ch <= 0xd)
                || (ch == 0x85);
        }
    },

    CONTROL {
        // \p{gc=Control}
        @Override
        public boolean is(int ch) {
            return Character.getType(ch) == Character.CONTROL;
        }
    },

    PUNCTUATION {
        // \p{gc=Punctuation}
        @Override
        public boolean is(int ch) {
            return hasTypeIn(PUNCTUATION_TYPES, ch);
        }
    },

    HEX_DIGIT {
        // \p{gc=Decimal_Number}
        // \p{Hex_Digit}    -> PropList.txt: Hex_Digit
        @Override
        public boolean is(int ch) {
            return DIGIT.is(ch)
                || (ch >= 0x0030 && ch <= 0x0039)    // 0-9
                || (ch >= 0x0041 && ch <= 0x0046)    // A-F
                || (ch >= 0x0061 && ch <= 0x0066)    // a-f
                || (ch >= 0xFF10 && ch <= 0xFF19)    // fullwidth 0-9
                || (ch >= 0xFF21 && ch <= 0xFF26)    // fullwidth A-F
                || (ch >= 0xFF41 && ch <= 0xFF46);   // fullwidth a-f
        }
    },

    ASSIGNED {
        @Override
        public boolean is(int ch) {
            return Character.getType(ch) != Character.UNASSIGNED;
        }
    },

    NONCHARACTER_CODE_POINT {
        // PropList.txt: Noncharacter_Code_Point
        @Override
        public boolean is(int ch) {
            // Low 16 bits equal to FFFE or FFFF (in any plane), or U+FDD0..U+FDEF.
            return (ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef);
        }
    },

    DIGIT {
        // \p{gc=Decimal_Number}
        @Override
        public boolean is(int ch) {
            return Character.isDigit(ch);
        }
    },

    ALNUM {
        // \p{alpha}
        // \p{digit}
        @Override
        public boolean is(int ch) {
            return ALPHABETIC.is(ch) || DIGIT.is(ch);
        }
    },

    BLANK {
        // \p{Whitespace} --
        //    [\N{LF} \N{VT} \N{FF} \N{CR} \N{NEL}  -> 0xa, 0xb, 0xc, 0xd, 0x85
        //     \p{gc=Line_Separator}
        //     \p{gc=Paragraph_Separator}]
        @Override
        public boolean is(int ch) {
            return Character.getType(ch) == Character.SPACE_SEPARATOR
                || ch == 0x9;    // \N{HT}
        }
    },

    GRAPH {
        // [^
        //  \p{space}
        //  \p{gc=Control}
        //  \p{gc=Surrogate}
        //  \p{gc=Unassigned}]
        @Override
        public boolean is(int ch) {
            return !hasTypeIn(NON_GRAPH_TYPES, ch);
        }
    },

    PRINT {
        // \p{graph}
        // \p{blank}
        // -- \p{cntrl}
        @Override
        public boolean is(int ch) {
            return (GRAPH.is(ch) || BLANK.is(ch)) && !CONTROL.is(ch);
        }
    },

    WORD {
        // \p{alpha}
        // \p{gc=Mark}
        // \p{digit}
        // \p{gc=Connector_Punctuation}
        @Override
        public boolean is(int ch) {
            return ALPHABETIC.is(ch) || hasTypeIn(WORD_TYPES, ch);
        }
    };

    // Bit sets of Character.getType() values: bit N is set when general
    // category N belongs to the set.

    /** Space, line and paragraph separators. */
    private static final int SEPARATOR_TYPES =
          (1 << Character.SPACE_SEPARATOR)
        | (1 << Character.LINE_SEPARATOR)
        | (1 << Character.PARAGRAPH_SEPARATOR);

    /** All punctuation general categories. */
    private static final int PUNCTUATION_TYPES =
          (1 << Character.CONNECTOR_PUNCTUATION)
        | (1 << Character.DASH_PUNCTUATION)
        | (1 << Character.START_PUNCTUATION)
        | (1 << Character.END_PUNCTUATION)
        | (1 << Character.OTHER_PUNCTUATION)
        | (1 << Character.INITIAL_QUOTE_PUNCTUATION)
        | (1 << Character.FINAL_QUOTE_PUNCTUATION);

    /** Categories excluded from {@code GRAPH}. */
    private static final int NON_GRAPH_TYPES =
          SEPARATOR_TYPES
        | (1 << Character.CONTROL)
        | (1 << Character.SURROGATE)
        | (1 << Character.UNASSIGNED);

    /** Marks, decimal digits and connector punctuation, as used by {@code WORD}. */
    private static final int WORD_TYPES =
          (1 << Character.NON_SPACING_MARK)
        | (1 << Character.ENCLOSING_MARK)
        | (1 << Character.COMBINING_SPACING_MARK)
        | (1 << Character.DECIMAL_DIGIT_NUMBER)
        | (1 << Character.CONNECTOR_PUNCTUATION);

    /** POSIX class name (upper case) -> name of the matching constant. */
    private static final HashMap<String, String> posix = new HashMap<>();

    /** Alternative property spelling (upper case) -> name of the matching constant. */
    private static final HashMap<String, String> aliases = new HashMap<>();

    static {
        posix.put("ALPHA", "ALPHABETIC");
        posix.put("LOWER", "LOWERCASE");
        posix.put("UPPER", "UPPERCASE");
        posix.put("SPACE", "WHITE_SPACE");
        posix.put("PUNCT", "PUNCTUATION");
        posix.put("XDIGIT", "HEX_DIGIT");
        posix.put("ALNUM", "ALNUM");
        posix.put("CNTRL", "CONTROL");
        posix.put("DIGIT", "DIGIT");
        posix.put("BLANK", "BLANK");
        posix.put("GRAPH", "GRAPH");
        posix.put("PRINT", "PRINT");

        aliases.put("WHITESPACE", "WHITE_SPACE");
        aliases.put("HEXDIGIT", "HEX_DIGIT");
        aliases.put("NONCHARACTERCODEPOINT", "NONCHARACTER_CODE_POINT");
    }

    /**
     * Tests whether the general category of {@code ch} is one of the
     * categories whose bit is set in {@code typeMask}.
     */
    private static boolean hasTypeIn(int typeMask, int ch) {
        return ((typeMask >> Character.getType(ch)) & 1) != 0;
    }

    /**
     * Looks up a property by name, ignoring case.
     *
     * <p>The name is upper-cased, translated through the alias table if an
     * alias matches, and then resolved as a constant name.
     *
     * @param propName the property name or alias; must not be {@code null}
     * @return the matching constant, or {@code null} if the name is unknown
     */
    public static UnicodeProp forName(String propName) {
        String key = propName.toUpperCase(Locale.ENGLISH);
        String canonical = aliases.get(key);
        if (canonical != null) {
            key = canonical;
        }
        try {
            return valueOf(key);
        } catch (IllegalArgumentException ignored) {
            // Deliberate: an unknown name is reported as null, not as an exception.
            return null;
        }
    }

    /**
     * Looks up a property by its POSIX class name, ignoring case.
     *
     * @param propName the POSIX class name (for example {@code "alpha"});
     *        must not be {@code null}
     * @return the matching constant, or {@code null} if the name is not in
     *         the POSIX table
     */
    public static UnicodeProp forPOSIXName(String propName) {
        String constantName = posix.get(propName.toUpperCase(Locale.ENGLISH));
        if (constantName == null) {
            return null;
        }
        return valueOf(constantName);
    }

    /**
     * Tests whether the given code point has this property.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} has this property
     */
    public abstract boolean is(int ch);
}