jaroslav@1646
|
1 |
/*
|
jaroslav@1646
|
2 |
* Copyright (c) 2007, 2011, Oracle and/or its affiliates. All rights reserved.
|
jaroslav@1646
|
3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
jaroslav@1646
|
4 |
*
|
jaroslav@1646
|
5 |
* This code is free software; you can redistribute it and/or modify it
|
jaroslav@1646
|
6 |
* under the terms of the GNU General Public License version 2 only, as
|
jaroslav@1646
|
7 |
* published by the Free Software Foundation. Oracle designates this
|
jaroslav@1646
|
8 |
* particular file as subject to the "Classpath" exception as provided
|
jaroslav@1646
|
9 |
* by Oracle in the LICENSE file that accompanied this code.
|
jaroslav@1646
|
10 |
*
|
jaroslav@1646
|
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT
|
jaroslav@1646
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
jaroslav@1646
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
jaroslav@1646
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that
|
jaroslav@1646
|
15 |
* accompanied this code).
|
jaroslav@1646
|
16 |
*
|
jaroslav@1646
|
17 |
* You should have received a copy of the GNU General Public License version
|
jaroslav@1646
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation,
|
jaroslav@1646
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
jaroslav@1646
|
20 |
*
|
jaroslav@1646
|
21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
jaroslav@1646
|
22 |
* or visit www.oracle.com if you need additional information or have any
|
jaroslav@1646
|
23 |
* questions.
|
jaroslav@1646
|
24 |
*/
|
jaroslav@1646
|
25 |
|
jaroslav@1646
|
26 |
package sun.invoke.util;
|
jaroslav@1646
|
27 |
|
jaroslav@1646
|
28 |
/**
|
jaroslav@1646
|
29 |
* Utility routines for dealing with bytecode-level names.
|
jaroslav@1646
|
30 |
* Includes universal mangling rules for the JVM.
|
jaroslav@1646
|
31 |
*
|
jaroslav@1646
|
32 |
* <h3>Avoiding Dangerous Characters </h3>
|
jaroslav@1646
|
33 |
*
|
jaroslav@1646
|
34 |
* <p>
|
jaroslav@1646
|
35 |
* The JVM defines a very small set of characters which are illegal
|
jaroslav@1646
|
36 |
* in name spellings. We will slightly extend and regularize this set
|
jaroslav@1646
|
37 |
* into a group of <cite>dangerous characters</cite>.
|
jaroslav@1646
|
38 |
* These characters will then be replaced, in mangled names, by escape sequences.
|
jaroslav@1646
|
39 |
* In addition, accidental escape sequences must be further escaped.
|
jaroslav@1646
|
40 |
* Finally, a special prefix will be applied if and only if
|
jaroslav@1646
|
41 |
* the mangling would otherwise fail to begin with the escape character.
|
jaroslav@1646
|
42 |
* This happens to cover the corner case of the null string,
|
jaroslav@1646
|
43 |
* and also clearly marks symbols which need demangling.
|
jaroslav@1646
|
44 |
* </p>
|
jaroslav@1646
|
45 |
* <p>
|
jaroslav@1646
|
46 |
* Dangerous characters are the union of all characters forbidden
|
jaroslav@1646
|
47 |
* or otherwise restricted by the JVM specification,
|
jaroslav@1646
|
48 |
* plus their mates, if they are brackets
|
jaroslav@1646
|
49 |
* (<code><big><b>[</b></big></code> and <code><big><b>]</b></big></code>,
|
jaroslav@1646
|
50 |
* <code><big><b>&lt;</b></big></code> and <code><big><b>&gt;</b></big></code>),
|
jaroslav@1646
|
51 |
* plus, arbitrarily, the colon character <code><big><b>:</b></big></code>.
|
jaroslav@1646
|
52 |
* There is no distinction between type, method, and field names.
|
jaroslav@1646
|
53 |
* This makes it easier to convert between mangled names of different
|
jaroslav@1646
|
54 |
* types, since they do not need to be decoded (demangled).
|
jaroslav@1646
|
55 |
* </p>
|
jaroslav@1646
|
56 |
* <p>
|
jaroslav@1646
|
57 |
* The escape character is backslash <code><big><b>\</b></big></code>
|
jaroslav@1646
|
58 |
* (also known as reverse solidus).
|
jaroslav@1646
|
59 |
* This character is, until now, unheard of in bytecode names,
|
jaroslav@1646
|
60 |
* but traditional in the proposed role.
|
jaroslav@1646
|
61 |
*
|
jaroslav@1646
|
62 |
* </p>
|
jaroslav@1646
|
63 |
* <h3> Replacement Characters </h3>
|
jaroslav@1646
|
64 |
*
|
jaroslav@1646
|
65 |
*
|
jaroslav@1646
|
66 |
* <p>
|
jaroslav@1646
|
67 |
* Every escape sequence is two characters
|
jaroslav@1646
|
68 |
* (in fact, two UTF8 bytes) beginning with
|
jaroslav@1646
|
69 |
* the escape character and followed by a
|
jaroslav@1646
|
70 |
* <cite>replacement character</cite>.
|
jaroslav@1646
|
71 |
* (Since the replacement character is never a backslash,
|
jaroslav@1646
|
72 |
* iterated manglings do not double in size.)
|
jaroslav@1646
|
73 |
* </p>
|
jaroslav@1646
|
74 |
* <p>
|
jaroslav@1646
|
75 |
* Each dangerous character has some rough visual similarity
|
jaroslav@1646
|
76 |
* to its corresponding replacement character.
|
jaroslav@1646
|
77 |
* This makes mangled symbols easier to recognize by sight.
|
jaroslav@1646
|
78 |
* </p>
|
jaroslav@1646
|
79 |
* <p>
|
jaroslav@1646
|
80 |
* The dangerous characters are
|
jaroslav@1646
|
81 |
* <code><big><b>/</b></big></code> (forward slash, used to delimit package components),
|
jaroslav@1646
|
82 |
* <code><big><b>.</b></big></code> (dot, also a package delimiter),
|
jaroslav@1646
|
83 |
* <code><big><b>;</b></big></code> (semicolon, used in signatures),
|
jaroslav@1646
|
84 |
* <code><big><b>$</b></big></code> (dollar, used in inner classes and synthetic members),
|
jaroslav@1646
|
85 |
* <code><big><b>&lt;</b></big></code> (left angle),
|
jaroslav@1646
|
86 |
* <code><big><b>&gt;</b></big></code> (right angle),
|
jaroslav@1646
|
87 |
* <code><big><b>[</b></big></code> (left square bracket, used in array types),
|
jaroslav@1646
|
88 |
* <code><big><b>]</b></big></code> (right square bracket, reserved in this scheme for language use),
|
jaroslav@1646
|
89 |
* and <code><big><b>:</b></big></code> (colon, reserved in this scheme for language use).
|
jaroslav@1646
|
90 |
* Their replacements are, respectively,
|
jaroslav@1646
|
91 |
* <code><big><b>|</b></big></code> (vertical bar),
|
jaroslav@1646
|
92 |
* <code><big><b>,</b></big></code> (comma),
|
jaroslav@1646
|
93 |
* <code><big><b>?</b></big></code> (question mark),
|
jaroslav@1646
|
94 |
* <code><big><b>%</b></big></code> (percent),
|
jaroslav@1646
|
95 |
* <code><big><b>^</b></big></code> (caret),
|
jaroslav@1646
|
96 |
* <code><big><b>_</b></big></code> (underscore),
|
jaroslav@1646
|
97 |
* <code><big><b>{</b></big></code> (left curly bracket),
|
jaroslav@1646
|
98 |
* <code><big><b>}</b></big></code> (right curly bracket), and
|
jaroslav@1646
|
99 |
* <code><big><b>!</b></big></code> (exclamation mark).
|
jaroslav@1646
|
100 |
* In addition, the replacement character for the escape character itself is
|
jaroslav@1646
|
101 |
* <code><big><b>-</b></big></code> (hyphen),
|
jaroslav@1646
|
102 |
* and the replacement character for the null prefix is
|
jaroslav@1646
|
103 |
* <code><big><b>=</b></big></code> (equal sign).
|
jaroslav@1646
|
104 |
* </p>
|
jaroslav@1646
|
105 |
* <p>
|
jaroslav@1646
|
106 |
* An escape character <code><big><b>\</b></big></code>
|
jaroslav@1646
|
107 |
* followed by any of these replacement characters
|
jaroslav@1646
|
108 |
* is an escape sequence, and there are no other escape sequences.
|
jaroslav@1646
|
109 |
* An equal sign is only part of an escape sequence
|
jaroslav@1646
|
110 |
* if it is the second character in the whole string, following a backslash.
|
jaroslav@1646
|
111 |
* Two consecutive backslashes do <em>not</em> form an escape sequence.
|
jaroslav@1646
|
112 |
* </p>
|
jaroslav@1646
|
113 |
* <p>
|
jaroslav@1646
|
114 |
* Each escape sequence replaces a so-called <cite>original character</cite>
|
jaroslav@1646
|
115 |
* which is either one of the dangerous characters or the escape character.
|
jaroslav@1646
|
116 |
* A null prefix replaces an initial null string, not a character.
|
jaroslav@1646
|
117 |
* </p>
|
jaroslav@1646
|
118 |
* <p>
|
jaroslav@1646
|
119 |
* All this implies that escape sequences cannot overlap and may be
|
jaroslav@1646
|
120 |
* determined all at once for a whole string. Note that a spelling
|
jaroslav@1646
|
121 |
* string can contain <cite>accidental escapes</cite>, apparent escape
|
jaroslav@1646
|
122 |
* sequences which must not be interpreted as manglings.
|
jaroslav@1646
|
123 |
* These are disabled by replacing their leading backslash with an
|
jaroslav@1646
|
124 |
* escape sequence (<code><big><b>\-</b></big></code>). To mangle a string, three logical steps
|
jaroslav@1646
|
125 |
* are required, though they may be carried out in one pass:
|
jaroslav@1646
|
126 |
* </p>
|
jaroslav@1646
|
127 |
* <ol>
|
jaroslav@1646
|
128 |
* <li>In each accidental escape, replace the backslash with an escape sequence
|
jaroslav@1646
|
129 |
* (<code><big><b>\-</b></big></code>).</li>
|
jaroslav@1646
|
130 |
* <li>Replace each dangerous character with an escape sequence
|
jaroslav@1646
|
131 |
* (<code><big><b>\|</b></big></code> for <code><big><b>/</b></big></code>, etc.).</li>
|
jaroslav@1646
|
132 |
* <li>If the first two steps introduced any change, <em>and</em>
|
jaroslav@1646
|
133 |
* if the string does not already begin with a backslash, prepend a null prefix (<code><big><b>\=</b></big></code>).</li>
|
jaroslav@1646
|
134 |
* </ol>
|
jaroslav@1646
|
135 |
*
|
jaroslav@1646
|
136 |
* To demangle a mangled string that begins with an escape,
|
jaroslav@1646
|
137 |
* remove any null prefix, and then replace (in parallel)
|
jaroslav@1646
|
138 |
* each escape sequence by its original character.
|
jaroslav@1646
|
139 |
* <p>Spelling strings which contain accidental
|
jaroslav@1646
|
140 |
* escapes <em>must</em> have them replaced, even if those
|
jaroslav@1646
|
141 |
* strings do not contain dangerous characters.
|
jaroslav@1646
|
142 |
* This restriction means that mangling a string always
|
jaroslav@1646
|
143 |
* requires a scan of the string for escapes.
|
jaroslav@1646
|
144 |
* But then, a scan would be required anyway,
|
jaroslav@1646
|
145 |
* to check for dangerous characters.
|
jaroslav@1646
|
146 |
*
|
jaroslav@1646
|
147 |
* </p>
|
jaroslav@1646
|
148 |
* <h3> Nice Properties </h3>
|
jaroslav@1646
|
149 |
*
|
jaroslav@1646
|
150 |
* <p>
|
jaroslav@1646
|
151 |
* If a bytecode name does not contain any escape sequence,
|
jaroslav@1646
|
152 |
* demangling is a no-op: The string demangles to itself.
|
jaroslav@1646
|
153 |
* Such a string is called <cite>self-mangling</cite>.
|
jaroslav@1646
|
154 |
* Almost all strings are self-mangling.
|
jaroslav@1646
|
155 |
* In practice, to demangle almost any name “found in nature”,
|
jaroslav@1646
|
156 |
* simply verify that it does not begin with a backslash.
|
jaroslav@1646
|
157 |
* </p>
|
jaroslav@1646
|
158 |
* <p>
|
jaroslav@1646
|
159 |
* Mangling is a one-to-one function, while demangling
|
jaroslav@1646
|
160 |
* is a many-to-one function.
|
jaroslav@1646
|
161 |
* A mangled string is defined as <cite>validly mangled</cite> if
|
jaroslav@1646
|
162 |
* it is in fact the unique mangling of its spelling string.
|
jaroslav@1646
|
163 |
* Three examples of invalidly mangled strings are <code><big><b>\=foo</b></big></code>,
|
jaroslav@1646
|
164 |
* <code><big><b>\-bar</b></big></code>, and <code><big><b>baz\!</b></big></code>, which demangle to <code><big><b>foo</b></big></code>, <code><big><b>\bar</b></big></code>, and
|
jaroslav@1646
|
165 |
* <code><big><b>baz\!</b></big></code>, but then remangle to <code><big><b>foo</b></big></code>, <code><big><b>\bar</b></big></code>, and <code><big><b>\=baz\-!</b></big></code>.
|
jaroslav@1646
|
166 |
* If a language back-end or runtime is using mangled names,
|
jaroslav@1646
|
167 |
* it should never present an invalidly mangled bytecode
|
jaroslav@1646
|
168 |
* name to the JVM. If the runtime encounters one,
|
jaroslav@1646
|
169 |
* it should also report an error, since such an occurrence
|
jaroslav@1646
|
170 |
* probably indicates a bug in name encoding which
|
jaroslav@1646
|
171 |
* will lead to errors in linkage.
|
jaroslav@1646
|
172 |
* However, this note does not propose that the JVM verifier
|
jaroslav@1646
|
173 |
* detect invalidly mangled names.
|
jaroslav@1646
|
174 |
* </p>
|
jaroslav@1646
|
175 |
* <p>
|
jaroslav@1646
|
176 |
* As a result of these rules, it is a simple matter to
|
jaroslav@1646
|
177 |
* compute validly mangled substrings and concatenations
|
jaroslav@1646
|
178 |
* of validly mangled strings, and (with a little care)
|
jaroslav@1646
|
179 |
* these correspond to corresponding operations on their
|
jaroslav@1646
|
180 |
* spelling strings.
|
jaroslav@1646
|
181 |
* </p>
|
jaroslav@1646
|
182 |
* <ul>
|
jaroslav@1646
|
183 |
* <li>Any prefix of a validly mangled string is also validly mangled,
|
jaroslav@1646
|
184 |
* although a null prefix may need to be removed.</li>
|
jaroslav@1646
|
185 |
* <li>Any suffix of a validly mangled string is also validly mangled,
|
jaroslav@1646
|
186 |
* although a null prefix may need to be added.</li>
|
jaroslav@1646
|
187 |
* <li>Two validly mangled strings, when concatenated,
|
jaroslav@1646
|
188 |
* are also validly mangled, although any null prefix
|
jaroslav@1646
|
189 |
* must be removed from the second string,
|
jaroslav@1646
|
190 |
* and a trailing backslash on the first string may need escaping,
|
jaroslav@1646
|
191 |
* if it would participate in an accidental escape when followed
|
jaroslav@1646
|
192 |
* by the first character of the second string.</li>
|
jaroslav@1646
|
193 |
* </ul>
|
jaroslav@1646
|
194 |
* <p>If languages that include non-Java symbol spellings use this
|
jaroslav@1646
|
195 |
* mangling convention, they will enjoy the following advantages:
|
jaroslav@1646
|
196 |
* </p>
|
jaroslav@1646
|
197 |
* <ul>
|
jaroslav@1646
|
198 |
* <li>They can interoperate via symbols they share in common.</li>
|
jaroslav@1646
|
199 |
* <li>Low-level tools, such as backtrace printers, will have readable displays.</li>
|
jaroslav@1646
|
200 |
* <li>Future JVM and language extensions can safely use the dangerous characters
|
jaroslav@1646
|
201 |
* for structuring symbols, but will never interfere with valid spellings.</li>
|
jaroslav@1646
|
202 |
* <li>Runtimes and compilers can use standard libraries for mangling and demangling.</li>
|
jaroslav@1646
|
203 |
* <li>Occasional transliterations and name composition will be simple and regular,
|
jaroslav@1646
|
204 |
* for classes, methods, and fields.</li>
|
jaroslav@1646
|
205 |
* <li>Bytecode names will continue to be compact.
|
jaroslav@1646
|
206 |
* When mangled, spellings will at most double in length, either in
|
jaroslav@1646
|
207 |
* UTF8 or UTF16 format, and most will not change at all.</li>
|
jaroslav@1646
|
208 |
* </ul>
|
jaroslav@1646
|
209 |
*
|
jaroslav@1646
|
210 |
*
|
jaroslav@1646
|
211 |
* <h3> Suggestions for Human Readable Presentations </h3>
|
jaroslav@1646
|
212 |
*
|
jaroslav@1646
|
213 |
*
|
jaroslav@1646
|
214 |
* <p>
|
jaroslav@1646
|
215 |
* For human readable displays of symbols,
|
jaroslav@1646
|
216 |
* it will be better to present a string-like quoted
|
jaroslav@1646
|
217 |
* representation of the spelling, because JVM users
|
jaroslav@1646
|
218 |
* are generally familiar with such tokens.
|
jaroslav@1646
|
219 |
* We suggest using single or double quotes before and after
|
jaroslav@1646
|
220 |
* mangled symbols which are not valid Java identifiers,
|
jaroslav@1646
|
221 |
* with quotes, backslashes, and non-printing characters
|
jaroslav@1646
|
222 |
* escaped as if for literals in the Java language.
|
jaroslav@1646
|
223 |
* </p>
|
jaroslav@1646
|
224 |
* <p>
|
jaroslav@1646
|
225 |
* For example, an HTML-like spelling
|
jaroslav@1646
|
226 |
* <code><big><b>&lt;pre&gt;</b></big></code> mangles to
|
jaroslav@1646
|
227 |
* <code><big><b>\^pre\_</b></big></code> and could
|
jaroslav@1646
|
228 |
* display more cleanly as
|
jaroslav@1646
|
229 |
* <code><big><b>'&lt;pre&gt;'</b></big></code>,
|
jaroslav@1646
|
230 |
* with the quotes included.
|
jaroslav@1646
|
231 |
* Such string-like conventions are <em>not</em> suitable
|
jaroslav@1646
|
232 |
* for mangled bytecode names, in part because
|
jaroslav@1646
|
233 |
* dangerous characters must be eliminated, rather
|
jaroslav@1646
|
234 |
* than just quoted. Otherwise internally structured
|
jaroslav@1646
|
235 |
* strings like package prefixes and method signatures
|
jaroslav@1646
|
236 |
* could not be reliably parsed.
|
jaroslav@1646
|
237 |
* </p>
|
jaroslav@1646
|
238 |
* <p>
|
jaroslav@1646
|
239 |
* In such human-readable displays, invalidly mangled
|
jaroslav@1646
|
240 |
* names should <em>not</em> be demangled and quoted,
|
jaroslav@1646
|
241 |
* for this would be misleading. Likewise, JVM symbols
|
jaroslav@1646
|
242 |
* which contain dangerous characters (like dots in field
|
jaroslav@1646
|
243 |
* names or brackets in method names) should not be
|
jaroslav@1646
|
244 |
* simply quoted. The bytecode names
|
jaroslav@1646
|
245 |
* <code><big><b>\=phase\,1</b></big></code> and
|
jaroslav@1646
|
246 |
* <code><big><b>phase.1</b></big></code> are distinct,
|
jaroslav@1646
|
247 |
* and in demangled displays they should be presented as
|
jaroslav@1646
|
248 |
* <code><big><b>'phase.1'</b></big></code> and something like
|
jaroslav@1646
|
249 |
* <code><big><b>'phase'.1</b></big></code>, respectively.
|
jaroslav@1646
|
250 |
* </p>
|
jaroslav@1646
|
251 |
*
|
jaroslav@1646
|
252 |
* @author John Rose
|
jaroslav@1646
|
253 |
* @version 1.2, 02/06/2008
|
jaroslav@1646
|
254 |
* @see <a href="http://blogs.sun.com/jrose/entry/symbolic_freedom_in_the_vm">http://blogs.sun.com/jrose/entry/symbolic_freedom_in_the_vm</a>
|
jaroslav@1646
|
255 |
*/
|
jaroslav@1646
|
256 |
public class BytecodeName {
|
jaroslav@1646
|
257 |
/** Private so that the class cannot be instantiated from outside; it is used through its static methods. */
private BytecodeName() { } // static only class
|
jaroslav@1646
|
258 |
|
jaroslav@1646
|
259 |
/** Given a source name, produce the corresponding bytecode name.
|
jaroslav@1646
|
260 |
* The source name should not be qualified, because any syntactic
|
jaroslav@1646
|
261 |
* markers (dots, slashes, dollar signs, colons, etc.) will be mangled.
|
jaroslav@1646
|
262 |
* @param s the source name
|
jaroslav@1646
|
263 |
* @return a valid bytecode name which represents the source name
|
jaroslav@1646
|
264 |
*/
|
jaroslav@1646
|
265 |
public static String toBytecodeName(String s) {
|
jaroslav@1646
|
266 |
String bn = mangle(s);
|
jaroslav@1646
|
267 |
assert((Object)bn == s || looksMangled(bn)) : bn;
|
jaroslav@1646
|
268 |
assert(s.equals(toSourceName(bn))) : s;
|
jaroslav@1646
|
269 |
return bn;
|
jaroslav@1646
|
270 |
}
|
jaroslav@1646
|
271 |
|
jaroslav@1646
|
272 |
/** Given an unqualified bytecode name, produce the corresponding source name.
|
jaroslav@1646
|
273 |
* The bytecode name must not contain dangerous characters.
|
jaroslav@1646
|
274 |
* In particular, it must not be qualified or segmented by colon {@code ':'}.
|
jaroslav@1646
|
275 |
* @param s the bytecode name
|
jaroslav@1646
|
276 |
* @return the source name, which may possibly have unsafe characters
|
jaroslav@1646
|
277 |
* @throws IllegalArgumentException if the bytecode name is not {@link #isSafeBytecodeName safe}
|
jaroslav@1646
|
278 |
* @see #isSafeBytecodeName(java.lang.String)
|
jaroslav@1646
|
279 |
*/
|
jaroslav@1646
|
280 |
public static String toSourceName(String s) {
|
jaroslav@1646
|
281 |
checkSafeBytecodeName(s);
|
jaroslav@1646
|
282 |
String sn = s;
|
jaroslav@1646
|
283 |
if (looksMangled(s)) {
|
jaroslav@1646
|
284 |
sn = demangle(s);
|
jaroslav@1646
|
285 |
assert(s.equals(mangle(sn))) : s+" => "+sn+" => "+mangle(sn);
|
jaroslav@1646
|
286 |
}
|
jaroslav@1646
|
287 |
return sn;
|
jaroslav@1646
|
288 |
}
|
jaroslav@1646
|
289 |
|
jaroslav@1646
|
290 |
/**
|
jaroslav@1646
|
291 |
* Given a bytecode name from a classfile, separate it into
|
jaroslav@1646
|
292 |
* components delimited by dangerous characters.
|
jaroslav@1646
|
293 |
* Each resulting array element will be either a dangerous character,
|
jaroslav@1646
|
294 |
* or else a safe bytecode name.
|
jaroslav@1646
|
295 |
* (The safe name might possibly be mangled to hide further dangerous characters.)
|
jaroslav@1646
|
296 |
* For example, the qualified class name {@code java/lang/String}
|
jaroslav@1646
|
297 |
* will be parsed into the array {@code {"java", '/', "lang", '/', "String"}}.
|
jaroslav@1646
|
298 |
* The name {@code <init>} will be parsed into { '<', "init", '>'}}
|
jaroslav@1646
|
299 |
* The name {@code foo/bar$:baz} will be parsed into
|
jaroslav@1646
|
300 |
* {@code {"foo", '/', "bar", '$', ':', "baz"}}.
|
jaroslav@1646
|
301 |
* The name {@code ::\=:foo:\=bar\!baz} will be parsed into
|
jaroslav@1646
|
302 |
* {@code {':', ':', "", ':', "foo", ':', "bar:baz"}}.
|
jaroslav@1646
|
303 |
*/
|
jaroslav@1646
|
304 |
public static Object[] parseBytecodeName(String s) {
|
jaroslav@1646
|
305 |
int slen = s.length();
|
jaroslav@1646
|
306 |
Object[] res = null;
|
jaroslav@1646
|
307 |
for (int pass = 0; pass <= 1; pass++) {
|
jaroslav@1646
|
308 |
int fillp = 0;
|
jaroslav@1646
|
309 |
int lasti = 0;
|
jaroslav@1646
|
310 |
for (int i = 0; i <= slen; i++) {
|
jaroslav@1646
|
311 |
int whichDC = -1;
|
jaroslav@1646
|
312 |
if (i < slen) {
|
jaroslav@1646
|
313 |
whichDC = DANGEROUS_CHARS.indexOf(s.charAt(i));
|
jaroslav@1646
|
314 |
if (whichDC < DANGEROUS_CHAR_FIRST_INDEX) continue;
|
jaroslav@1646
|
315 |
}
|
jaroslav@1646
|
316 |
// got to end of string or next dangerous char
|
jaroslav@1646
|
317 |
if (lasti < i) {
|
jaroslav@1646
|
318 |
// normal component
|
jaroslav@1646
|
319 |
if (pass != 0)
|
jaroslav@1646
|
320 |
res[fillp] = toSourceName(s.substring(lasti, i));
|
jaroslav@1646
|
321 |
fillp++;
|
jaroslav@1646
|
322 |
lasti = i+1;
|
jaroslav@1646
|
323 |
}
|
jaroslav@1646
|
324 |
if (whichDC >= DANGEROUS_CHAR_FIRST_INDEX) {
|
jaroslav@1646
|
325 |
if (pass != 0)
|
jaroslav@1646
|
326 |
res[fillp] = DANGEROUS_CHARS_CA[whichDC];
|
jaroslav@1646
|
327 |
fillp++;
|
jaroslav@1646
|
328 |
lasti = i+1;
|
jaroslav@1646
|
329 |
}
|
jaroslav@1646
|
330 |
}
|
jaroslav@1646
|
331 |
if (pass != 0) break;
|
jaroslav@1646
|
332 |
// between passes, build the result array
|
jaroslav@1646
|
333 |
res = new Object[fillp];
|
jaroslav@1646
|
334 |
if (fillp <= 1 && lasti == 0) {
|
jaroslav@1646
|
335 |
if (fillp != 0) res[0] = toSourceName(s);
|
jaroslav@1646
|
336 |
break;
|
jaroslav@1646
|
337 |
}
|
jaroslav@1646
|
338 |
}
|
jaroslav@1646
|
339 |
return res;
|
jaroslav@1646
|
340 |
}
|
jaroslav@1646
|
341 |
|
jaroslav@1646
|
342 |
/**
|
jaroslav@1646
|
343 |
* Given a series of components, create a bytecode name for a classfile.
|
jaroslav@1646
|
344 |
* This is the inverse of {@link #parseBytecodeName(java.lang.String)}.
|
jaroslav@1646
|
345 |
* Each component must either be an interned one-character string of
|
jaroslav@1646
|
346 |
* a dangerous character, or else a safe bytecode name.
|
jaroslav@1646
|
347 |
* @param components a series of name components
|
jaroslav@1646
|
348 |
* @return the concatenation of all components
|
jaroslav@1646
|
349 |
* @throws IllegalArgumentException if any component contains an unsafe
|
jaroslav@1646
|
350 |
* character, and is not an interned one-character string
|
jaroslav@1646
|
351 |
* @throws NullPointerException if any component is null
|
jaroslav@1646
|
352 |
*/
|
jaroslav@1646
|
353 |
public static String unparseBytecodeName(Object[] components) {
|
jaroslav@1646
|
354 |
Object[] components0 = components;
|
jaroslav@1646
|
355 |
for (int i = 0; i < components.length; i++) {
|
jaroslav@1646
|
356 |
Object c = components[i];
|
jaroslav@1646
|
357 |
if (c instanceof String) {
|
jaroslav@1646
|
358 |
String mc = toBytecodeName((String) c);
|
jaroslav@1646
|
359 |
if (i == 0 && components.length == 1)
|
jaroslav@1646
|
360 |
return mc; // usual case
|
jaroslav@1646
|
361 |
if ((Object)mc != c) {
|
jaroslav@1646
|
362 |
if (components == components0)
|
jaroslav@1646
|
363 |
components = components.clone();
|
jaroslav@1646
|
364 |
components[i] = c = mc;
|
jaroslav@1646
|
365 |
}
|
jaroslav@1646
|
366 |
}
|
jaroslav@1646
|
367 |
}
|
jaroslav@1646
|
368 |
return appendAll(components);
|
jaroslav@1646
|
369 |
}
|
jaroslav@1646
|
370 |
private static String appendAll(Object[] components) {
|
jaroslav@1646
|
371 |
if (components.length <= 1) {
|
jaroslav@1646
|
372 |
if (components.length == 1) {
|
jaroslav@1646
|
373 |
return String.valueOf(components[0]);
|
jaroslav@1646
|
374 |
}
|
jaroslav@1646
|
375 |
return "";
|
jaroslav@1646
|
376 |
}
|
jaroslav@1646
|
377 |
int slen = 0;
|
jaroslav@1646
|
378 |
for (Object c : components) {
|
jaroslav@1646
|
379 |
if (c instanceof String)
|
jaroslav@1646
|
380 |
slen += String.valueOf(c).length();
|
jaroslav@1646
|
381 |
else
|
jaroslav@1646
|
382 |
slen += 1;
|
jaroslav@1646
|
383 |
}
|
jaroslav@1646
|
384 |
StringBuilder sb = new StringBuilder(slen);
|
jaroslav@1646
|
385 |
for (Object c : components) {
|
jaroslav@1646
|
386 |
sb.append(c);
|
jaroslav@1646
|
387 |
}
|
jaroslav@1646
|
388 |
return sb.toString();
|
jaroslav@1646
|
389 |
}
|
jaroslav@1646
|
390 |
|
jaroslav@1646
|
391 |
/**
|
jaroslav@1646
|
392 |
* Given a bytecode name, produce the corresponding display name.
|
jaroslav@1646
|
393 |
* This is the source name, plus quotes if needed.
|
jaroslav@1646
|
394 |
* If the bytecode name contains dangerous characters,
|
jaroslav@1646
|
395 |
* assume that they are being used as punctuation,
|
jaroslav@1646
|
396 |
* and pass them through unchanged.
|
jaroslav@1646
|
397 |
* Non-empty runs of non-dangerous characters are demangled
|
jaroslav@1646
|
398 |
* if necessary, and the resulting names are quoted if
|
jaroslav@1646
|
399 |
* they are not already valid Java identifiers, or if
|
jaroslav@1646
|
400 |
* they contain a dangerous character (i.e., dollar sign "$").
|
jaroslav@1646
|
401 |
* Single quotes are used when quoting.
|
jaroslav@1646
|
402 |
* Within quoted names, embedded single quotes and backslashes
|
jaroslav@1646
|
403 |
* are further escaped by prepended backslashes.
|
jaroslav@1646
|
404 |
*
|
jaroslav@1646
|
405 |
* @param s the original bytecode name (which may be qualified)
|
jaroslav@1646
|
406 |
* @return a human-readable presentation
|
jaroslav@1646
|
407 |
*/
|
jaroslav@1646
|
408 |
public static String toDisplayName(String s) {
|
jaroslav@1646
|
409 |
Object[] components = parseBytecodeName(s);
|
jaroslav@1646
|
410 |
for (int i = 0; i < components.length; i++) {
|
jaroslav@1646
|
411 |
if (!(components[i] instanceof String))
|
jaroslav@1646
|
412 |
continue;
|
jaroslav@1646
|
413 |
String sn = (String) components[i];
|
jaroslav@1646
|
414 |
// note that the name is already demangled!
|
jaroslav@1646
|
415 |
//sn = toSourceName(sn);
|
jaroslav@1646
|
416 |
if (!isJavaIdent(sn) || sn.indexOf('$') >=0 ) {
|
jaroslav@1646
|
417 |
components[i] = quoteDisplay(sn);
|
jaroslav@1646
|
418 |
}
|
jaroslav@1646
|
419 |
}
|
jaroslav@1646
|
420 |
return appendAll(components);
|
jaroslav@1646
|
421 |
}
|
jaroslav@1646
|
422 |
private static boolean isJavaIdent(String s) {
|
jaroslav@1646
|
423 |
int slen = s.length();
|
jaroslav@1646
|
424 |
if (slen == 0) return false;
|
jaroslav@1646
|
425 |
if (!Character.isJavaIdentifierStart(s.charAt(0)))
|
jaroslav@1646
|
426 |
return false;
|
jaroslav@1646
|
427 |
for (int i = 1; i < slen; i++) {
|
jaroslav@1646
|
428 |
if (!Character.isJavaIdentifierPart(s.charAt(i)))
|
jaroslav@1646
|
429 |
return false;
|
jaroslav@1646
|
430 |
}
|
jaroslav@1646
|
431 |
return true;
|
jaroslav@1646
|
432 |
}
|
jaroslav@1646
|
433 |
private static String quoteDisplay(String s) {
|
jaroslav@1646
|
434 |
// TO DO: Replace wierd characters in s by C-style escapes.
|
jaroslav@1646
|
435 |
return "'"+s.replaceAll("['\\\\]", "\\\\$0")+"'";
|
jaroslav@1646
|
436 |
}
|
jaroslav@1646
|
437 |
|
jaroslav@1646
|
438 |
private static void checkSafeBytecodeName(String s)
|
jaroslav@1646
|
439 |
throws IllegalArgumentException {
|
jaroslav@1646
|
440 |
if (!isSafeBytecodeName(s)) {
|
jaroslav@1646
|
441 |
throw new IllegalArgumentException(s);
|
jaroslav@1646
|
442 |
}
|
jaroslav@1646
|
443 |
}
|
jaroslav@1646
|
444 |
|
jaroslav@1646
|
445 |
/**
|
jaroslav@1646
|
446 |
* Report whether a simple name is safe as a bytecode name.
|
jaroslav@1646
|
447 |
* Such names are acceptable in class files as class, method, and field names.
|
jaroslav@1646
|
448 |
* Additionally, they are free of "dangerous" characters, even if those
|
jaroslav@1646
|
449 |
* characters are legal in some (or all) names in class files.
|
jaroslav@1646
|
450 |
* @param s the proposed bytecode name
|
jaroslav@1646
|
451 |
* @return true if the name is non-empty and all of its characters are safe
|
jaroslav@1646
|
452 |
*/
|
jaroslav@1646
|
453 |
public static boolean isSafeBytecodeName(String s) {
|
jaroslav@1646
|
454 |
if (s.length() == 0) return false;
|
jaroslav@1646
|
455 |
// check occurrences of each DANGEROUS char
|
jaroslav@1646
|
456 |
for (char xc : DANGEROUS_CHARS_A) {
|
jaroslav@1646
|
457 |
if (xc == ESCAPE_C) continue; // not really that dangerous
|
jaroslav@1646
|
458 |
if (s.indexOf(xc) >= 0) return false;
|
jaroslav@1646
|
459 |
}
|
jaroslav@1646
|
460 |
return true;
|
jaroslav@1646
|
461 |
}
|
jaroslav@1646
|
462 |
|
jaroslav@1646
|
463 |
/**
|
jaroslav@1646
|
464 |
* Report whether a character is safe in a bytecode name.
|
jaroslav@1646
|
465 |
* This is true of any unicode character except the following
|
jaroslav@1646
|
466 |
* <em>dangerous characters</em>: {@code ".;:$[]<>/"}.
|
jaroslav@1646
|
467 |
* @param s the proposed character
|
jaroslav@1646
|
468 |
* @return true if the character is safe to use in classfiles
|
jaroslav@1646
|
469 |
*/
|
jaroslav@1646
|
470 |
public static boolean isSafeBytecodeChar(char c) {
|
jaroslav@1646
|
471 |
return DANGEROUS_CHARS.indexOf(c) < DANGEROUS_CHAR_FIRST_INDEX;
|
jaroslav@1646
|
472 |
}
|
jaroslav@1646
|
473 |
|
jaroslav@1646
|
474 |
private static boolean looksMangled(String s) {
|
jaroslav@1646
|
475 |
return s.charAt(0) == ESCAPE_C;
|
jaroslav@1646
|
476 |
}
|
jaroslav@1646
|
477 |
|
jaroslav@1646
|
478 |
private static String mangle(String s) {
|
jaroslav@1646
|
479 |
if (s.length() == 0)
|
jaroslav@1646
|
480 |
return NULL_ESCAPE;
|
jaroslav@1646
|
481 |
|
jaroslav@1646
|
482 |
// build this lazily, when we first need an escape:
|
jaroslav@1646
|
483 |
StringBuilder sb = null;
|
jaroslav@1646
|
484 |
|
jaroslav@1646
|
485 |
for (int i = 0, slen = s.length(); i < slen; i++) {
|
jaroslav@1646
|
486 |
char c = s.charAt(i);
|
jaroslav@1646
|
487 |
|
jaroslav@1646
|
488 |
boolean needEscape = false;
|
jaroslav@1646
|
489 |
if (c == ESCAPE_C) {
|
jaroslav@1646
|
490 |
if (i+1 < slen) {
|
jaroslav@1646
|
491 |
char c1 = s.charAt(i+1);
|
jaroslav@1646
|
492 |
if ((i == 0 && c1 == NULL_ESCAPE_C)
|
jaroslav@1646
|
493 |
|| c1 != originalOfReplacement(c1)) {
|
jaroslav@1646
|
494 |
// an accidental escape
|
jaroslav@1646
|
495 |
needEscape = true;
|
jaroslav@1646
|
496 |
}
|
jaroslav@1646
|
497 |
}
|
jaroslav@1646
|
498 |
} else {
|
jaroslav@1646
|
499 |
needEscape = isDangerous(c);
|
jaroslav@1646
|
500 |
}
|
jaroslav@1646
|
501 |
|
jaroslav@1646
|
502 |
if (!needEscape) {
|
jaroslav@1646
|
503 |
if (sb != null) sb.append(c);
|
jaroslav@1646
|
504 |
continue;
|
jaroslav@1646
|
505 |
}
|
jaroslav@1646
|
506 |
|
jaroslav@1646
|
507 |
// build sb if this is the first escape
|
jaroslav@1646
|
508 |
if (sb == null) {
|
jaroslav@1646
|
509 |
sb = new StringBuilder(s.length()+10);
|
jaroslav@1646
|
510 |
// mangled names must begin with a backslash:
|
jaroslav@1646
|
511 |
if (s.charAt(0) != ESCAPE_C && i > 0)
|
jaroslav@1646
|
512 |
sb.append(NULL_ESCAPE);
|
jaroslav@1646
|
513 |
// append the string so far, which is unremarkable:
|
jaroslav@1646
|
514 |
sb.append(s.substring(0, i));
|
jaroslav@1646
|
515 |
}
|
jaroslav@1646
|
516 |
|
jaroslav@1646
|
517 |
// rewrite \ to \-, / to \|, etc.
|
jaroslav@1646
|
518 |
sb.append(ESCAPE_C);
|
jaroslav@1646
|
519 |
sb.append(replacementOf(c));
|
jaroslav@1646
|
520 |
}
|
jaroslav@1646
|
521 |
|
jaroslav@1646
|
522 |
if (sb != null) return sb.toString();
|
jaroslav@1646
|
523 |
|
jaroslav@1646
|
524 |
return s;
|
jaroslav@1646
|
525 |
}
|
jaroslav@1646
|
526 |
|
jaroslav@1646
|
527 |
private static String demangle(String s) {
|
jaroslav@1646
|
528 |
// build this lazily, when we first meet an escape:
|
jaroslav@1646
|
529 |
StringBuilder sb = null;
|
jaroslav@1646
|
530 |
|
jaroslav@1646
|
531 |
int stringStart = 0;
|
jaroslav@1646
|
532 |
if (s.startsWith(NULL_ESCAPE))
|
jaroslav@1646
|
533 |
stringStart = 2;
|
jaroslav@1646
|
534 |
|
jaroslav@1646
|
535 |
for (int i = stringStart, slen = s.length(); i < slen; i++) {
|
jaroslav@1646
|
536 |
char c = s.charAt(i);
|
jaroslav@1646
|
537 |
|
jaroslav@1646
|
538 |
if (c == ESCAPE_C && i+1 < slen) {
|
jaroslav@1646
|
539 |
// might be an escape sequence
|
jaroslav@1646
|
540 |
char rc = s.charAt(i+1);
|
jaroslav@1646
|
541 |
char oc = originalOfReplacement(rc);
|
jaroslav@1646
|
542 |
if (oc != rc) {
|
jaroslav@1646
|
543 |
// build sb if this is the first escape
|
jaroslav@1646
|
544 |
if (sb == null) {
|
jaroslav@1646
|
545 |
sb = new StringBuilder(s.length());
|
jaroslav@1646
|
546 |
// append the string so far, which is unremarkable:
|
jaroslav@1646
|
547 |
sb.append(s.substring(stringStart, i));
|
jaroslav@1646
|
548 |
}
|
jaroslav@1646
|
549 |
++i; // skip both characters
|
jaroslav@1646
|
550 |
c = oc;
|
jaroslav@1646
|
551 |
}
|
jaroslav@1646
|
552 |
}
|
jaroslav@1646
|
553 |
|
jaroslav@1646
|
554 |
if (sb != null)
|
jaroslav@1646
|
555 |
sb.append(c);
|
jaroslav@1646
|
556 |
}
|
jaroslav@1646
|
557 |
|
jaroslav@1646
|
558 |
if (sb != null) return sb.toString();
|
jaroslav@1646
|
559 |
|
jaroslav@1646
|
560 |
return s.substring(stringStart);
|
jaroslav@1646
|
561 |
}
|
jaroslav@1646
|
562 |
|
jaroslav@1646
|
563 |
// The character that introduces every escape sequence in a mangled name.
static final char ESCAPE_C = '\\';
// empty escape sequence to avoid a null name or illegal prefix
static final char NULL_ESCAPE_C = '=';
static final String NULL_ESCAPE = ESCAPE_C + "" + NULL_ESCAPE_C;

// DANGEROUS_CHARS and REPLACEMENT_CHARS are parallel tables: the character
// at index i of one corresponds to the character at index i of the other.
static final String DANGEROUS_CHARS   = "\\/.;:$[]<>"; // \\ must be first
static final String REPLACEMENT_CHARS =  "-|,?!%{}^_";
static final int DANGEROUS_CHAR_FIRST_INDEX = 1; // index after \\
// Array views of the two tables above.
static final char[] DANGEROUS_CHARS_A   = DANGEROUS_CHARS.toCharArray();
static final char[] REPLACEMENT_CHARS_A = REPLACEMENT_CHARS.toCharArray();
// Boxed copy of DANGEROUS_CHARS, one Character per table entry.
static final Character[] DANGEROUS_CHARS_CA;
static {
    Character[] boxed = new Character[DANGEROUS_CHARS.length()];
    for (int i = 0; i < boxed.length; i++) {
        boxed[i] = Character.valueOf(DANGEROUS_CHARS.charAt(i));
    }
    DANGEROUS_CHARS_CA = boxed;
}

// One bit per character code 0..127; a bit is set when that character
// appears in DANGEROUS_CHARS or REPLACEMENT_CHARS.
static final long[] SPECIAL_BITMAP = new long[2];  // 128 bits
static {
    String special = DANGEROUS_CHARS + REPLACEMENT_CHARS;
    for (char c : special.toCharArray()) {
        // c >>> 6 selects the 64-bit word; a long shift uses only the low
        // six bits of c, which selects the bit within that word.
        SPECIAL_BITMAP[c >>> 6] |= 1L << c;
    }
}
|
jaroslav@1646
|
589 |
static boolean isSpecial(char c) {
|
jaroslav@1646
|
590 |
if ((c >>> 6) < SPECIAL_BITMAP.length)
|
jaroslav@1646
|
591 |
return ((SPECIAL_BITMAP[c >>> 6] >> c) & 1) != 0;
|
jaroslav@1646
|
592 |
else
|
jaroslav@1646
|
593 |
return false;
|
jaroslav@1646
|
594 |
}
|
jaroslav@1646
|
595 |
static char replacementOf(char c) {
|
jaroslav@1646
|
596 |
if (!isSpecial(c)) return c;
|
jaroslav@1646
|
597 |
int i = DANGEROUS_CHARS.indexOf(c);
|
jaroslav@1646
|
598 |
if (i < 0) return c;
|
jaroslav@1646
|
599 |
return REPLACEMENT_CHARS.charAt(i);
|
jaroslav@1646
|
600 |
}
|
jaroslav@1646
|
601 |
static char originalOfReplacement(char c) {
|
jaroslav@1646
|
602 |
if (!isSpecial(c)) return c;
|
jaroslav@1646
|
603 |
int i = REPLACEMENT_CHARS.indexOf(c);
|
jaroslav@1646
|
604 |
if (i < 0) return c;
|
jaroslav@1646
|
605 |
return DANGEROUS_CHARS.charAt(i);
|
jaroslav@1646
|
606 |
}
|
jaroslav@1646
|
607 |
static boolean isDangerous(char c) {
|
jaroslav@1646
|
608 |
if (!isSpecial(c)) return false;
|
jaroslav@1646
|
609 |
return (DANGEROUS_CHARS.indexOf(c) >= DANGEROUS_CHAR_FIRST_INDEX);
|
jaroslav@1646
|
610 |
}
|
jaroslav@1646
|
611 |
static int indexOfDangerousChar(String s, int from) {
|
jaroslav@1646
|
612 |
for (int i = from, slen = s.length(); i < slen; i++) {
|
jaroslav@1646
|
613 |
if (isDangerous(s.charAt(i)))
|
jaroslav@1646
|
614 |
return i;
|
jaroslav@1646
|
615 |
}
|
jaroslav@1646
|
616 |
return -1;
|
jaroslav@1646
|
617 |
}
|
jaroslav@1646
|
618 |
static int lastIndexOfDangerousChar(String s, int from) {
|
jaroslav@1646
|
619 |
for (int i = Math.min(from, s.length()-1); i >= 0; i--) {
|
jaroslav@1646
|
620 |
if (isDangerous(s.charAt(i)))
|
jaroslav@1646
|
621 |
return i;
|
jaroslav@1646
|
622 |
}
|
jaroslav@1646
|
623 |
return -1;
|
jaroslav@1646
|
624 |
}
|
jaroslav@1646
|
625 |
|
jaroslav@1646
|
626 |
|
jaroslav@1646
|
627 |
}
|