1.1 --- a/rt/emul/compact/src/main/java/sun/invoke/util/BytecodeName.java Sun Aug 17 20:09:05 2014 +0200
1.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
1.3 @@ -1,627 +0,0 @@
1.4 -/*
1.5 - * Copyright (c) 2007, 2011, Oracle and/or its affiliates. All rights reserved.
1.6 - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
1.7 - *
1.8 - * This code is free software; you can redistribute it and/or modify it
1.9 - * under the terms of the GNU General Public License version 2 only, as
1.10 - * published by the Free Software Foundation. Oracle designates this
1.11 - * particular file as subject to the "Classpath" exception as provided
1.12 - * by Oracle in the LICENSE file that accompanied this code.
1.13 - *
1.14 - * This code is distributed in the hope that it will be useful, but WITHOUT
1.15 - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1.16 - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
1.17 - * version 2 for more details (a copy is included in the LICENSE file that
1.18 - * accompanied this code).
1.19 - *
1.20 - * You should have received a copy of the GNU General Public License version
1.21 - * 2 along with this work; if not, write to the Free Software Foundation,
1.22 - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
1.23 - *
1.24 - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
1.25 - * or visit www.oracle.com if you need additional information or have any
1.26 - * questions.
1.27 - */
1.28 -
1.29 -package sun.invoke.util;
1.30 -
1.31 -/**
1.32 - * Utility routines for dealing with bytecode-level names.
1.33 - * Includes universal mangling rules for the JVM.
1.34 - *
1.35 - * <h3>Avoiding Dangerous Characters </h3>
1.36 - *
1.37 - * <p>
1.38 - * The JVM defines a very small set of characters which are illegal
1.39 - * in name spellings. We will slightly extend and regularize this set
1.40 - * into a group of <cite>dangerous characters</cite>.
1.41 - * These characters will then be replaced, in mangled names, by escape sequences.
1.42 - * In addition, accidental escape sequences must be further escaped.
1.43 - * Finally, a special prefix will be applied if and only if
1.44 - * the mangling would otherwise fail to begin with the escape character.
1.45 - * This happens to cover the corner case of the null string,
1.46 - * and also clearly marks symbols which need demangling.
1.47 - * </p>
1.48 - * <p>
1.49 - * Dangerous characters are the union of all characters forbidden
1.50 - * or otherwise restricted by the JVM specification,
1.51 - * plus their mates, if they are brackets
1.52 - * (<code><big><b>[</b></big></code> and <code><big><b>]</b></big></code>,
1.53 - * <code><big><b><</b></big></code> and <code><big><b>></b></big></code>),
1.54 - * plus, arbitrarily, the colon character <code><big><b>:</b></big></code>.
1.55 - * There is no distinction between type, method, and field names.
1.56 - * This makes it easier to convert between mangled names of different
1.57 - * types, since they do not need to be decoded (demangled).
1.58 - * </p>
1.59 - * <p>
1.60 - * The escape character is backslash <code><big><b>\</b></big></code>
1.61 - * (also known as reverse solidus).
1.62 - * This character is, until now, unheard of in bytecode names,
1.63 - * but traditional in the proposed role.
1.64 - *
1.65 - * </p>
1.66 - * <h3> Replacement Characters </h3>
1.67 - *
1.68 - *
1.69 - * <p>
1.70 - * Every escape sequence is two characters
1.71 - * (in fact, two UTF8 bytes) beginning with
1.72 - * the escape character and followed by a
1.73 - * <cite>replacement character</cite>.
1.74 - * (Since the replacement character is never a backslash,
1.75 - * iterated manglings do not double in size.)
1.76 - * </p>
1.77 - * <p>
1.78 - * Each dangerous character has some rough visual similarity
1.79 - * to its corresponding replacement character.
1.80 - * This makes mangled symbols easier to recognize by sight.
1.81 - * </p>
1.82 - * <p>
1.83 - * The dangerous characters are
1.84 - * <code><big><b>/</b></big></code> (forward slash, used to delimit package components),
1.85 - * <code><big><b>.</b></big></code> (dot, also a package delimiter),
1.86 - * <code><big><b>;</b></big></code> (semicolon, used in signatures),
1.87 - * <code><big><b>$</b></big></code> (dollar, used in inner classes and synthetic members),
1.88 - * <code><big><b><</b></big></code> (left angle),
1.89 - * <code><big><b>></b></big></code> (right angle),
1.90 - * <code><big><b>[</b></big></code> (left square bracket, used in array types),
1.91 - * <code><big><b>]</b></big></code> (right square bracket, reserved in this scheme for language use),
1.92 - * and <code><big><b>:</b></big></code> (colon, reserved in this scheme for language use).
1.93 - * Their replacements are, respectively,
1.94 - * <code><big><b>|</b></big></code> (vertical bar),
1.95 - * <code><big><b>,</b></big></code> (comma),
1.96 - * <code><big><b>?</b></big></code> (question mark),
1.97 - * <code><big><b>%</b></big></code> (percent),
1.98 - * <code><big><b>^</b></big></code> (caret),
1.99 - * <code><big><b>_</b></big></code> (underscore), and
1.100 - * <code><big><b>{</b></big></code> (left curly bracket),
1.101 - * <code><big><b>}</b></big></code> (right curly bracket),
1.102 - * <code><big><b>!</b></big></code> (exclamation mark).
1.103 - * In addition, the replacement character for the escape character itself is
1.104 - * <code><big><b>-</b></big></code> (hyphen),
1.105 - * and the replacement character for the null prefix is
1.106 - * <code><big><b>=</b></big></code> (equal sign).
1.107 - * </p>
1.108 - * <p>
1.109 - * An escape character <code><big><b>\</b></big></code>
1.110 - * followed by any of these replacement characters
1.111 - * is an escape sequence, and there are no other escape sequences.
1.112 - * An equal sign is only part of an escape sequence
1.113 - * if it is the second character in the whole string, following a backslash.
1.114 - * Two consecutive backslashes do <em>not</em> form an escape sequence.
1.115 - * </p>
1.116 - * <p>
1.117 - * Each escape sequence replaces a so-called <cite>original character</cite>
1.118 - * which is either one of the dangerous characters or the escape character.
1.119 - * A null prefix replaces an initial null string, not a character.
1.120 - * </p>
1.121 - * <p>
1.122 - * All this implies that escape sequences cannot overlap and may be
1.123 - * determined all at once for a whole string. Note that a spelling
1.124 - * string can contain <cite>accidental escapes</cite>, apparent escape
1.125 - * sequences which must not be interpreted as manglings.
1.126 - * These are disabled by replacing their leading backslash with an
1.127 - * escape sequence (<code><big><b>\-</b></big></code>). To mangle a string, three logical steps
1.128 - * are required, though they may be carried out in one pass:
1.129 - * </p>
1.130 - * <ol>
1.131 - * <li>In each accidental escape, replace the backslash with an escape sequence
1.132 - * (<code><big><b>\-</b></big></code>).</li>
1.133 - * <li>Replace each dangerous character with an escape sequence
1.134 - * (<code><big><b>\|</b></big></code> for <code><big><b>/</b></big></code>, etc.).</li>
1.135 - * <li>If the first two steps introduced any change, <em>and</em>
1.136 - * if the string does not already begin with a backslash, prepend a null prefix (<code><big><b>\=</b></big></code>).</li>
1.137 - * </ol>
1.138 - *
1.139 - * To demangle a mangled string that begins with an escape,
1.140 - * remove any null prefix, and then replace (in parallel)
1.141 - * each escape sequence by its original character.
1.142 - * <p>Spelling strings which contain accidental
1.143 - * escapes <em>must</em> have them replaced, even if those
1.144 - * strings do not contain dangerous characters.
1.145 - * This restriction means that mangling a string always
1.146 - * requires a scan of the string for escapes.
1.147 - * But then, a scan would be required anyway,
1.148 - * to check for dangerous characters.
1.149 - *
1.150 - * </p>
1.151 - * <h3> Nice Properties </h3>
1.152 - *
1.153 - * <p>
1.154 - * If a bytecode name does not contain any escape sequence,
1.155 - * demangling is a no-op: The string demangles to itself.
1.156 - * Such a string is called <cite>self-mangling</cite>.
1.157 - * Almost all strings are self-mangling.
1.158 - * In practice, to demangle almost any name “found in nature”,
1.159 - * simply verify that it does not begin with a backslash.
1.160 - * </p>
1.161 - * <p>
1.162 - * Mangling is a one-to-one function, while demangling
1.163 - * is a many-to-one function.
1.164 - * A mangled string is defined as <cite>validly mangled</cite> if
1.165 - * it is in fact the unique mangling of its spelling string.
1.166 - * Three examples of invalidly mangled strings are <code><big><b>\=foo</b></big></code>,
1.167 - * <code><big><b>\-bar</b></big></code>, and <code><big><b>baz\!</b></big></code>, which demangle to <code><big><b>foo</b></big></code>, <code><big><b>\bar</b></big></code>, and
1.168 - * <code><big><b>baz\!</b></big></code>, but then remangle to <code><big><b>foo</b></big></code>, <code><big><b>\bar</b></big></code>, and <code><big><b>\=baz\-!</b></big></code>.
1.169 - * If a language back-end or runtime is using mangled names,
1.170 - * it should never present an invalidly mangled bytecode
1.171 - * name to the JVM. If the runtime encounters one,
1.172 - * it should also report an error, since such an occurrence
1.173 - * probably indicates a bug in name encoding which
1.174 - * will lead to errors in linkage.
1.175 - * However, this note does not propose that the JVM verifier
1.176 - * detect invalidly mangled names.
1.177 - * </p>
1.178 - * <p>
1.179 - * As a result of these rules, it is a simple matter to
1.180 - * compute validly mangled substrings and concatenations
1.181 - * of validly mangled strings, and (with a little care)
1.182 - * these correspond to corresponding operations on their
1.183 - * spelling strings.
1.184 - * </p>
1.185 - * <ul>
1.186 - * <li>Any prefix of a validly mangled string is also validly mangled,
1.187 - * although a null prefix may need to be removed.</li>
1.188 - * <li>Any suffix of a validly mangled string is also validly mangled,
1.189 - * although a null prefix may need to be added.</li>
1.190 - * <li>Two validly mangled strings, when concatenated,
1.191 - * are also validly mangled, although any null prefix
1.192 - * must be removed from the second string,
1.193 - * and a trailing backslash on the first string may need escaping,
1.194 - * if it would participate in an accidental escape when followed
1.195 - * by the first character of the second string.</li>
1.196 - * </ul>
1.197 - * <p>If languages that include non-Java symbol spellings use this
1.198 - * mangling convention, they will enjoy the following advantages:
1.199 - * </p>
1.200 - * <ul>
1.201 - * <li>They can interoperate via symbols they share in common.</li>
1.202 - * <li>Low-level tools, such as backtrace printers, will have readable displays.</li>
1.203 - * <li>Future JVM and language extensions can safely use the dangerous characters
1.204 - * for structuring symbols, but will never interfere with valid spellings.</li>
1.205 - * <li>Runtimes and compilers can use standard libraries for mangling and demangling.</li>
1.206 - * <li>Occasional transliterations and name composition will be simple and regular,
1.207 - * for classes, methods, and fields.</li>
1.208 - * <li>Bytecode names will continue to be compact.
1.209 - * When mangled, spellings will at most double in length, either in
1.210 - * UTF8 or UTF16 format, and most will not change at all.</li>
1.211 - * </ul>
1.212 - *
1.213 - *
1.214 - * <h3> Suggestions for Human Readable Presentations </h3>
1.215 - *
1.216 - *
1.217 - * <p>
1.218 - * For human readable displays of symbols,
1.219 - * it will be better to present a string-like quoted
1.220 - * representation of the spelling, because JVM users
1.221 - * are generally familiar with such tokens.
1.222 - * We suggest using single or double quotes before and after
1.223 - * mangled symbols which are not valid Java identifiers,
1.224 - * with quotes, backslashes, and non-printing characters
1.225 - * escaped as if for literals in the Java language.
1.226 - * </p>
1.227 - * <p>
1.228 - * For example, an HTML-like spelling
1.229 - * <code><big><b><pre></b></big></code> mangles to
1.230 - * <code><big><b>\^pre\_</b></big></code> and could
1.231 - * display more cleanly as
1.232 - * <code><big><b>'<pre>'</b></big></code>,
1.233 - * with the quotes included.
1.234 - * Such string-like conventions are <em>not</em> suitable
1.235 - * for mangled bytecode names, in part because
1.236 - * dangerous characters must be eliminated, rather
1.237 - * than just quoted. Otherwise internally structured
1.238 - * strings like package prefixes and method signatures
1.239 - * could not be reliably parsed.
1.240 - * </p>
1.241 - * <p>
1.242 - * In such human-readable displays, invalidly mangled
1.243 - * names should <em>not</em> be demangled and quoted,
1.244 - * for this would be misleading. Likewise, JVM symbols
1.245 - * which contain dangerous characters (like dots in field
1.246 - * names or brackets in method names) should not be
1.247 - * simply quoted. The bytecode names
1.248 - * <code><big><b>\=phase\,1</b></big></code> and
1.249 - * <code><big><b>phase.1</b></big></code> are distinct,
1.250 - * and in demangled displays they should be presented as
1.251 - * <code><big><b>'phase.1'</b></big></code> and something like
1.252 - * <code><big><b>'phase'.1</b></big></code>, respectively.
1.253 - * </p>
1.254 - *
1.255 - * @author John Rose
1.256 - * @version 1.2, 02/06/2008
1.257 - * @see http://blogs.sun.com/jrose/entry/symbolic_freedom_in_the_vm
1.258 - */
1.259 -public class BytecodeName {
1.260 - private BytecodeName() { } // static only class
1.261 -
1.262 - /** Given a source name, produce the corresponding bytecode name.
1.263 - * The source name should not be qualified, because any syntactic
1.264 - * markers (dots, slashes, dollar signs, colons, etc.) will be mangled.
1.265 - * @param s the source name
1.266 - * @return a valid bytecode name which represents the source name
1.267 - */
1.268 - public static String toBytecodeName(String s) {
1.269 - String bn = mangle(s);
1.270 - assert((Object)bn == s || looksMangled(bn)) : bn;
1.271 - assert(s.equals(toSourceName(bn))) : s;
1.272 - return bn;
1.273 - }
1.274 -
1.275 - /** Given an unqualified bytecode name, produce the corresponding source name.
1.276 - * The bytecode name must not contain dangerous characters.
1.277 - * In particular, it must not be qualified or segmented by colon {@code ':'}.
1.278 - * @param s the bytecode name
1.279 - * @return the source name, which may possibly have unsafe characters
1.280 - * @throws IllegalArgumentException if the bytecode name is not {@link #isSafeBytecodeName safe}
1.281 - * @see #isSafeBytecodeName(java.lang.String)
1.282 - */
1.283 - public static String toSourceName(String s) {
1.284 - checkSafeBytecodeName(s);
1.285 - String sn = s;
1.286 - if (looksMangled(s)) {
1.287 - sn = demangle(s);
1.288 - assert(s.equals(mangle(sn))) : s+" => "+sn+" => "+mangle(sn);
1.289 - }
1.290 - return sn;
1.291 - }
1.292 -
1.293 - /**
1.294 - * Given a bytecode name from a classfile, separate it into
1.295 - * components delimited by dangerous characters.
1.296 - * Each resulting array element will be either a dangerous character,
1.297 - * or else a safe bytecode name.
1.298 - * (The safe name might possibly be mangled to hide further dangerous characters.)
1.299 - * For example, the qualified class name {@code java/lang/String}
1.300 - * will be parsed into the array {@code {"java", '/', "lang", '/', "String"}}.
1.301 - * The name {@code <init>} will be parsed into { '<', "init", '>'}}
1.302 - * The name {@code foo/bar$:baz} will be parsed into
1.303 - * {@code {"foo", '/', "bar", '$', ':', "baz"}}.
1.304 - * The name {@code ::\=:foo:\=bar\!baz} will be parsed into
1.305 - * {@code {':', ':', "", ':', "foo", ':', "bar:baz"}}.
1.306 - */
1.307 - public static Object[] parseBytecodeName(String s) {
1.308 - int slen = s.length();
1.309 - Object[] res = null;
1.310 - for (int pass = 0; pass <= 1; pass++) {
1.311 - int fillp = 0;
1.312 - int lasti = 0;
1.313 - for (int i = 0; i <= slen; i++) {
1.314 - int whichDC = -1;
1.315 - if (i < slen) {
1.316 - whichDC = DANGEROUS_CHARS.indexOf(s.charAt(i));
1.317 - if (whichDC < DANGEROUS_CHAR_FIRST_INDEX) continue;
1.318 - }
1.319 - // got to end of string or next dangerous char
1.320 - if (lasti < i) {
1.321 - // normal component
1.322 - if (pass != 0)
1.323 - res[fillp] = toSourceName(s.substring(lasti, i));
1.324 - fillp++;
1.325 - lasti = i+1;
1.326 - }
1.327 - if (whichDC >= DANGEROUS_CHAR_FIRST_INDEX) {
1.328 - if (pass != 0)
1.329 - res[fillp] = DANGEROUS_CHARS_CA[whichDC];
1.330 - fillp++;
1.331 - lasti = i+1;
1.332 - }
1.333 - }
1.334 - if (pass != 0) break;
1.335 - // between passes, build the result array
1.336 - res = new Object[fillp];
1.337 - if (fillp <= 1 && lasti == 0) {
1.338 - if (fillp != 0) res[0] = toSourceName(s);
1.339 - break;
1.340 - }
1.341 - }
1.342 - return res;
1.343 - }
1.344 -
1.345 - /**
1.346 - * Given a series of components, create a bytecode name for a classfile.
1.347 - * This is the inverse of {@link #parseBytecodeName(java.lang.String)}.
1.348 - * Each component must either be an interned one-character string of
1.349 - * a dangerous character, or else a safe bytecode name.
1.350 - * @param components a series of name components
1.351 - * @return the concatenation of all components
1.352 - * @throws IllegalArgumentException if any component contains an unsafe
1.353 - * character, and is not an interned one-character string
1.354 - * @throws NullPointerException if any component is null
1.355 - */
1.356 - public static String unparseBytecodeName(Object[] components) {
1.357 - Object[] components0 = components;
1.358 - for (int i = 0; i < components.length; i++) {
1.359 - Object c = components[i];
1.360 - if (c instanceof String) {
1.361 - String mc = toBytecodeName((String) c);
1.362 - if (i == 0 && components.length == 1)
1.363 - return mc; // usual case
1.364 - if ((Object)mc != c) {
1.365 - if (components == components0)
1.366 - components = components.clone();
1.367 - components[i] = c = mc;
1.368 - }
1.369 - }
1.370 - }
1.371 - return appendAll(components);
1.372 - }
1.373 - private static String appendAll(Object[] components) {
1.374 - if (components.length <= 1) {
1.375 - if (components.length == 1) {
1.376 - return String.valueOf(components[0]);
1.377 - }
1.378 - return "";
1.379 - }
1.380 - int slen = 0;
1.381 - for (Object c : components) {
1.382 - if (c instanceof String)
1.383 - slen += String.valueOf(c).length();
1.384 - else
1.385 - slen += 1;
1.386 - }
1.387 - StringBuilder sb = new StringBuilder(slen);
1.388 - for (Object c : components) {
1.389 - sb.append(c);
1.390 - }
1.391 - return sb.toString();
1.392 - }
1.393 -
1.394 - /**
1.395 - * Given a bytecode name, produce the corresponding display name.
1.396 - * This is the source name, plus quotes if needed.
1.397 - * If the bytecode name contains dangerous characters,
1.398 - * assume that they are being used as punctuation,
1.399 - * and pass them through unchanged.
1.400 - * Non-empty runs of non-dangerous characters are demangled
1.401 - * if necessary, and the resulting names are quoted if
1.402 - * they are not already valid Java identifiers, or if
1.403 - * they contain a dangerous character (i.e., dollar sign "$").
1.404 - * Single quotes are used when quoting.
1.405 - * Within quoted names, embedded single quotes and backslashes
1.406 - * are further escaped by prepended backslashes.
1.407 - *
1.408 - * @param s the original bytecode name (which may be qualified)
1.409 - * @return a human-readable presentation
1.410 - */
1.411 - public static String toDisplayName(String s) {
1.412 - Object[] components = parseBytecodeName(s);
1.413 - for (int i = 0; i < components.length; i++) {
1.414 - if (!(components[i] instanceof String))
1.415 - continue;
1.416 - String sn = (String) components[i];
1.417 - // note that the name is already demangled!
1.418 - //sn = toSourceName(sn);
1.419 - if (!isJavaIdent(sn) || sn.indexOf('$') >=0 ) {
1.420 - components[i] = quoteDisplay(sn);
1.421 - }
1.422 - }
1.423 - return appendAll(components);
1.424 - }
1.425 - private static boolean isJavaIdent(String s) {
1.426 - int slen = s.length();
1.427 - if (slen == 0) return false;
1.428 - if (!Character.isJavaIdentifierStart(s.charAt(0)))
1.429 - return false;
1.430 - for (int i = 1; i < slen; i++) {
1.431 - if (!Character.isJavaIdentifierPart(s.charAt(i)))
1.432 - return false;
1.433 - }
1.434 - return true;
1.435 - }
1.436 - private static String quoteDisplay(String s) {
1.437 - // TO DO: Replace wierd characters in s by C-style escapes.
1.438 - return "'"+s.replaceAll("['\\\\]", "\\\\$0")+"'";
1.439 - }
1.440 -
1.441 - private static void checkSafeBytecodeName(String s)
1.442 - throws IllegalArgumentException {
1.443 - if (!isSafeBytecodeName(s)) {
1.444 - throw new IllegalArgumentException(s);
1.445 - }
1.446 - }
1.447 -
1.448 - /**
1.449 - * Report whether a simple name is safe as a bytecode name.
1.450 - * Such names are acceptable in class files as class, method, and field names.
1.451 - * Additionally, they are free of "dangerous" characters, even if those
1.452 - * characters are legal in some (or all) names in class files.
1.453 - * @param s the proposed bytecode name
1.454 - * @return true if the name is non-empty and all of its characters are safe
1.455 - */
1.456 - public static boolean isSafeBytecodeName(String s) {
1.457 - if (s.length() == 0) return false;
1.458 - // check occurrences of each DANGEROUS char
1.459 - for (char xc : DANGEROUS_CHARS_A) {
1.460 - if (xc == ESCAPE_C) continue; // not really that dangerous
1.461 - if (s.indexOf(xc) >= 0) return false;
1.462 - }
1.463 - return true;
1.464 - }
1.465 -
1.466 - /**
1.467 - * Report whether a character is safe in a bytecode name.
1.468 - * This is true of any unicode character except the following
1.469 - * <em>dangerous characters</em>: {@code ".;:$[]<>/"}.
1.470 - * @param s the proposed character
1.471 - * @return true if the character is safe to use in classfiles
1.472 - */
1.473 - public static boolean isSafeBytecodeChar(char c) {
1.474 - return DANGEROUS_CHARS.indexOf(c) < DANGEROUS_CHAR_FIRST_INDEX;
1.475 - }
1.476 -
1.477 - private static boolean looksMangled(String s) {
1.478 - return s.charAt(0) == ESCAPE_C;
1.479 - }
1.480 -
1.481 - private static String mangle(String s) {
1.482 - if (s.length() == 0)
1.483 - return NULL_ESCAPE;
1.484 -
1.485 - // build this lazily, when we first need an escape:
1.486 - StringBuilder sb = null;
1.487 -
1.488 - for (int i = 0, slen = s.length(); i < slen; i++) {
1.489 - char c = s.charAt(i);
1.490 -
1.491 - boolean needEscape = false;
1.492 - if (c == ESCAPE_C) {
1.493 - if (i+1 < slen) {
1.494 - char c1 = s.charAt(i+1);
1.495 - if ((i == 0 && c1 == NULL_ESCAPE_C)
1.496 - || c1 != originalOfReplacement(c1)) {
1.497 - // an accidental escape
1.498 - needEscape = true;
1.499 - }
1.500 - }
1.501 - } else {
1.502 - needEscape = isDangerous(c);
1.503 - }
1.504 -
1.505 - if (!needEscape) {
1.506 - if (sb != null) sb.append(c);
1.507 - continue;
1.508 - }
1.509 -
1.510 - // build sb if this is the first escape
1.511 - if (sb == null) {
1.512 - sb = new StringBuilder(s.length()+10);
1.513 - // mangled names must begin with a backslash:
1.514 - if (s.charAt(0) != ESCAPE_C && i > 0)
1.515 - sb.append(NULL_ESCAPE);
1.516 - // append the string so far, which is unremarkable:
1.517 - sb.append(s.substring(0, i));
1.518 - }
1.519 -
1.520 - // rewrite \ to \-, / to \|, etc.
1.521 - sb.append(ESCAPE_C);
1.522 - sb.append(replacementOf(c));
1.523 - }
1.524 -
1.525 - if (sb != null) return sb.toString();
1.526 -
1.527 - return s;
1.528 - }
1.529 -
1.530 - private static String demangle(String s) {
1.531 - // build this lazily, when we first meet an escape:
1.532 - StringBuilder sb = null;
1.533 -
1.534 - int stringStart = 0;
1.535 - if (s.startsWith(NULL_ESCAPE))
1.536 - stringStart = 2;
1.537 -
1.538 - for (int i = stringStart, slen = s.length(); i < slen; i++) {
1.539 - char c = s.charAt(i);
1.540 -
1.541 - if (c == ESCAPE_C && i+1 < slen) {
1.542 - // might be an escape sequence
1.543 - char rc = s.charAt(i+1);
1.544 - char oc = originalOfReplacement(rc);
1.545 - if (oc != rc) {
1.546 - // build sb if this is the first escape
1.547 - if (sb == null) {
1.548 - sb = new StringBuilder(s.length());
1.549 - // append the string so far, which is unremarkable:
1.550 - sb.append(s.substring(stringStart, i));
1.551 - }
1.552 - ++i; // skip both characters
1.553 - c = oc;
1.554 - }
1.555 - }
1.556 -
1.557 - if (sb != null)
1.558 - sb.append(c);
1.559 - }
1.560 -
1.561 - if (sb != null) return sb.toString();
1.562 -
1.563 - return s.substring(stringStart);
1.564 - }
1.565 -
1.566 - static char ESCAPE_C = '\\';
1.567 - // empty escape sequence to avoid a null name or illegal prefix
1.568 - static char NULL_ESCAPE_C = '=';
1.569 - static String NULL_ESCAPE = ESCAPE_C+""+NULL_ESCAPE_C;
1.570 -
1.571 - static final String DANGEROUS_CHARS = "\\/.;:$[]<>"; // \\ must be first
1.572 - static final String REPLACEMENT_CHARS = "-|,?!%{}^_";
1.573 - static final int DANGEROUS_CHAR_FIRST_INDEX = 1; // index after \\
1.574 - static char[] DANGEROUS_CHARS_A = DANGEROUS_CHARS.toCharArray();
1.575 - static char[] REPLACEMENT_CHARS_A = REPLACEMENT_CHARS.toCharArray();
1.576 - static final Character[] DANGEROUS_CHARS_CA;
1.577 - static {
1.578 - Character[] dcca = new Character[DANGEROUS_CHARS.length()];
1.579 - for (int i = 0; i < dcca.length; i++)
1.580 - dcca[i] = Character.valueOf(DANGEROUS_CHARS.charAt(i));
1.581 - DANGEROUS_CHARS_CA = dcca;
1.582 - }
1.583 -
1.584 - static final long[] SPECIAL_BITMAP = new long[2]; // 128 bits
1.585 - static {
1.586 - String SPECIAL = DANGEROUS_CHARS + REPLACEMENT_CHARS;
1.587 - //System.out.println("SPECIAL = "+SPECIAL);
1.588 - for (char c : SPECIAL.toCharArray()) {
1.589 - SPECIAL_BITMAP[c >>> 6] |= 1L << c;
1.590 - }
1.591 - }
1.592 - static boolean isSpecial(char c) {
1.593 - if ((c >>> 6) < SPECIAL_BITMAP.length)
1.594 - return ((SPECIAL_BITMAP[c >>> 6] >> c) & 1) != 0;
1.595 - else
1.596 - return false;
1.597 - }
1.598 - static char replacementOf(char c) {
1.599 - if (!isSpecial(c)) return c;
1.600 - int i = DANGEROUS_CHARS.indexOf(c);
1.601 - if (i < 0) return c;
1.602 - return REPLACEMENT_CHARS.charAt(i);
1.603 - }
1.604 - static char originalOfReplacement(char c) {
1.605 - if (!isSpecial(c)) return c;
1.606 - int i = REPLACEMENT_CHARS.indexOf(c);
1.607 - if (i < 0) return c;
1.608 - return DANGEROUS_CHARS.charAt(i);
1.609 - }
1.610 - static boolean isDangerous(char c) {
1.611 - if (!isSpecial(c)) return false;
1.612 - return (DANGEROUS_CHARS.indexOf(c) >= DANGEROUS_CHAR_FIRST_INDEX);
1.613 - }
1.614 - static int indexOfDangerousChar(String s, int from) {
1.615 - for (int i = from, slen = s.length(); i < slen; i++) {
1.616 - if (isDangerous(s.charAt(i)))
1.617 - return i;
1.618 - }
1.619 - return -1;
1.620 - }
1.621 - static int lastIndexOfDangerousChar(String s, int from) {
1.622 - for (int i = Math.min(from, s.length()-1); i >= 0; i--) {
1.623 - if (isDangerous(s.charAt(i)))
1.624 - return i;
1.625 - }
1.626 - return -1;
1.627 - }
1.628 -
1.629 -
1.630 -}