1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/rt/emul/compact/src/main/java/sun/invoke/util/BytecodeName.java Sat Aug 09 11:11:13 2014 +0200
1.3 @@ -0,0 +1,627 @@
1.4 +/*
1.5 + * Copyright (c) 2007, 2011, Oracle and/or its affiliates. All rights reserved.
1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
1.7 + *
1.8 + * This code is free software; you can redistribute it and/or modify it
1.9 + * under the terms of the GNU General Public License version 2 only, as
1.10 + * published by the Free Software Foundation. Oracle designates this
1.11 + * particular file as subject to the "Classpath" exception as provided
1.12 + * by Oracle in the LICENSE file that accompanied this code.
1.13 + *
1.14 + * This code is distributed in the hope that it will be useful, but WITHOUT
1.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1.16 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
1.17 + * version 2 for more details (a copy is included in the LICENSE file that
1.18 + * accompanied this code).
1.19 + *
1.20 + * You should have received a copy of the GNU General Public License version
1.21 + * 2 along with this work; if not, write to the Free Software Foundation,
1.22 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
1.23 + *
1.24 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
1.25 + * or visit www.oracle.com if you need additional information or have any
1.26 + * questions.
1.27 + */
1.28 +
1.29 +package sun.invoke.util;
1.30 +
1.31 +/**
1.32 + * Utility routines for dealing with bytecode-level names.
1.33 + * Includes universal mangling rules for the JVM.
1.34 + *
1.35 + * <h3>Avoiding Dangerous Characters </h3>
1.36 + *
1.37 + * <p>
1.38 + * The JVM defines a very small set of characters which are illegal
1.39 + * in name spellings. We will slightly extend and regularize this set
1.40 + * into a group of <cite>dangerous characters</cite>.
1.41 + * These characters will then be replaced, in mangled names, by escape sequences.
1.42 + * In addition, accidental escape sequences must be further escaped.
1.43 + * Finally, a special prefix will be applied if and only if
1.44 + * the mangling would otherwise fail to begin with the escape character.
1.45 + * This happens to cover the corner case of the null string,
1.46 + * and also clearly marks symbols which need demangling.
1.47 + * </p>
1.48 + * <p>
1.49 + * Dangerous characters are the union of all characters forbidden
1.50 + * or otherwise restricted by the JVM specification,
1.51 + * plus their mates, if they are brackets
1.52 + * (<code><big><b>[</b></big></code> and <code><big><b>]</b></big></code>,
1.53 + * <code><big><b>&lt;</b></big></code> and <code><big><b>&gt;</b></big></code>),
1.54 + * plus, arbitrarily, the colon character <code><big><b>:</b></big></code>.
1.55 + * There is no distinction between type, method, and field names.
1.56 + * This makes it easier to convert between mangled names of different
1.57 + * types, since they do not need to be decoded (demangled).
1.58 + * </p>
1.59 + * <p>
1.60 + * The escape character is backslash <code><big><b>\</b></big></code>
1.61 + * (also known as reverse solidus).
1.62 + * This character is, until now, unheard of in bytecode names,
1.63 + * but traditional in the proposed role.
1.64 + *
1.65 + * </p>
1.66 + * <h3> Replacement Characters </h3>
1.67 + *
1.68 + *
1.69 + * <p>
1.70 + * Every escape sequence is two characters
1.71 + * (in fact, two UTF8 bytes) beginning with
1.72 + * the escape character and followed by a
1.73 + * <cite>replacement character</cite>.
1.74 + * (Since the replacement character is never a backslash,
1.75 + * iterated manglings do not double in size.)
1.76 + * </p>
1.77 + * <p>
1.78 + * Each dangerous character has some rough visual similarity
1.79 + * to its corresponding replacement character.
1.80 + * This makes mangled symbols easier to recognize by sight.
1.81 + * </p>
1.82 + * <p>
1.83 + * The dangerous characters are
1.84 + * <code><big><b>/</b></big></code> (forward slash, used to delimit package components),
1.85 + * <code><big><b>.</b></big></code> (dot, also a package delimiter),
1.86 + * <code><big><b>;</b></big></code> (semicolon, used in signatures),
1.87 + * <code><big><b>$</b></big></code> (dollar, used in inner classes and synthetic members),
1.88 + * <code><big><b>&lt;</b></big></code> (left angle),
1.89 + * <code><big><b>&gt;</b></big></code> (right angle),
1.90 + * <code><big><b>[</b></big></code> (left square bracket, used in array types),
1.91 + * <code><big><b>]</b></big></code> (right square bracket, reserved in this scheme for language use),
1.92 + * and <code><big><b>:</b></big></code> (colon, reserved in this scheme for language use).
1.93 + * Their replacements are, respectively,
1.94 + * <code><big><b>|</b></big></code> (vertical bar),
1.95 + * <code><big><b>,</b></big></code> (comma),
1.96 + * <code><big><b>?</b></big></code> (question mark),
1.97 + * <code><big><b>%</b></big></code> (percent),
1.98 + * <code><big><b>^</b></big></code> (caret),
1.99 + * <code><big><b>_</b></big></code> (underscore),
1.100 + * <code><big><b>{</b></big></code> (left curly bracket),
1.101 + * <code><big><b>}</b></big></code> (right curly bracket), and
1.102 + * <code><big><b>!</b></big></code> (exclamation mark).
1.103 + * In addition, the replacement character for the escape character itself is
1.104 + * <code><big><b>-</b></big></code> (hyphen),
1.105 + * and the replacement character for the null prefix is
1.106 + * <code><big><b>=</b></big></code> (equal sign).
1.107 + * </p>
1.108 + * <p>
1.109 + * An escape character <code><big><b>\</b></big></code>
1.110 + * followed by any of these replacement characters
1.111 + * is an escape sequence, and there are no other escape sequences.
1.112 + * An equal sign is only part of an escape sequence
1.113 + * if it is the second character in the whole string, following a backslash.
1.114 + * Two consecutive backslashes do <em>not</em> form an escape sequence.
1.115 + * </p>
1.116 + * <p>
1.117 + * Each escape sequence replaces a so-called <cite>original character</cite>
1.118 + * which is either one of the dangerous characters or the escape character.
1.119 + * A null prefix replaces an initial null string, not a character.
1.120 + * </p>
1.121 + * <p>
1.122 + * All this implies that escape sequences cannot overlap and may be
1.123 + * determined all at once for a whole string. Note that a spelling
1.124 + * string can contain <cite>accidental escapes</cite>, apparent escape
1.125 + * sequences which must not be interpreted as manglings.
1.126 + * These are disabled by replacing their leading backslash with an
1.127 + * escape sequence (<code><big><b>\-</b></big></code>). To mangle a string, three logical steps
1.128 + * are required, though they may be carried out in one pass:
1.129 + * </p>
1.130 + * <ol>
1.131 + * <li>In each accidental escape, replace the backslash with an escape sequence
1.132 + * (<code><big><b>\-</b></big></code>).</li>
1.133 + * <li>Replace each dangerous character with an escape sequence
1.134 + * (<code><big><b>\|</b></big></code> for <code><big><b>/</b></big></code>, etc.).</li>
1.135 + * <li>If the first two steps introduced any change, <em>and</em>
1.136 + * if the string does not already begin with a backslash, prepend a null prefix (<code><big><b>\=</b></big></code>).</li>
1.137 + * </ol>
1.138 + *
1.139 + * To demangle a mangled string that begins with an escape,
1.140 + * remove any null prefix, and then replace (in parallel)
1.141 + * each escape sequence by its original character.
1.142 + * <p>Spelling strings which contain accidental
1.143 + * escapes <em>must</em> have them replaced, even if those
1.144 + * strings do not contain dangerous characters.
1.145 + * This restriction means that mangling a string always
1.146 + * requires a scan of the string for escapes.
1.147 + * But then, a scan would be required anyway,
1.148 + * to check for dangerous characters.
1.149 + *
1.150 + * </p>
1.151 + * <h3> Nice Properties </h3>
1.152 + *
1.153 + * <p>
1.154 + * If a bytecode name does not contain any escape sequence,
1.155 + * demangling is a no-op: The string demangles to itself.
1.156 + * Such a string is called <cite>self-mangling</cite>.
1.157 + * Almost all strings are self-mangling.
1.158 + * In practice, to demangle almost any name &ldquo;found in nature&rdquo;,
1.159 + * simply verify that it does not begin with a backslash.
1.160 + * </p>
1.161 + * <p>
1.162 + * Mangling is a one-to-one function, while demangling
1.163 + * is a many-to-one function.
1.164 + * A mangled string is defined as <cite>validly mangled</cite> if
1.165 + * it is in fact the unique mangling of its spelling string.
1.166 + * Three examples of invalidly mangled strings are <code><big><b>\=foo</b></big></code>,
1.167 + * <code><big><b>\-bar</b></big></code>, and <code><big><b>baz\!</b></big></code>, which demangle to <code><big><b>foo</b></big></code>, <code><big><b>\bar</b></big></code>, and
1.168 + * <code><big><b>baz\!</b></big></code>, but then remangle to <code><big><b>foo</b></big></code>, <code><big><b>\bar</b></big></code>, and <code><big><b>\=baz\-!</b></big></code>.
1.169 + * If a language back-end or runtime is using mangled names,
1.170 + * it should never present an invalidly mangled bytecode
1.171 + * name to the JVM. If the runtime encounters one,
1.172 + * it should also report an error, since such an occurrence
1.173 + * probably indicates a bug in name encoding which
1.174 + * will lead to errors in linkage.
1.175 + * However, this note does not propose that the JVM verifier
1.176 + * detect invalidly mangled names.
1.177 + * </p>
1.178 + * <p>
1.179 + * As a result of these rules, it is a simple matter to
1.180 + * compute validly mangled substrings and concatenations
1.181 + * of validly mangled strings, and (with a little care)
1.182 + * these correspond to corresponding operations on their
1.183 + * spelling strings.
1.184 + * </p>
1.185 + * <ul>
1.186 + * <li>Any prefix of a validly mangled string is also validly mangled,
1.187 + * although a null prefix may need to be removed.</li>
1.188 + * <li>Any suffix of a validly mangled string is also validly mangled,
1.189 + * although a null prefix may need to be added.</li>
1.190 + * <li>Two validly mangled strings, when concatenated,
1.191 + * are also validly mangled, although any null prefix
1.192 + * must be removed from the second string,
1.193 + * and a trailing backslash on the first string may need escaping,
1.194 + * if it would participate in an accidental escape when followed
1.195 + * by the first character of the second string.</li>
1.196 + * </ul>
1.197 + * <p>If languages that include non-Java symbol spellings use this
1.198 + * mangling convention, they will enjoy the following advantages:
1.199 + * </p>
1.200 + * <ul>
1.201 + * <li>They can interoperate via symbols they share in common.</li>
1.202 + * <li>Low-level tools, such as backtrace printers, will have readable displays.</li>
1.203 + * <li>Future JVM and language extensions can safely use the dangerous characters
1.204 + * for structuring symbols, but will never interfere with valid spellings.</li>
1.205 + * <li>Runtimes and compilers can use standard libraries for mangling and demangling.</li>
1.206 + * <li>Occasional transliterations and name composition will be simple and regular,
1.207 + * for classes, methods, and fields.</li>
1.208 + * <li>Bytecode names will continue to be compact.
1.209 + * When mangled, spellings will at most double in length, either in
1.210 + * UTF8 or UTF16 format, and most will not change at all.</li>
1.211 + * </ul>
1.212 + *
1.213 + *
1.214 + * <h3> Suggestions for Human Readable Presentations </h3>
1.215 + *
1.216 + *
1.217 + * <p>
1.218 + * For human readable displays of symbols,
1.219 + * it will be better to present a string-like quoted
1.220 + * representation of the spelling, because JVM users
1.221 + * are generally familiar with such tokens.
1.222 + * We suggest using single or double quotes before and after
1.223 + * mangled symbols which are not valid Java identifiers,
1.224 + * with quotes, backslashes, and non-printing characters
1.225 + * escaped as if for literals in the Java language.
1.226 + * </p>
1.227 + * <p>
1.228 + * For example, an HTML-like spelling
1.229 + * <code><big><b>&lt;pre&gt;</b></big></code> mangles to
1.230 + * <code><big><b>\^pre\_</b></big></code> and could
1.231 + * display more cleanly as
1.232 + * <code><big><b>'&lt;pre&gt;'</b></big></code>,
1.233 + * with the quotes included.
1.234 + * Such string-like conventions are <em>not</em> suitable
1.235 + * for mangled bytecode names, in part because
1.236 + * dangerous characters must be eliminated, rather
1.237 + * than just quoted. Otherwise internally structured
1.238 + * strings like package prefixes and method signatures
1.239 + * could not be reliably parsed.
1.240 + * </p>
1.241 + * <p>
1.242 + * In such human-readable displays, invalidly mangled
1.243 + * names should <em>not</em> be demangled and quoted,
1.244 + * for this would be misleading. Likewise, JVM symbols
1.245 + * which contain dangerous characters (like dots in field
1.246 + * names or brackets in method names) should not be
1.247 + * simply quoted. The bytecode names
1.248 + * <code><big><b>\=phase\,1</b></big></code> and
1.249 + * <code><big><b>phase.1</b></big></code> are distinct,
1.250 + * and in demangled displays they should be presented as
1.251 + * <code><big><b>'phase.1'</b></big></code> and something like
1.252 + * <code><big><b>'phase'.1</b></big></code>, respectively.
1.253 + * </p>
1.254 + *
1.255 + * @author John Rose
1.256 + * @version 1.2, 02/06/2008
1.257 + * @see <a href="http://blogs.sun.com/jrose/entry/symbolic_freedom_in_the_vm">Symbolic Freedom in the VM</a>
1.258 + */
1.259 +public class BytecodeName {
1.260 + private BytecodeName() { } // static only class
1.261 +
1.262 + /** Given a source name, produce the corresponding bytecode name.
1.263 + * The source name should not be qualified, because any syntactic
1.264 + * markers (dots, slashes, dollar signs, colons, etc.) will be mangled.
1.265 + * @param s the source name
1.266 + * @return a valid bytecode name which represents the source name
1.267 + */
1.268 + public static String toBytecodeName(String s) {
1.269 + String bn = mangle(s);
1.270 + assert((Object)bn == s || looksMangled(bn)) : bn;
1.271 + assert(s.equals(toSourceName(bn))) : s;
1.272 + return bn;
1.273 + }
1.274 +
1.275 + /** Given an unqualified bytecode name, produce the corresponding source name.
1.276 + * The bytecode name must not contain dangerous characters.
1.277 + * In particular, it must not be qualified or segmented by colon {@code ':'}.
1.278 + * @param s the bytecode name
1.279 + * @return the source name, which may possibly have unsafe characters
1.280 + * @throws IllegalArgumentException if the bytecode name is not {@link #isSafeBytecodeName safe}
1.281 + * @see #isSafeBytecodeName(java.lang.String)
1.282 + */
1.283 + public static String toSourceName(String s) {
1.284 + checkSafeBytecodeName(s);
1.285 + String sn = s;
1.286 + if (looksMangled(s)) {
1.287 + sn = demangle(s);
1.288 + assert(s.equals(mangle(sn))) : s+" => "+sn+" => "+mangle(sn);
1.289 + }
1.290 + return sn;
1.291 + }
1.292 +
1.293 + /**
1.294 + * Given a bytecode name from a classfile, separate it into
1.295 + * components delimited by dangerous characters.
1.296 + * Each resulting array element will be either a dangerous character,
1.297 + * or else a safe bytecode name.
1.298 + * (The safe name might possibly be mangled to hide further dangerous characters.)
1.299 + * For example, the qualified class name {@code java/lang/String}
1.300 + * will be parsed into the array {@code {"java", '/', "lang", '/', "String"}}.
1.301 + * The name {@code <init>} will be parsed into { '<', "init", '>'}}
1.302 + * The name {@code foo/bar$:baz} will be parsed into
1.303 + * {@code {"foo", '/', "bar", '$', ':', "baz"}}.
1.304 + * The name {@code ::\=:foo:\=bar\!baz} will be parsed into
1.305 + * {@code {':', ':', "", ':', "foo", ':', "bar:baz"}}.
1.306 + */
1.307 + public static Object[] parseBytecodeName(String s) {
1.308 + int slen = s.length();
1.309 + Object[] res = null;
1.310 + for (int pass = 0; pass <= 1; pass++) {
1.311 + int fillp = 0;
1.312 + int lasti = 0;
1.313 + for (int i = 0; i <= slen; i++) {
1.314 + int whichDC = -1;
1.315 + if (i < slen) {
1.316 + whichDC = DANGEROUS_CHARS.indexOf(s.charAt(i));
1.317 + if (whichDC < DANGEROUS_CHAR_FIRST_INDEX) continue;
1.318 + }
1.319 + // got to end of string or next dangerous char
1.320 + if (lasti < i) {
1.321 + // normal component
1.322 + if (pass != 0)
1.323 + res[fillp] = toSourceName(s.substring(lasti, i));
1.324 + fillp++;
1.325 + lasti = i+1;
1.326 + }
1.327 + if (whichDC >= DANGEROUS_CHAR_FIRST_INDEX) {
1.328 + if (pass != 0)
1.329 + res[fillp] = DANGEROUS_CHARS_CA[whichDC];
1.330 + fillp++;
1.331 + lasti = i+1;
1.332 + }
1.333 + }
1.334 + if (pass != 0) break;
1.335 + // between passes, build the result array
1.336 + res = new Object[fillp];
1.337 + if (fillp <= 1 && lasti == 0) {
1.338 + if (fillp != 0) res[0] = toSourceName(s);
1.339 + break;
1.340 + }
1.341 + }
1.342 + return res;
1.343 + }
1.344 +
1.345 + /**
1.346 + * Given a series of components, create a bytecode name for a classfile.
1.347 + * This is the inverse of {@link #parseBytecodeName(java.lang.String)}.
1.348 + * Each component must either be an interned one-character string of
1.349 + * a dangerous character, or else a safe bytecode name.
1.350 + * @param components a series of name components
1.351 + * @return the concatenation of all components
1.352 + * @throws IllegalArgumentException if any component contains an unsafe
1.353 + * character, and is not an interned one-character string
1.354 + * @throws NullPointerException if any component is null
1.355 + */
1.356 + public static String unparseBytecodeName(Object[] components) {
1.357 + Object[] components0 = components;
1.358 + for (int i = 0; i < components.length; i++) {
1.359 + Object c = components[i];
1.360 + if (c instanceof String) {
1.361 + String mc = toBytecodeName((String) c);
1.362 + if (i == 0 && components.length == 1)
1.363 + return mc; // usual case
1.364 + if ((Object)mc != c) {
1.365 + if (components == components0)
1.366 + components = components.clone();
1.367 + components[i] = c = mc;
1.368 + }
1.369 + }
1.370 + }
1.371 + return appendAll(components);
1.372 + }
1.373 + private static String appendAll(Object[] components) {
1.374 + if (components.length <= 1) {
1.375 + if (components.length == 1) {
1.376 + return String.valueOf(components[0]);
1.377 + }
1.378 + return "";
1.379 + }
1.380 + int slen = 0;
1.381 + for (Object c : components) {
1.382 + if (c instanceof String)
1.383 + slen += String.valueOf(c).length();
1.384 + else
1.385 + slen += 1;
1.386 + }
1.387 + StringBuilder sb = new StringBuilder(slen);
1.388 + for (Object c : components) {
1.389 + sb.append(c);
1.390 + }
1.391 + return sb.toString();
1.392 + }
1.393 +
1.394 + /**
1.395 + * Given a bytecode name, produce the corresponding display name.
1.396 + * This is the source name, plus quotes if needed.
1.397 + * If the bytecode name contains dangerous characters,
1.398 + * assume that they are being used as punctuation,
1.399 + * and pass them through unchanged.
1.400 + * Non-empty runs of non-dangerous characters are demangled
1.401 + * if necessary, and the resulting names are quoted if
1.402 + * they are not already valid Java identifiers, or if
1.403 + * they contain a dangerous character (i.e., dollar sign "$").
1.404 + * Single quotes are used when quoting.
1.405 + * Within quoted names, embedded single quotes and backslashes
1.406 + * are further escaped by prepended backslashes.
1.407 + *
1.408 + * @param s the original bytecode name (which may be qualified)
1.409 + * @return a human-readable presentation
1.410 + */
1.411 + public static String toDisplayName(String s) {
1.412 + Object[] components = parseBytecodeName(s);
1.413 + for (int i = 0; i < components.length; i++) {
1.414 + if (!(components[i] instanceof String))
1.415 + continue;
1.416 + String sn = (String) components[i];
1.417 + // note that the name is already demangled!
1.418 + //sn = toSourceName(sn);
1.419 + if (!isJavaIdent(sn) || sn.indexOf('$') >=0 ) {
1.420 + components[i] = quoteDisplay(sn);
1.421 + }
1.422 + }
1.423 + return appendAll(components);
1.424 + }
1.425 + private static boolean isJavaIdent(String s) {
1.426 + int slen = s.length();
1.427 + if (slen == 0) return false;
1.428 + if (!Character.isJavaIdentifierStart(s.charAt(0)))
1.429 + return false;
1.430 + for (int i = 1; i < slen; i++) {
1.431 + if (!Character.isJavaIdentifierPart(s.charAt(i)))
1.432 + return false;
1.433 + }
1.434 + return true;
1.435 + }
1.436 + private static String quoteDisplay(String s) {
1.437 + // TO DO: Replace wierd characters in s by C-style escapes.
1.438 + return "'"+s.replaceAll("['\\\\]", "\\\\$0")+"'";
1.439 + }
1.440 +
1.441 + private static void checkSafeBytecodeName(String s)
1.442 + throws IllegalArgumentException {
1.443 + if (!isSafeBytecodeName(s)) {
1.444 + throw new IllegalArgumentException(s);
1.445 + }
1.446 + }
1.447 +
1.448 + /**
1.449 + * Report whether a simple name is safe as a bytecode name.
1.450 + * Such names are acceptable in class files as class, method, and field names.
1.451 + * Additionally, they are free of "dangerous" characters, even if those
1.452 + * characters are legal in some (or all) names in class files.
1.453 + * @param s the proposed bytecode name
1.454 + * @return true if the name is non-empty and all of its characters are safe
1.455 + */
1.456 + public static boolean isSafeBytecodeName(String s) {
1.457 + if (s.length() == 0) return false;
1.458 + // check occurrences of each DANGEROUS char
1.459 + for (char xc : DANGEROUS_CHARS_A) {
1.460 + if (xc == ESCAPE_C) continue; // not really that dangerous
1.461 + if (s.indexOf(xc) >= 0) return false;
1.462 + }
1.463 + return true;
1.464 + }
1.465 +
1.466 + /**
1.467 + * Report whether a character is safe in a bytecode name.
1.468 + * This is true of any unicode character except the following
1.469 + * <em>dangerous characters</em>: {@code ".;:$[]<>/"}.
1.470 + * @param s the proposed character
1.471 + * @return true if the character is safe to use in classfiles
1.472 + */
1.473 + public static boolean isSafeBytecodeChar(char c) {
1.474 + return DANGEROUS_CHARS.indexOf(c) < DANGEROUS_CHAR_FIRST_INDEX;
1.475 + }
1.476 +
1.477 + private static boolean looksMangled(String s) {
1.478 + return s.charAt(0) == ESCAPE_C;
1.479 + }
1.480 +
1.481 + private static String mangle(String s) {
1.482 + if (s.length() == 0)
1.483 + return NULL_ESCAPE;
1.484 +
1.485 + // build this lazily, when we first need an escape:
1.486 + StringBuilder sb = null;
1.487 +
1.488 + for (int i = 0, slen = s.length(); i < slen; i++) {
1.489 + char c = s.charAt(i);
1.490 +
1.491 + boolean needEscape = false;
1.492 + if (c == ESCAPE_C) {
1.493 + if (i+1 < slen) {
1.494 + char c1 = s.charAt(i+1);
1.495 + if ((i == 0 && c1 == NULL_ESCAPE_C)
1.496 + || c1 != originalOfReplacement(c1)) {
1.497 + // an accidental escape
1.498 + needEscape = true;
1.499 + }
1.500 + }
1.501 + } else {
1.502 + needEscape = isDangerous(c);
1.503 + }
1.504 +
1.505 + if (!needEscape) {
1.506 + if (sb != null) sb.append(c);
1.507 + continue;
1.508 + }
1.509 +
1.510 + // build sb if this is the first escape
1.511 + if (sb == null) {
1.512 + sb = new StringBuilder(s.length()+10);
1.513 + // mangled names must begin with a backslash:
1.514 + if (s.charAt(0) != ESCAPE_C && i > 0)
1.515 + sb.append(NULL_ESCAPE);
1.516 + // append the string so far, which is unremarkable:
1.517 + sb.append(s.substring(0, i));
1.518 + }
1.519 +
1.520 + // rewrite \ to \-, / to \|, etc.
1.521 + sb.append(ESCAPE_C);
1.522 + sb.append(replacementOf(c));
1.523 + }
1.524 +
1.525 + if (sb != null) return sb.toString();
1.526 +
1.527 + return s;
1.528 + }
1.529 +
1.530 + private static String demangle(String s) {
1.531 + // build this lazily, when we first meet an escape:
1.532 + StringBuilder sb = null;
1.533 +
1.534 + int stringStart = 0;
1.535 + if (s.startsWith(NULL_ESCAPE))
1.536 + stringStart = 2;
1.537 +
1.538 + for (int i = stringStart, slen = s.length(); i < slen; i++) {
1.539 + char c = s.charAt(i);
1.540 +
1.541 + if (c == ESCAPE_C && i+1 < slen) {
1.542 + // might be an escape sequence
1.543 + char rc = s.charAt(i+1);
1.544 + char oc = originalOfReplacement(rc);
1.545 + if (oc != rc) {
1.546 + // build sb if this is the first escape
1.547 + if (sb == null) {
1.548 + sb = new StringBuilder(s.length());
1.549 + // append the string so far, which is unremarkable:
1.550 + sb.append(s.substring(stringStart, i));
1.551 + }
1.552 + ++i; // skip both characters
1.553 + c = oc;
1.554 + }
1.555 + }
1.556 +
1.557 + if (sb != null)
1.558 + sb.append(c);
1.559 + }
1.560 +
1.561 + if (sb != null) return sb.toString();
1.562 +
1.563 + return s.substring(stringStart);
1.564 + }
1.565 +
1.566 + static char ESCAPE_C = '\\';
1.567 + // empty escape sequence to avoid a null name or illegal prefix
1.568 + static char NULL_ESCAPE_C = '=';
1.569 + static String NULL_ESCAPE = ESCAPE_C+""+NULL_ESCAPE_C;
1.570 +
1.571 + static final String DANGEROUS_CHARS = "\\/.;:$[]<>"; // \\ must be first
1.572 + static final String REPLACEMENT_CHARS = "-|,?!%{}^_";
1.573 + static final int DANGEROUS_CHAR_FIRST_INDEX = 1; // index after \\
1.574 + static char[] DANGEROUS_CHARS_A = DANGEROUS_CHARS.toCharArray();
1.575 + static char[] REPLACEMENT_CHARS_A = REPLACEMENT_CHARS.toCharArray();
1.576 + static final Character[] DANGEROUS_CHARS_CA;
1.577 + static {
1.578 + Character[] dcca = new Character[DANGEROUS_CHARS.length()];
1.579 + for (int i = 0; i < dcca.length; i++)
1.580 + dcca[i] = Character.valueOf(DANGEROUS_CHARS.charAt(i));
1.581 + DANGEROUS_CHARS_CA = dcca;
1.582 + }
1.583 +
1.584 + static final long[] SPECIAL_BITMAP = new long[2]; // 128 bits
1.585 + static {
1.586 + String SPECIAL = DANGEROUS_CHARS + REPLACEMENT_CHARS;
1.587 + //System.out.println("SPECIAL = "+SPECIAL);
1.588 + for (char c : SPECIAL.toCharArray()) {
1.589 + SPECIAL_BITMAP[c >>> 6] |= 1L << c;
1.590 + }
1.591 + }
1.592 + static boolean isSpecial(char c) {
1.593 + if ((c >>> 6) < SPECIAL_BITMAP.length)
1.594 + return ((SPECIAL_BITMAP[c >>> 6] >> c) & 1) != 0;
1.595 + else
1.596 + return false;
1.597 + }
1.598 + static char replacementOf(char c) {
1.599 + if (!isSpecial(c)) return c;
1.600 + int i = DANGEROUS_CHARS.indexOf(c);
1.601 + if (i < 0) return c;
1.602 + return REPLACEMENT_CHARS.charAt(i);
1.603 + }
1.604 + static char originalOfReplacement(char c) {
1.605 + if (!isSpecial(c)) return c;
1.606 + int i = REPLACEMENT_CHARS.indexOf(c);
1.607 + if (i < 0) return c;
1.608 + return DANGEROUS_CHARS.charAt(i);
1.609 + }
1.610 + static boolean isDangerous(char c) {
1.611 + if (!isSpecial(c)) return false;
1.612 + return (DANGEROUS_CHARS.indexOf(c) >= DANGEROUS_CHAR_FIRST_INDEX);
1.613 + }
1.614 + static int indexOfDangerousChar(String s, int from) {
1.615 + for (int i = from, slen = s.length(); i < slen; i++) {
1.616 + if (isDangerous(s.charAt(i)))
1.617 + return i;
1.618 + }
1.619 + return -1;
1.620 + }
1.621 + static int lastIndexOfDangerousChar(String s, int from) {
1.622 + for (int i = Math.min(from, s.length()-1); i >= 0; i--) {
1.623 + if (isDangerous(s.charAt(i)))
1.624 + return i;
1.625 + }
1.626 + return -1;
1.627 + }
1.628 +
1.629 +
1.630 +}