rt/emul/compact/src/main/java/sun/invoke/util/BytecodeName.java
branchjdk8-b132
changeset 1646 c880a8a8803b
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/rt/emul/compact/src/main/java/sun/invoke/util/BytecodeName.java	Sat Aug 09 11:11:13 2014 +0200
     1.3 @@ -0,0 +1,627 @@
     1.4 +/*
     1.5 + * Copyright (c) 2007, 2011, Oracle and/or its affiliates. All rights reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.  Oracle designates this
    1.11 + * particular file as subject to the "Classpath" exception as provided
    1.12 + * by Oracle in the LICENSE file that accompanied this code.
    1.13 + *
    1.14 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.16 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.17 + * version 2 for more details (a copy is included in the LICENSE file that
    1.18 + * accompanied this code).
    1.19 + *
    1.20 + * You should have received a copy of the GNU General Public License version
    1.21 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.22 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.23 + *
    1.24 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    1.25 + * or visit www.oracle.com if you need additional information or have any
    1.26 + * questions.
    1.27 + */
    1.28 +
    1.29 +package sun.invoke.util;
    1.30 +
    1.31 +/**
    1.32 + * Utility routines for dealing with bytecode-level names.
    1.33 + * Includes universal mangling rules for the JVM.
    1.34 + *
    1.35 + * <h3>Avoiding Dangerous Characters </h3>
    1.36 + *
    1.37 + * <p>
    1.38 + * The JVM defines a very small set of characters which are illegal
    1.39 + * in name spellings.  We will slightly extend and regularize this set
    1.40 + * into a group of <cite>dangerous characters</cite>.
    1.41 + * These characters will then be replaced, in mangled names, by escape sequences.
    1.42 + * In addition, accidental escape sequences must be further escaped.
    1.43 + * Finally, a special prefix will be applied if and only if
    1.44 + * the mangling would otherwise fail to begin with the escape character.
    1.45 + * This happens to cover the corner case of the null string,
    1.46 + * and also clearly marks symbols which need demangling.
    1.47 + * </p>
    1.48 + * <p>
    1.49 + * Dangerous characters are the union of all characters forbidden
    1.50 + * or otherwise restricted by the JVM specification,
    1.51 + * plus their mates, if they are brackets
    1.52 + * (<code><big><b>[</b></big></code> and <code><big><b>]</b></big></code>,
    1.53 + * <code><big><b>&lt;</b></big></code> and <code><big><b>&gt;</b></big></code>),
    1.54 + * plus, arbitrarily, the colon character <code><big><b>:</b></big></code>.
    1.55 + * There is no distinction between type, method, and field names.
    1.56 + * This makes it easier to convert between mangled names of different
    1.57 + * types, since they do not need to be decoded (demangled).
    1.58 + * </p>
    1.59 + * <p>
    1.60 + * The escape character is backslash <code><big><b>\</b></big></code>
    1.61 + * (also known as reverse solidus).
    1.62 + * This character is, until now, unheard of in bytecode names,
    1.63 + * but traditional in the proposed role.
    1.64 + *
    1.65 + * </p>
    1.66 + * <h3> Replacement Characters </h3>
    1.67 + *
    1.68 + *
    1.69 + * <p>
    1.70 + * Every escape sequence is two characters
    1.71 + * (in fact, two UTF8 bytes) beginning with
    1.72 + * the escape character and followed by a
    1.73 + * <cite>replacement character</cite>.
    1.74 + * (Since the replacement character is never a backslash,
    1.75 + * iterated manglings do not double in size.)
    1.76 + * </p>
    1.77 + * <p>
    1.78 + * Each dangerous character has some rough visual similarity
    1.79 + * to its corresponding replacement character.
    1.80 + * This makes mangled symbols easier to recognize by sight.
    1.81 + * </p>
    1.82 + * <p>
    1.83 + * The dangerous characters are
    1.84 + * <code><big><b>/</b></big></code> (forward slash, used to delimit package components),
    1.85 + * <code><big><b>.</b></big></code> (dot, also a package delimiter),
    1.86 + * <code><big><b>;</b></big></code> (semicolon, used in signatures),
    1.87 + * <code><big><b>$</b></big></code> (dollar, used in inner classes and synthetic members),
    1.88 + * <code><big><b>&lt;</b></big></code> (left angle),
    1.89 + * <code><big><b>&gt;</b></big></code> (right angle),
    1.90 + * <code><big><b>[</b></big></code> (left square bracket, used in array types),
    1.91 + * <code><big><b>]</b></big></code> (right square bracket, reserved in this scheme for language use),
    1.92 + * and <code><big><b>:</b></big></code> (colon, reserved in this scheme for language use).
    1.93 + * Their replacements are, respectively,
    1.94 + * <code><big><b>|</b></big></code> (vertical bar),
    1.95 + * <code><big><b>,</b></big></code> (comma),
    1.96 + * <code><big><b>?</b></big></code> (question mark),
    1.97 + * <code><big><b>%</b></big></code> (percent),
    1.98 + * <code><big><b>^</b></big></code> (caret),
    1.99 + * <code><big><b>_</b></big></code> (underscore),
   1.100 + * <code><big><b>{</b></big></code> (left curly bracket),
   1.101 + * <code><big><b>}</b></big></code> (right curly bracket), and
   1.102 + * <code><big><b>!</b></big></code> (exclamation mark).
   1.103 + * In addition, the replacement character for the escape character itself is
   1.104 + * <code><big><b>-</b></big></code> (hyphen),
   1.105 + * and the replacement character for the null prefix is
   1.106 + * <code><big><b>=</b></big></code> (equal sign).
   1.107 + * </p>
   1.108 + * <p>
   1.109 + * An escape character <code><big><b>\</b></big></code>
   1.110 + * followed by any of these replacement characters
   1.111 + * is an escape sequence, and there are no other escape sequences.
   1.112 + * An equal sign is only part of an escape sequence
   1.113 + * if it is the second character in the whole string, following a backslash.
   1.114 + * Two consecutive backslashes do <em>not</em> form an escape sequence.
   1.115 + * </p>
   1.116 + * <p>
   1.117 + * Each escape sequence replaces a so-called <cite>original character</cite>
   1.118 + * which is either one of the dangerous characters or the escape character.
   1.119 + * A null prefix replaces an initial null string, not a character.
   1.120 + * </p>
   1.121 + * <p>
   1.122 + * All this implies that escape sequences cannot overlap and may be
   1.123 + * determined all at once for a whole string.  Note that a spelling
   1.124 + * string can contain <cite>accidental escapes</cite>, apparent escape
   1.125 + * sequences which must not be interpreted as manglings.
   1.126 + * These are disabled by replacing their leading backslash with an
   1.127 + * escape sequence (<code><big><b>\-</b></big></code>).  To mangle a string, three logical steps
   1.128 + * are required, though they may be carried out in one pass:
   1.129 + * </p>
   1.130 + * <ol>
   1.131 + *   <li>In each accidental escape, replace the backslash with an escape sequence
   1.132 + * (<code><big><b>\-</b></big></code>).</li>
   1.133 + *   <li>Replace each dangerous character with an escape sequence
   1.134 + * (<code><big><b>\|</b></big></code> for <code><big><b>/</b></big></code>, etc.).</li>
   1.135 + *   <li>If the first two steps introduced any change, <em>and</em>
   1.136 + * if the string does not already begin with a backslash, prepend a null prefix (<code><big><b>\=</b></big></code>).</li>
   1.137 + * </ol>
   1.138 + *
   1.139 + * To demangle a mangled string that begins with an escape,
   1.140 + * remove any null prefix, and then replace (in parallel)
   1.141 + * each escape sequence by its original character.
   1.142 + * <p>Spelling strings which contain accidental
   1.143 + * escapes <em>must</em> have them replaced, even if those
   1.144 + * strings do not contain dangerous characters.
   1.145 + * This restriction means that mangling a string always
   1.146 + * requires a scan of the string for escapes.
   1.147 + * But then, a scan would be required anyway,
   1.148 + * to check for dangerous characters.
   1.149 + *
   1.150 + * </p>
   1.151 + * <h3> Nice Properties </h3>
   1.152 + *
   1.153 + * <p>
   1.154 + * If a bytecode name does not contain any escape sequence,
   1.155 + * demangling is a no-op:  The string demangles to itself.
   1.156 + * Such a string is called <cite>self-mangling</cite>.
   1.157 + * Almost all strings are self-mangling.
   1.158 + * In practice, to demangle almost any name &ldquo;found in nature&rdquo;,
   1.159 + * simply verify that it does not begin with a backslash.
   1.160 + * </p>
   1.161 + * <p>
   1.162 + * Mangling is a one-to-one function, while demangling
   1.163 + * is a many-to-one function.
   1.164 + * A mangled string is defined as <cite>validly mangled</cite> if
   1.165 + * it is in fact the unique mangling of its spelling string.
   1.166 + * Three examples of invalidly mangled strings are <code><big><b>\=foo</b></big></code>,
   1.167 + * <code><big><b>\-bar</b></big></code>, and <code><big><b>baz\!</b></big></code>, which demangle to <code><big><b>foo</b></big></code>, <code><big><b>\bar</b></big></code>, and
   1.168 + * <code><big><b>baz\!</b></big></code>, but then remangle to <code><big><b>foo</b></big></code>, <code><big><b>\bar</b></big></code>, and <code><big><b>\=baz\-!</b></big></code>.
   1.169 + * If a language back-end or runtime is using mangled names,
   1.170 + * it should never present an invalidly mangled bytecode
   1.171 + * name to the JVM.  If the runtime encounters one,
   1.172 + * it should also report an error, since such an occurrence
   1.173 + * probably indicates a bug in name encoding which
   1.174 + * will lead to errors in linkage.
   1.175 + * However, this note does not propose that the JVM verifier
   1.176 + * detect invalidly mangled names.
   1.177 + * </p>
   1.178 + * <p>
   1.179 + * As a result of these rules, it is a simple matter to
   1.180 + * compute validly mangled substrings and concatenations
   1.181 + * of validly mangled strings, and (with a little care)
   1.182 + * these correspond to corresponding operations on their
   1.183 + * spelling strings.
   1.184 + * </p>
   1.185 + * <ul>
   1.186 + *   <li>Any prefix of a validly mangled string is also validly mangled,
   1.187 + * although a null prefix may need to be removed.</li>
   1.188 + *   <li>Any suffix of a validly mangled string is also validly mangled,
   1.189 + * although a null prefix may need to be added.</li>
   1.190 + *   <li>Two validly mangled strings, when concatenated,
   1.191 + * are also validly mangled, although any null prefix
   1.192 + * must be removed from the second string,
   1.193 + * and a trailing backslash on the first string may need escaping,
   1.194 + * if it would participate in an accidental escape when followed
   1.195 + * by the first character of the second string.</li>
   1.196 + * </ul>
   1.197 + * <p>If languages that include non-Java symbol spellings use this
   1.198 + * mangling convention, they will enjoy the following advantages:
   1.199 + * </p>
   1.200 + * <ul>
   1.201 + *   <li>They can interoperate via symbols they share in common.</li>
   1.202 + *   <li>Low-level tools, such as backtrace printers, will have readable displays.</li>
   1.203 + *   <li>Future JVM and language extensions can safely use the dangerous characters
   1.204 + * for structuring symbols, but will never interfere with valid spellings.</li>
   1.205 + *   <li>Runtimes and compilers can use standard libraries for mangling and demangling.</li>
   1.206 + *   <li>Occasional transliterations and name composition will be simple and regular,
   1.207 + * for classes, methods, and fields.</li>
   1.208 + *   <li>Bytecode names will continue to be compact.
   1.209 + * When mangled, spellings will at most double in length, either in
   1.210 + * UTF8 or UTF16 format, and most will not change at all.</li>
   1.211 + * </ul>
   1.212 + *
   1.213 + *
   1.214 + * <h3> Suggestions for Human Readable Presentations </h3>
   1.215 + *
   1.216 + *
   1.217 + * <p>
   1.218 + * For human readable displays of symbols,
   1.219 + * it will be better to present a string-like quoted
   1.220 + * representation of the spelling, because JVM users
   1.221 + * are generally familiar with such tokens.
   1.222 + * We suggest using single or double quotes before and after
   1.223 + * mangled symbols which are not valid Java identifiers,
   1.224 + * with quotes, backslashes, and non-printing characters
   1.225 + * escaped as if for literals in the Java language.
   1.226 + * </p>
   1.227 + * <p>
   1.228 + * For example, an HTML-like spelling
   1.229 + * <code><big><b>&lt;pre&gt;</b></big></code> mangles to
   1.230 + * <code><big><b>\^pre\_</b></big></code> and could
   1.231 + * display more cleanly as
   1.232 + * <code><big><b>'&lt;pre&gt;'</b></big></code>,
   1.233 + * with the quotes included.
   1.234 + * Such string-like conventions are <em>not</em> suitable
   1.235 + * for mangled bytecode names, in part because
   1.236 + * dangerous characters must be eliminated, rather
   1.237 + * than just quoted.  Otherwise internally structured
   1.238 + * strings like package prefixes and method signatures
   1.239 + * could not be reliably parsed.
   1.240 + * </p>
   1.241 + * <p>
   1.242 + * In such human-readable displays, invalidly mangled
   1.243 + * names should <em>not</em> be demangled and quoted,
   1.244 + * for this would be misleading.  Likewise, JVM symbols
   1.245 + * which contain dangerous characters (like dots in field
   1.246 + * names or brackets in method names) should not be
   1.247 + * simply quoted.  The bytecode names
   1.248 + * <code><big><b>\=phase\,1</b></big></code> and
   1.249 + * <code><big><b>phase.1</b></big></code> are distinct,
   1.250 + * and in demangled displays they should be presented as
   1.251 + * <code><big><b>'phase.1'</b></big></code> and something like
   1.252 + * <code><big><b>'phase'.1</b></big></code>, respectively.
   1.253 + * </p>
   1.254 + *
   1.255 + * @author John Rose
   1.256 + * @version 1.2, 02/06/2008
   1.257 + * @see <a href="http://blogs.sun.com/jrose/entry/symbolic_freedom_in_the_vm">Symbolic Freedom in the VM</a>
   1.258 + */
   1.259 +public class BytecodeName {
   1.260 +    private BytecodeName() { }  // static only class
   1.261 +
   1.262 +    /** Given a source name, produce the corresponding bytecode name.
   1.263 +     * The source name should not be qualified, because any syntactic
   1.264 +     * markers (dots, slashes, dollar signs, colons, etc.) will be mangled.
   1.265 +     * @param s the source name
   1.266 +     * @return a valid bytecode name which represents the source name
   1.267 +     */
   1.268 +    public static String toBytecodeName(String s) {
   1.269 +        String bn = mangle(s);
   1.270 +        assert((Object)bn == s || looksMangled(bn)) : bn;
   1.271 +        assert(s.equals(toSourceName(bn))) : s;
   1.272 +        return bn;
   1.273 +    }
   1.274 +
   1.275 +    /** Given an unqualified bytecode name, produce the corresponding source name.
   1.276 +     * The bytecode name must not contain dangerous characters.
   1.277 +     * In particular, it must not be qualified or segmented by colon {@code ':'}.
   1.278 +     * @param s the bytecode name
   1.279 +     * @return the source name, which may possibly have unsafe characters
   1.280 +     * @throws IllegalArgumentException if the bytecode name is not {@link #isSafeBytecodeName safe}
   1.281 +     * @see #isSafeBytecodeName(java.lang.String)
   1.282 +     */
   1.283 +    public static String toSourceName(String s) {
   1.284 +        checkSafeBytecodeName(s);
   1.285 +        String sn = s;
   1.286 +        if (looksMangled(s)) {
   1.287 +            sn = demangle(s);
   1.288 +            assert(s.equals(mangle(sn))) : s+" => "+sn+" => "+mangle(sn);
   1.289 +        }
   1.290 +        return sn;
   1.291 +    }
   1.292 +
   1.293 +    /**
   1.294 +     * Given a bytecode name from a classfile, separate it into
   1.295 +     * components delimited by dangerous characters.
   1.296 +     * Each resulting array element will be either a dangerous character,
   1.297 +     * or else a safe bytecode name.
   1.298 +     * (The safe name might possibly be mangled to hide further dangerous characters.)
   1.299 +     * For example, the qualified class name {@code java/lang/String}
   1.300 +     * will be parsed into the array {@code {"java", '/', "lang", '/', "String"}}.
   1.301 +     * The name {@code &lt;init&gt;} will be parsed into { '&lt;', "init", '&gt;'}}
   1.302 +     * The name {@code foo/bar$:baz} will be parsed into
   1.303 +     * {@code {"foo", '/', "bar", '$', ':', "baz"}}.
   1.304 +     * The name {@code ::\=:foo:\=bar\!baz} will be parsed into
   1.305 +     * {@code {':', ':', "", ':', "foo", ':', "bar:baz"}}.
   1.306 +     */
   1.307 +    public static Object[] parseBytecodeName(String s) {
   1.308 +        int slen = s.length();
   1.309 +        Object[] res = null;
   1.310 +        for (int pass = 0; pass <= 1; pass++) {
   1.311 +            int fillp = 0;
   1.312 +            int lasti = 0;
   1.313 +            for (int i = 0; i <= slen; i++) {
   1.314 +                int whichDC = -1;
   1.315 +                if (i < slen) {
   1.316 +                    whichDC = DANGEROUS_CHARS.indexOf(s.charAt(i));
   1.317 +                    if (whichDC < DANGEROUS_CHAR_FIRST_INDEX)  continue;
   1.318 +                }
   1.319 +                // got to end of string or next dangerous char
   1.320 +                if (lasti < i) {
   1.321 +                    // normal component
   1.322 +                    if (pass != 0)
   1.323 +                        res[fillp] = toSourceName(s.substring(lasti, i));
   1.324 +                    fillp++;
   1.325 +                    lasti = i+1;
   1.326 +                }
   1.327 +                if (whichDC >= DANGEROUS_CHAR_FIRST_INDEX) {
   1.328 +                    if (pass != 0)
   1.329 +                        res[fillp] = DANGEROUS_CHARS_CA[whichDC];
   1.330 +                    fillp++;
   1.331 +                    lasti = i+1;
   1.332 +                }
   1.333 +            }
   1.334 +            if (pass != 0)  break;
   1.335 +            // between passes, build the result array
   1.336 +            res = new Object[fillp];
   1.337 +            if (fillp <= 1 && lasti == 0) {
   1.338 +                if (fillp != 0)  res[0] = toSourceName(s);
   1.339 +                break;
   1.340 +            }
   1.341 +        }
   1.342 +        return res;
   1.343 +    }
   1.344 +
   1.345 +    /**
   1.346 +     * Given a series of components, create a bytecode name for a classfile.
   1.347 +     * This is the inverse of {@link #parseBytecodeName(java.lang.String)}.
   1.348 +     * Each component must either be an interned one-character string of
   1.349 +     * a dangerous character, or else a safe bytecode name.
   1.350 +     * @param components a series of name components
   1.351 +     * @return the concatenation of all components
   1.352 +     * @throws IllegalArgumentException if any component contains an unsafe
   1.353 +     *          character, and is not an interned one-character string
   1.354 +     * @throws NullPointerException if any component is null
   1.355 +     */
   1.356 +    public static String unparseBytecodeName(Object[] components) {
   1.357 +        Object[] components0 = components;
   1.358 +        for (int i = 0; i < components.length; i++) {
   1.359 +            Object c = components[i];
   1.360 +            if (c instanceof String) {
   1.361 +                String mc = toBytecodeName((String) c);
   1.362 +                if (i == 0 && components.length == 1)
   1.363 +                    return mc;  // usual case
   1.364 +                if ((Object)mc != c) {
   1.365 +                    if (components == components0)
   1.366 +                        components = components.clone();
   1.367 +                    components[i] = c = mc;
   1.368 +                }
   1.369 +            }
   1.370 +        }
   1.371 +        return appendAll(components);
   1.372 +    }
   1.373 +    private static String appendAll(Object[] components) {
   1.374 +        if (components.length <= 1) {
   1.375 +            if (components.length == 1) {
   1.376 +                return String.valueOf(components[0]);
   1.377 +            }
   1.378 +            return "";
   1.379 +        }
   1.380 +        int slen = 0;
   1.381 +        for (Object c : components) {
   1.382 +            if (c instanceof String)
   1.383 +                slen += String.valueOf(c).length();
   1.384 +            else
   1.385 +                slen += 1;
   1.386 +        }
   1.387 +        StringBuilder sb = new StringBuilder(slen);
   1.388 +        for (Object c : components) {
   1.389 +            sb.append(c);
   1.390 +        }
   1.391 +        return sb.toString();
   1.392 +    }
   1.393 +
   1.394 +    /**
   1.395 +     * Given a bytecode name, produce the corresponding display name.
   1.396 +     * This is the source name, plus quotes if needed.
   1.397 +     * If the bytecode name contains dangerous characters,
   1.398 +     * assume that they are being used as punctuation,
   1.399 +     * and pass them through unchanged.
   1.400 +     * Non-empty runs of non-dangerous characters are demangled
   1.401 +     * if necessary, and the resulting names are quoted if
   1.402 +     * they are not already valid Java identifiers, or if
   1.403 +     * they contain a dangerous character (i.e., dollar sign "$").
   1.404 +     * Single quotes are used when quoting.
   1.405 +     * Within quoted names, embedded single quotes and backslashes
   1.406 +     * are further escaped by prepended backslashes.
   1.407 +     *
   1.408 +     * @param s the original bytecode name (which may be qualified)
   1.409 +     * @return a human-readable presentation
   1.410 +     */
   1.411 +    public static String toDisplayName(String s) {
   1.412 +        Object[] components = parseBytecodeName(s);
   1.413 +        for (int i = 0; i < components.length; i++) {
   1.414 +            if (!(components[i] instanceof String))
   1.415 +                continue;
   1.416 +            String sn = (String) components[i];
   1.417 +            // note that the name is already demangled!
   1.418 +            //sn = toSourceName(sn);
   1.419 +            if (!isJavaIdent(sn) || sn.indexOf('$') >=0 ) {
   1.420 +                components[i] = quoteDisplay(sn);
   1.421 +            }
   1.422 +        }
   1.423 +        return appendAll(components);
   1.424 +    }
   1.425 +    private static boolean isJavaIdent(String s) {
   1.426 +        int slen = s.length();
   1.427 +        if (slen == 0)  return false;
   1.428 +        if (!Character.isJavaIdentifierStart(s.charAt(0)))
   1.429 +            return false;
   1.430 +        for (int i = 1; i < slen; i++) {
   1.431 +            if (!Character.isJavaIdentifierPart(s.charAt(i)))
   1.432 +                return false;
   1.433 +        }
   1.434 +        return true;
   1.435 +    }
   1.436 +    private static String quoteDisplay(String s) {
   1.437 +        // TO DO:  Replace wierd characters in s by C-style escapes.
   1.438 +        return "'"+s.replaceAll("['\\\\]", "\\\\$0")+"'";
   1.439 +    }
   1.440 +
   1.441 +    private static void checkSafeBytecodeName(String s)
   1.442 +            throws IllegalArgumentException {
   1.443 +        if (!isSafeBytecodeName(s)) {
   1.444 +            throw new IllegalArgumentException(s);
   1.445 +        }
   1.446 +    }
   1.447 +
   1.448 +    /**
   1.449 +     * Report whether a simple name is safe as a bytecode name.
   1.450 +     * Such names are acceptable in class files as class, method, and field names.
   1.451 +     * Additionally, they are free of "dangerous" characters, even if those
   1.452 +     * characters are legal in some (or all) names in class files.
   1.453 +     * @param s the proposed bytecode name
   1.454 +     * @return true if the name is non-empty and all of its characters are safe
   1.455 +     */
   1.456 +    public static boolean isSafeBytecodeName(String s) {
   1.457 +        if (s.length() == 0)  return false;
   1.458 +        // check occurrences of each DANGEROUS char
   1.459 +        for (char xc : DANGEROUS_CHARS_A) {
   1.460 +            if (xc == ESCAPE_C)  continue;  // not really that dangerous
   1.461 +            if (s.indexOf(xc) >= 0)  return false;
   1.462 +        }
   1.463 +        return true;
   1.464 +    }
   1.465 +
   1.466 +    /**
   1.467 +     * Report whether a character is safe in a bytecode name.
   1.468 +     * This is true of any unicode character except the following
   1.469 +     * <em>dangerous characters</em>: {@code ".;:$[]<>/"}.
   1.470 +     * @param s the proposed character
   1.471 +     * @return true if the character is safe to use in classfiles
   1.472 +     */
   1.473 +    public static boolean isSafeBytecodeChar(char c) {
   1.474 +        return DANGEROUS_CHARS.indexOf(c) < DANGEROUS_CHAR_FIRST_INDEX;
   1.475 +    }
   1.476 +
   1.477 +    private static boolean looksMangled(String s) {
   1.478 +        return s.charAt(0) == ESCAPE_C;
   1.479 +    }
   1.480 +
   1.481 +    private static String mangle(String s) {
   1.482 +        if (s.length() == 0)
   1.483 +            return NULL_ESCAPE;
   1.484 +
   1.485 +        // build this lazily, when we first need an escape:
   1.486 +        StringBuilder sb = null;
   1.487 +
   1.488 +        for (int i = 0, slen = s.length(); i < slen; i++) {
   1.489 +            char c = s.charAt(i);
   1.490 +
   1.491 +            boolean needEscape = false;
   1.492 +            if (c == ESCAPE_C) {
   1.493 +                if (i+1 < slen) {
   1.494 +                    char c1 = s.charAt(i+1);
   1.495 +                    if ((i == 0 && c1 == NULL_ESCAPE_C)
   1.496 +                        || c1 != originalOfReplacement(c1)) {
   1.497 +                        // an accidental escape
   1.498 +                        needEscape = true;
   1.499 +                    }
   1.500 +                }
   1.501 +            } else {
   1.502 +                needEscape = isDangerous(c);
   1.503 +            }
   1.504 +
   1.505 +            if (!needEscape) {
   1.506 +                if (sb != null)  sb.append(c);
   1.507 +                continue;
   1.508 +            }
   1.509 +
   1.510 +            // build sb if this is the first escape
   1.511 +            if (sb == null) {
   1.512 +                sb = new StringBuilder(s.length()+10);
   1.513 +                // mangled names must begin with a backslash:
   1.514 +                if (s.charAt(0) != ESCAPE_C && i > 0)
   1.515 +                    sb.append(NULL_ESCAPE);
   1.516 +                // append the string so far, which is unremarkable:
   1.517 +                sb.append(s.substring(0, i));
   1.518 +            }
   1.519 +
   1.520 +            // rewrite \ to \-, / to \|, etc.
   1.521 +            sb.append(ESCAPE_C);
   1.522 +            sb.append(replacementOf(c));
   1.523 +        }
   1.524 +
   1.525 +        if (sb != null)   return sb.toString();
   1.526 +
   1.527 +        return s;
   1.528 +    }
   1.529 +
   1.530 +    private static String demangle(String s) {
   1.531 +        // build this lazily, when we first meet an escape:
   1.532 +        StringBuilder sb = null;
   1.533 +
   1.534 +        int stringStart = 0;
   1.535 +        if (s.startsWith(NULL_ESCAPE))
   1.536 +            stringStart = 2;
   1.537 +
   1.538 +        for (int i = stringStart, slen = s.length(); i < slen; i++) {
   1.539 +            char c = s.charAt(i);
   1.540 +
   1.541 +            if (c == ESCAPE_C && i+1 < slen) {
   1.542 +                // might be an escape sequence
   1.543 +                char rc = s.charAt(i+1);
   1.544 +                char oc = originalOfReplacement(rc);
   1.545 +                if (oc != rc) {
   1.546 +                    // build sb if this is the first escape
   1.547 +                    if (sb == null) {
   1.548 +                        sb = new StringBuilder(s.length());
   1.549 +                        // append the string so far, which is unremarkable:
   1.550 +                        sb.append(s.substring(stringStart, i));
   1.551 +                    }
   1.552 +                    ++i;  // skip both characters
   1.553 +                    c = oc;
   1.554 +                }
   1.555 +            }
   1.556 +
   1.557 +            if (sb != null)
   1.558 +                sb.append(c);
   1.559 +        }
   1.560 +
   1.561 +        if (sb != null)   return sb.toString();
   1.562 +
   1.563 +        return s.substring(stringStart);
   1.564 +    }
   1.565 +
   1.566 +    static char ESCAPE_C = '\\';
   1.567 +    // empty escape sequence to avoid a null name or illegal prefix
   1.568 +    static char NULL_ESCAPE_C = '=';
   1.569 +    static String NULL_ESCAPE = ESCAPE_C+""+NULL_ESCAPE_C;
   1.570 +
   1.571 +    static final String DANGEROUS_CHARS   = "\\/.;:$[]<>"; // \\ must be first
   1.572 +    static final String REPLACEMENT_CHARS =  "-|,?!%{}^_";
   1.573 +    static final int DANGEROUS_CHAR_FIRST_INDEX = 1; // index after \\
   1.574 +    static char[] DANGEROUS_CHARS_A   = DANGEROUS_CHARS.toCharArray();
   1.575 +    static char[] REPLACEMENT_CHARS_A = REPLACEMENT_CHARS.toCharArray();
   1.576 +    static final Character[] DANGEROUS_CHARS_CA;
   1.577 +    static {
   1.578 +        Character[] dcca = new Character[DANGEROUS_CHARS.length()];
   1.579 +        for (int i = 0; i < dcca.length; i++)
   1.580 +            dcca[i] = Character.valueOf(DANGEROUS_CHARS.charAt(i));
   1.581 +        DANGEROUS_CHARS_CA = dcca;
   1.582 +    }
   1.583 +
   1.584 +    static final long[] SPECIAL_BITMAP = new long[2];  // 128 bits
   1.585 +    static {
   1.586 +        String SPECIAL = DANGEROUS_CHARS + REPLACEMENT_CHARS;
   1.587 +        //System.out.println("SPECIAL = "+SPECIAL);
   1.588 +        for (char c : SPECIAL.toCharArray()) {
   1.589 +            SPECIAL_BITMAP[c >>> 6] |= 1L << c;
   1.590 +        }
   1.591 +    }
   1.592 +    static boolean isSpecial(char c) {
   1.593 +        if ((c >>> 6) < SPECIAL_BITMAP.length)
   1.594 +            return ((SPECIAL_BITMAP[c >>> 6] >> c) & 1) != 0;
   1.595 +        else
   1.596 +            return false;
   1.597 +    }
   1.598 +    static char replacementOf(char c) {
   1.599 +        if (!isSpecial(c))  return c;
   1.600 +        int i = DANGEROUS_CHARS.indexOf(c);
   1.601 +        if (i < 0)  return c;
   1.602 +        return REPLACEMENT_CHARS.charAt(i);
   1.603 +    }
   1.604 +    static char originalOfReplacement(char c) {
   1.605 +        if (!isSpecial(c))  return c;
   1.606 +        int i = REPLACEMENT_CHARS.indexOf(c);
   1.607 +        if (i < 0)  return c;
   1.608 +        return DANGEROUS_CHARS.charAt(i);
   1.609 +    }
   1.610 +    static boolean isDangerous(char c) {
   1.611 +        if (!isSpecial(c))  return false;
   1.612 +        return (DANGEROUS_CHARS.indexOf(c) >= DANGEROUS_CHAR_FIRST_INDEX);
   1.613 +    }
   1.614 +    static int indexOfDangerousChar(String s, int from) {
   1.615 +        for (int i = from, slen = s.length(); i < slen; i++) {
   1.616 +            if (isDangerous(s.charAt(i)))
   1.617 +                return i;
   1.618 +        }
   1.619 +        return -1;
   1.620 +    }
   1.621 +    static int lastIndexOfDangerousChar(String s, int from) {
   1.622 +        for (int i = Math.min(from, s.length()-1); i >= 0; i--) {
   1.623 +            if (isDangerous(s.charAt(i)))
   1.624 +                return i;
   1.625 +        }
   1.626 +        return -1;
   1.627 +    }
   1.628 +
   1.629 +
   1.630 +}