/*
 * Copyright (c) 2007, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package sun.invoke.util;

/**
 * Utility routines for dealing with bytecode-level names.
 * Includes universal mangling rules for the JVM.
 *
 * <h3>Avoiding Dangerous Characters</h3>
 *
 * <p>
 * The JVM defines a very small set of characters which are illegal
 * in name spellings. We will slightly extend and regularize this set
 * into a group of dangerous characters.
 * These characters will then be replaced, in mangled names, by escape sequences.
 * In addition, accidental escape sequences must be further escaped.
 * Finally, a special prefix will be applied if and only if
 * the mangling would otherwise fail to begin with the escape character.
 * This happens to cover the corner case of the null string,
 * and also clearly marks symbols which need demangling.
 * </p>
 * <p>
 * Dangerous characters are the union of all characters forbidden
 * or otherwise restricted by the JVM specification,
 * plus their mates, if they are brackets
 * (<code>[</code> and <code>]</code>,
 * <code>&lt;</code> and <code>&gt;</code>),
 * plus, arbitrarily, the colon character <code>:</code>.
 * There is no distinction between type, method, and field names.
 * This makes it easier to convert between mangled names of different
 * types, since they do not need to be decoded (demangled).
 * </p>
 * <p>
 * The escape character is backslash <code>\</code>
 * (also known as reverse solidus).
 * This character is, until now, unheard of in bytecode names,
 * but traditional in the proposed role.
 * </p>
 *
 * <h3>Replacement Characters</h3>
 *
 * <p>
 * Every escape sequence is two characters
 * (in fact, two UTF8 bytes) beginning with
 * the escape character and followed by a
 * replacement character.
 * (Since the replacement character is never a backslash,
 * iterated manglings do not double in size.)
 * </p>
 * <p>
 * Each dangerous character has some rough visual similarity
 * to its corresponding replacement character.
 * This makes mangled symbols easier to recognize by sight.
 * </p>
 * <p>
 * The dangerous characters are
 * <code>/</code> (forward slash, used to delimit package components),
 * <code>.</code> (dot, also a package delimiter),
 * <code>;</code> (semicolon, used in signatures),
 * <code>$</code> (dollar, used in inner classes and synthetic members),
 * <code>&lt;</code> (left angle),
 * <code>&gt;</code> (right angle),
 * <code>[</code> (left square bracket, used in array types),
 * <code>]</code> (right square bracket, reserved in this scheme for language use),
 * and <code>:</code> (colon, reserved in this scheme for language use).
 * Their replacements are, respectively,
 * <code>|</code> (vertical bar),
 * <code>,</code> (comma),
 * <code>?</code> (question mark),
 * <code>%</code> (percent),
 * <code>^</code> (caret),
 * <code>_</code> (underscore),
 * <code>{</code> (left curly bracket),
 * <code>}</code> (right curly bracket), and
 * <code>!</code> (exclamation mark).
 * In addition, the replacement character for the escape character itself is
 * <code>-</code> (hyphen),
 * and the replacement character for the null prefix is
 * <code>=</code> (equal sign).
 * </p>
 * <p>
 * An escape character <code>\</code>
 * followed by any of these replacement characters
 * is an escape sequence, and there are no other escape sequences.
 * An equal sign is only part of an escape sequence
 * if it is the second character in the whole string, following a backslash.
 * Two consecutive backslashes do not form an escape sequence.
 * </p>
 * <p>
 * Each escape sequence replaces a so-called original character
 * which is either one of the dangerous characters or the escape character.
 * A null prefix replaces an initial null string, not a character.
 * </p>
 * <p>
 * All this implies that escape sequences cannot overlap and may be
 * determined all at once for a whole string. Note that a spelling
 * string can contain accidental escapes, apparent escape
 * sequences which must not be interpreted as manglings.
 * These are disabled by replacing their leading backslash with an
 * escape sequence (<code>\-</code>). To mangle a string, three logical steps
 * are required, though they may be carried out in one pass:
 * </p>
 * <ol>
 *   <li>In each accidental escape, replace the backslash with an escape sequence
 *       (<code>\-</code>).</li>
 *   <li>Replace each dangerous character with an escape sequence
 *       (<code>\|</code> for <code>/</code>, etc.).</li>
 *   <li>If the first two steps introduced any change, and
 *       if the string does not already begin with a backslash, prepend a null prefix
 *       (<code>\=</code>).</li>
 * </ol>
 *
 * <p>
 * To demangle a mangled string that begins with an escape,
 * remove any null prefix, and then replace (in parallel)
 * each escape sequence by its original character.
 * </p>
 * <p>
 * Spelling strings which contain accidental
 * escapes must have them replaced, even if those
 * strings do not contain dangerous characters.
 * This restriction means that mangling a string always
 * requires a scan of the string for escapes.
 * But then, a scan would be required anyway,
 * to check for dangerous characters.
 * </p>
 *
 * <h3>Nice Properties</h3>
 *
 * <p>
 * If a bytecode name does not contain any escape sequence,
 * demangling is a no-op: The string demangles to itself.
 * Such a string is called self-mangling.
 * Almost all strings are self-mangling.
 * In practice, to demangle almost any name &ldquo;found in nature&rdquo;,
 * simply verify that it does not begin with a backslash.
 * </p>
 * <p>
 * Mangling is a one-to-one function, while demangling
 * is a many-to-one function.
 * A mangled string is defined as validly mangled if
 * it is in fact the unique mangling of its spelling string.
 * Three examples of invalidly mangled strings are <code>\=foo</code>,
 * <code>\-bar</code>, and <code>baz\!</code>, which demangle to <code>foo</code>,
 * <code>\bar</code>, and <code>baz\!</code>, but then remangle to <code>foo</code>,
 * <code>\bar</code>, and <code>\=baz\-!</code>.
 * If a language back-end or runtime is using mangled names,
 * it should never present an invalidly mangled bytecode
 * name to the JVM. If the runtime encounters one,
 * it should also report an error, since such an occurrence
 * probably indicates a bug in name encoding which
 * will lead to errors in linkage.
 * However, this note does not propose that the JVM verifier
 * detect invalidly mangled names.
 * </p>
 * <p>
 * As a result of these rules, it is a simple matter to
 * compute validly mangled substrings and concatenations
 * of validly mangled strings, and (with a little care)
 * these correspond to corresponding operations on their
 * spelling strings.
 * </p>
 *
 * <p>
 * If languages that include non-Java symbol spellings use this
 * mangling convention, they will enjoy the following advantages:
 * </p>
 * <!-- NOTE(review): the list of advantages announced above is missing from this copy of the file; restore it from the upstream source. -->
 *
 * <h3>Suggestions for Human Readable Presentations</h3>
 *
 * <p>
 * For human readable displays of symbols,
 * it will be better to present a string-like quoted
 * representation of the spelling, because JVM users
 * are generally familiar with such tokens.
 * We suggest using single or double quotes before and after
 * mangled symbols which are not valid Java identifiers,
 * with quotes, backslashes, and non-printing characters
 * escaped as if for literals in the Java language.
 * </p>
 * <p>
 * For example, an HTML-like spelling
 * <code>&lt;pre&gt;</code> mangles to
 * <code>\^pre\_</code> and could
 * display more cleanly as
 * <code>'&lt;pre&gt;'</code>,
 * with the quotes included.
 * Such string-like conventions are not suitable
 * for mangled bytecode names, in part because
 * dangerous characters must be eliminated, rather
 * than just quoted. Otherwise internally structured
 * strings like package prefixes and method signatures
 * could not be reliably parsed.
 * </p>
 * <p>
 * In such human-readable displays, invalidly mangled
 * names should not be demangled and quoted,
 * for this would be misleading. Likewise, JVM symbols
 * which contain dangerous characters (like dots in field
 * names or brackets in method names) should not be
 * simply quoted. The bytecode names
 * <code>\=phase\,1</code> and
 * <code>phase.1</code> are distinct,
 * and in demangled displays they should be presented as
 * <code>'phase.1'</code> and something like
 * <code>'phase'.1</code>, respectively.
 * </p>
jaroslav@1646: * jaroslav@1646: * @author John Rose jaroslav@1646: * @version 1.2, 02/06/2008 jaroslav@1646: * @see http://blogs.sun.com/jrose/entry/symbolic_freedom_in_the_vm jaroslav@1646: */ jaroslav@1646: public class BytecodeName { jaroslav@1646: private BytecodeName() { } // static only class jaroslav@1646: jaroslav@1646: /** Given a source name, produce the corresponding bytecode name. jaroslav@1646: * The source name should not be qualified, because any syntactic jaroslav@1646: * markers (dots, slashes, dollar signs, colons, etc.) will be mangled. jaroslav@1646: * @param s the source name jaroslav@1646: * @return a valid bytecode name which represents the source name jaroslav@1646: */ jaroslav@1646: public static String toBytecodeName(String s) { jaroslav@1646: String bn = mangle(s); jaroslav@1646: assert((Object)bn == s || looksMangled(bn)) : bn; jaroslav@1646: assert(s.equals(toSourceName(bn))) : s; jaroslav@1646: return bn; jaroslav@1646: } jaroslav@1646: jaroslav@1646: /** Given an unqualified bytecode name, produce the corresponding source name. jaroslav@1646: * The bytecode name must not contain dangerous characters. jaroslav@1646: * In particular, it must not be qualified or segmented by colon {@code ':'}. 
jaroslav@1646: * @param s the bytecode name jaroslav@1646: * @return the source name, which may possibly have unsafe characters jaroslav@1646: * @throws IllegalArgumentException if the bytecode name is not {@link #isSafeBytecodeName safe} jaroslav@1646: * @see #isSafeBytecodeName(java.lang.String) jaroslav@1646: */ jaroslav@1646: public static String toSourceName(String s) { jaroslav@1646: checkSafeBytecodeName(s); jaroslav@1646: String sn = s; jaroslav@1646: if (looksMangled(s)) { jaroslav@1646: sn = demangle(s); jaroslav@1646: assert(s.equals(mangle(sn))) : s+" => "+sn+" => "+mangle(sn); jaroslav@1646: } jaroslav@1646: return sn; jaroslav@1646: } jaroslav@1646: jaroslav@1646: /** jaroslav@1646: * Given a bytecode name from a classfile, separate it into jaroslav@1646: * components delimited by dangerous characters. jaroslav@1646: * Each resulting array element will be either a dangerous character, jaroslav@1646: * or else a safe bytecode name. jaroslav@1646: * (The safe name might possibly be mangled to hide further dangerous characters.) jaroslav@1646: * For example, the qualified class name {@code java/lang/String} jaroslav@1646: * will be parsed into the array {@code {"java", '/', "lang", '/', "String"}}. jaroslav@1646: * The name {@code <init>} will be parsed into { '<', "init", '>'}} jaroslav@1646: * The name {@code foo/bar$:baz} will be parsed into jaroslav@1646: * {@code {"foo", '/', "bar", '$', ':', "baz"}}. jaroslav@1646: * The name {@code ::\=:foo:\=bar\!baz} will be parsed into jaroslav@1646: * {@code {':', ':', "", ':', "foo", ':', "bar:baz"}}. 
jaroslav@1646: */ jaroslav@1646: public static Object[] parseBytecodeName(String s) { jaroslav@1646: int slen = s.length(); jaroslav@1646: Object[] res = null; jaroslav@1646: for (int pass = 0; pass <= 1; pass++) { jaroslav@1646: int fillp = 0; jaroslav@1646: int lasti = 0; jaroslav@1646: for (int i = 0; i <= slen; i++) { jaroslav@1646: int whichDC = -1; jaroslav@1646: if (i < slen) { jaroslav@1646: whichDC = DANGEROUS_CHARS.indexOf(s.charAt(i)); jaroslav@1646: if (whichDC < DANGEROUS_CHAR_FIRST_INDEX) continue; jaroslav@1646: } jaroslav@1646: // got to end of string or next dangerous char jaroslav@1646: if (lasti < i) { jaroslav@1646: // normal component jaroslav@1646: if (pass != 0) jaroslav@1646: res[fillp] = toSourceName(s.substring(lasti, i)); jaroslav@1646: fillp++; jaroslav@1646: lasti = i+1; jaroslav@1646: } jaroslav@1646: if (whichDC >= DANGEROUS_CHAR_FIRST_INDEX) { jaroslav@1646: if (pass != 0) jaroslav@1646: res[fillp] = DANGEROUS_CHARS_CA[whichDC]; jaroslav@1646: fillp++; jaroslav@1646: lasti = i+1; jaroslav@1646: } jaroslav@1646: } jaroslav@1646: if (pass != 0) break; jaroslav@1646: // between passes, build the result array jaroslav@1646: res = new Object[fillp]; jaroslav@1646: if (fillp <= 1 && lasti == 0) { jaroslav@1646: if (fillp != 0) res[0] = toSourceName(s); jaroslav@1646: break; jaroslav@1646: } jaroslav@1646: } jaroslav@1646: return res; jaroslav@1646: } jaroslav@1646: jaroslav@1646: /** jaroslav@1646: * Given a series of components, create a bytecode name for a classfile. jaroslav@1646: * This is the inverse of {@link #parseBytecodeName(java.lang.String)}. jaroslav@1646: * Each component must either be an interned one-character string of jaroslav@1646: * a dangerous character, or else a safe bytecode name. 
jaroslav@1646: * @param components a series of name components jaroslav@1646: * @return the concatenation of all components jaroslav@1646: * @throws IllegalArgumentException if any component contains an unsafe jaroslav@1646: * character, and is not an interned one-character string jaroslav@1646: * @throws NullPointerException if any component is null jaroslav@1646: */ jaroslav@1646: public static String unparseBytecodeName(Object[] components) { jaroslav@1646: Object[] components0 = components; jaroslav@1646: for (int i = 0; i < components.length; i++) { jaroslav@1646: Object c = components[i]; jaroslav@1646: if (c instanceof String) { jaroslav@1646: String mc = toBytecodeName((String) c); jaroslav@1646: if (i == 0 && components.length == 1) jaroslav@1646: return mc; // usual case jaroslav@1646: if ((Object)mc != c) { jaroslav@1646: if (components == components0) jaroslav@1646: components = components.clone(); jaroslav@1646: components[i] = c = mc; jaroslav@1646: } jaroslav@1646: } jaroslav@1646: } jaroslav@1646: return appendAll(components); jaroslav@1646: } jaroslav@1646: private static String appendAll(Object[] components) { jaroslav@1646: if (components.length <= 1) { jaroslav@1646: if (components.length == 1) { jaroslav@1646: return String.valueOf(components[0]); jaroslav@1646: } jaroslav@1646: return ""; jaroslav@1646: } jaroslav@1646: int slen = 0; jaroslav@1646: for (Object c : components) { jaroslav@1646: if (c instanceof String) jaroslav@1646: slen += String.valueOf(c).length(); jaroslav@1646: else jaroslav@1646: slen += 1; jaroslav@1646: } jaroslav@1646: StringBuilder sb = new StringBuilder(slen); jaroslav@1646: for (Object c : components) { jaroslav@1646: sb.append(c); jaroslav@1646: } jaroslav@1646: return sb.toString(); jaroslav@1646: } jaroslav@1646: jaroslav@1646: /** jaroslav@1646: * Given a bytecode name, produce the corresponding display name. jaroslav@1646: * This is the source name, plus quotes if needed. 
jaroslav@1646: * If the bytecode name contains dangerous characters, jaroslav@1646: * assume that they are being used as punctuation, jaroslav@1646: * and pass them through unchanged. jaroslav@1646: * Non-empty runs of non-dangerous characters are demangled jaroslav@1646: * if necessary, and the resulting names are quoted if jaroslav@1646: * they are not already valid Java identifiers, or if jaroslav@1646: * they contain a dangerous character (i.e., dollar sign "$"). jaroslav@1646: * Single quotes are used when quoting. jaroslav@1646: * Within quoted names, embedded single quotes and backslashes jaroslav@1646: * are further escaped by prepended backslashes. jaroslav@1646: * jaroslav@1646: * @param s the original bytecode name (which may be qualified) jaroslav@1646: * @return a human-readable presentation jaroslav@1646: */ jaroslav@1646: public static String toDisplayName(String s) { jaroslav@1646: Object[] components = parseBytecodeName(s); jaroslav@1646: for (int i = 0; i < components.length; i++) { jaroslav@1646: if (!(components[i] instanceof String)) jaroslav@1646: continue; jaroslav@1646: String sn = (String) components[i]; jaroslav@1646: // note that the name is already demangled! 
jaroslav@1646: //sn = toSourceName(sn); jaroslav@1646: if (!isJavaIdent(sn) || sn.indexOf('$') >=0 ) { jaroslav@1646: components[i] = quoteDisplay(sn); jaroslav@1646: } jaroslav@1646: } jaroslav@1646: return appendAll(components); jaroslav@1646: } jaroslav@1646: private static boolean isJavaIdent(String s) { jaroslav@1646: int slen = s.length(); jaroslav@1646: if (slen == 0) return false; jaroslav@1646: if (!Character.isJavaIdentifierStart(s.charAt(0))) jaroslav@1646: return false; jaroslav@1646: for (int i = 1; i < slen; i++) { jaroslav@1646: if (!Character.isJavaIdentifierPart(s.charAt(i))) jaroslav@1646: return false; jaroslav@1646: } jaroslav@1646: return true; jaroslav@1646: } jaroslav@1646: private static String quoteDisplay(String s) { jaroslav@1646: // TO DO: Replace wierd characters in s by C-style escapes. jaroslav@1646: return "'"+s.replaceAll("['\\\\]", "\\\\$0")+"'"; jaroslav@1646: } jaroslav@1646: jaroslav@1646: private static void checkSafeBytecodeName(String s) jaroslav@1646: throws IllegalArgumentException { jaroslav@1646: if (!isSafeBytecodeName(s)) { jaroslav@1646: throw new IllegalArgumentException(s); jaroslav@1646: } jaroslav@1646: } jaroslav@1646: jaroslav@1646: /** jaroslav@1646: * Report whether a simple name is safe as a bytecode name. jaroslav@1646: * Such names are acceptable in class files as class, method, and field names. jaroslav@1646: * Additionally, they are free of "dangerous" characters, even if those jaroslav@1646: * characters are legal in some (or all) names in class files. 
jaroslav@1646: * @param s the proposed bytecode name jaroslav@1646: * @return true if the name is non-empty and all of its characters are safe jaroslav@1646: */ jaroslav@1646: public static boolean isSafeBytecodeName(String s) { jaroslav@1646: if (s.length() == 0) return false; jaroslav@1646: // check occurrences of each DANGEROUS char jaroslav@1646: for (char xc : DANGEROUS_CHARS_A) { jaroslav@1646: if (xc == ESCAPE_C) continue; // not really that dangerous jaroslav@1646: if (s.indexOf(xc) >= 0) return false; jaroslav@1646: } jaroslav@1646: return true; jaroslav@1646: } jaroslav@1646: jaroslav@1646: /** jaroslav@1646: * Report whether a character is safe in a bytecode name. jaroslav@1646: * This is true of any unicode character except the following jaroslav@1646: * dangerous characters: {@code ".;:$[]<>/"}. jaroslav@1646: * @param s the proposed character jaroslav@1646: * @return true if the character is safe to use in classfiles jaroslav@1646: */ jaroslav@1646: public static boolean isSafeBytecodeChar(char c) { jaroslav@1646: return DANGEROUS_CHARS.indexOf(c) < DANGEROUS_CHAR_FIRST_INDEX; jaroslav@1646: } jaroslav@1646: jaroslav@1646: private static boolean looksMangled(String s) { jaroslav@1646: return s.charAt(0) == ESCAPE_C; jaroslav@1646: } jaroslav@1646: jaroslav@1646: private static String mangle(String s) { jaroslav@1646: if (s.length() == 0) jaroslav@1646: return NULL_ESCAPE; jaroslav@1646: jaroslav@1646: // build this lazily, when we first need an escape: jaroslav@1646: StringBuilder sb = null; jaroslav@1646: jaroslav@1646: for (int i = 0, slen = s.length(); i < slen; i++) { jaroslav@1646: char c = s.charAt(i); jaroslav@1646: jaroslav@1646: boolean needEscape = false; jaroslav@1646: if (c == ESCAPE_C) { jaroslav@1646: if (i+1 < slen) { jaroslav@1646: char c1 = s.charAt(i+1); jaroslav@1646: if ((i == 0 && c1 == NULL_ESCAPE_C) jaroslav@1646: || c1 != originalOfReplacement(c1)) { jaroslav@1646: // an accidental escape jaroslav@1646: needEscape = true; 
jaroslav@1646: } jaroslav@1646: } jaroslav@1646: } else { jaroslav@1646: needEscape = isDangerous(c); jaroslav@1646: } jaroslav@1646: jaroslav@1646: if (!needEscape) { jaroslav@1646: if (sb != null) sb.append(c); jaroslav@1646: continue; jaroslav@1646: } jaroslav@1646: jaroslav@1646: // build sb if this is the first escape jaroslav@1646: if (sb == null) { jaroslav@1646: sb = new StringBuilder(s.length()+10); jaroslav@1646: // mangled names must begin with a backslash: jaroslav@1646: if (s.charAt(0) != ESCAPE_C && i > 0) jaroslav@1646: sb.append(NULL_ESCAPE); jaroslav@1646: // append the string so far, which is unremarkable: jaroslav@1646: sb.append(s.substring(0, i)); jaroslav@1646: } jaroslav@1646: jaroslav@1646: // rewrite \ to \-, / to \|, etc. jaroslav@1646: sb.append(ESCAPE_C); jaroslav@1646: sb.append(replacementOf(c)); jaroslav@1646: } jaroslav@1646: jaroslav@1646: if (sb != null) return sb.toString(); jaroslav@1646: jaroslav@1646: return s; jaroslav@1646: } jaroslav@1646: jaroslav@1646: private static String demangle(String s) { jaroslav@1646: // build this lazily, when we first meet an escape: jaroslav@1646: StringBuilder sb = null; jaroslav@1646: jaroslav@1646: int stringStart = 0; jaroslav@1646: if (s.startsWith(NULL_ESCAPE)) jaroslav@1646: stringStart = 2; jaroslav@1646: jaroslav@1646: for (int i = stringStart, slen = s.length(); i < slen; i++) { jaroslav@1646: char c = s.charAt(i); jaroslav@1646: jaroslav@1646: if (c == ESCAPE_C && i+1 < slen) { jaroslav@1646: // might be an escape sequence jaroslav@1646: char rc = s.charAt(i+1); jaroslav@1646: char oc = originalOfReplacement(rc); jaroslav@1646: if (oc != rc) { jaroslav@1646: // build sb if this is the first escape jaroslav@1646: if (sb == null) { jaroslav@1646: sb = new StringBuilder(s.length()); jaroslav@1646: // append the string so far, which is unremarkable: jaroslav@1646: sb.append(s.substring(stringStart, i)); jaroslav@1646: } jaroslav@1646: ++i; // skip both characters jaroslav@1646: c = oc; 
jaroslav@1646: } jaroslav@1646: } jaroslav@1646: jaroslav@1646: if (sb != null) jaroslav@1646: sb.append(c); jaroslav@1646: } jaroslav@1646: jaroslav@1646: if (sb != null) return sb.toString(); jaroslav@1646: jaroslav@1646: return s.substring(stringStart); jaroslav@1646: } jaroslav@1646: jaroslav@1646: static char ESCAPE_C = '\\'; jaroslav@1646: // empty escape sequence to avoid a null name or illegal prefix jaroslav@1646: static char NULL_ESCAPE_C = '='; jaroslav@1646: static String NULL_ESCAPE = ESCAPE_C+""+NULL_ESCAPE_C; jaroslav@1646: jaroslav@1646: static final String DANGEROUS_CHARS = "\\/.;:$[]<>"; // \\ must be first jaroslav@1646: static final String REPLACEMENT_CHARS = "-|,?!%{}^_"; jaroslav@1646: static final int DANGEROUS_CHAR_FIRST_INDEX = 1; // index after \\ jaroslav@1646: static char[] DANGEROUS_CHARS_A = DANGEROUS_CHARS.toCharArray(); jaroslav@1646: static char[] REPLACEMENT_CHARS_A = REPLACEMENT_CHARS.toCharArray(); jaroslav@1646: static final Character[] DANGEROUS_CHARS_CA; jaroslav@1646: static { jaroslav@1646: Character[] dcca = new Character[DANGEROUS_CHARS.length()]; jaroslav@1646: for (int i = 0; i < dcca.length; i++) jaroslav@1646: dcca[i] = Character.valueOf(DANGEROUS_CHARS.charAt(i)); jaroslav@1646: DANGEROUS_CHARS_CA = dcca; jaroslav@1646: } jaroslav@1646: jaroslav@1646: static final long[] SPECIAL_BITMAP = new long[2]; // 128 bits jaroslav@1646: static { jaroslav@1646: String SPECIAL = DANGEROUS_CHARS + REPLACEMENT_CHARS; jaroslav@1646: //System.out.println("SPECIAL = "+SPECIAL); jaroslav@1646: for (char c : SPECIAL.toCharArray()) { jaroslav@1646: SPECIAL_BITMAP[c >>> 6] |= 1L << c; jaroslav@1646: } jaroslav@1646: } jaroslav@1646: static boolean isSpecial(char c) { jaroslav@1646: if ((c >>> 6) < SPECIAL_BITMAP.length) jaroslav@1646: return ((SPECIAL_BITMAP[c >>> 6] >> c) & 1) != 0; jaroslav@1646: else jaroslav@1646: return false; jaroslav@1646: } jaroslav@1646: static char replacementOf(char c) { jaroslav@1646: if (!isSpecial(c)) return 
c; jaroslav@1646: int i = DANGEROUS_CHARS.indexOf(c); jaroslav@1646: if (i < 0) return c; jaroslav@1646: return REPLACEMENT_CHARS.charAt(i); jaroslav@1646: } jaroslav@1646: static char originalOfReplacement(char c) { jaroslav@1646: if (!isSpecial(c)) return c; jaroslav@1646: int i = REPLACEMENT_CHARS.indexOf(c); jaroslav@1646: if (i < 0) return c; jaroslav@1646: return DANGEROUS_CHARS.charAt(i); jaroslav@1646: } jaroslav@1646: static boolean isDangerous(char c) { jaroslav@1646: if (!isSpecial(c)) return false; jaroslav@1646: return (DANGEROUS_CHARS.indexOf(c) >= DANGEROUS_CHAR_FIRST_INDEX); jaroslav@1646: } jaroslav@1646: static int indexOfDangerousChar(String s, int from) { jaroslav@1646: for (int i = from, slen = s.length(); i < slen; i++) { jaroslav@1646: if (isDangerous(s.charAt(i))) jaroslav@1646: return i; jaroslav@1646: } jaroslav@1646: return -1; jaroslav@1646: } jaroslav@1646: static int lastIndexOfDangerousChar(String s, int from) { jaroslav@1646: for (int i = Math.min(from, s.length()-1); i >= 0; i--) { jaroslav@1646: if (isDangerous(s.charAt(i))) jaroslav@1646: return i; jaroslav@1646: } jaroslav@1646: return -1; jaroslav@1646: } jaroslav@1646: jaroslav@1646: jaroslav@1646: }