new String(byte[], 'utf-8') does necessary conversion emul
author Jaroslav Tulach <jaroslav.tulach@apidesign.org>
Wed, 30 Jan 2013 09:32:05 +0100
branch emul
changeset 608 6e9328ca3462
parent 606 146b81b14ac6
child 610 0127bd22630c
new String(byte[], 'utf-8') does necessary conversion
emul/mini/src/main/java/java/lang/String.java
vmtest/src/test/java/org/apidesign/bck2brwsr/tck/CompareStringsTest.java
     1.1 --- a/emul/mini/src/main/java/java/lang/String.java	Wed Jan 30 08:09:51 2013 +0100
     1.2 +++ b/emul/mini/src/main/java/java/lang/String.java	Wed Jan 30 09:32:05 2013 +0100
     1.3 @@ -25,6 +25,7 @@
     1.4  
     1.5  package java.lang;
     1.6  
     1.7 +import java.io.UnsupportedEncodingException;
     1.8  import java.util.Comparator;
     1.9  import org.apidesign.bck2brwsr.core.ExtraJavaScript;
    1.10  import org.apidesign.bck2brwsr.core.JavaScriptBody;
    1.11 @@ -415,17 +416,11 @@
    1.12       *
    1.13       * @since  JDK1.1
    1.14       */
    1.15 -//    public String(byte bytes[], int offset, int length, String charsetName)
    1.16 -//        throws UnsupportedEncodingException
    1.17 -//    {
    1.18 -//        if (charsetName == null)
    1.19 -//            throw new NullPointerException("charsetName");
    1.20 -//        checkBounds(bytes, offset, length);
    1.21 -//        char[] v = StringCoding.decode(charsetName, bytes, offset, length);
    1.22 -//        this.offset = 0;
    1.23 -//        this.count = v.length;
    1.24 -//        this.value = v;
    1.25 -//    }
    1.26 +    public String(byte bytes[], int offset, int length, String charsetName)
    1.27 +        throws UnsupportedEncodingException
    1.28 +    {
    1.29 +        this(checkUTF8(bytes, charsetName), offset, length);
    1.30 +    }
    1.31  
    1.32      /**
    1.33       * Constructs a new {@code String} by decoding the specified subarray of
    1.34 @@ -492,11 +487,11 @@
    1.35       *
    1.36       * @since  JDK1.1
    1.37       */
    1.38 -//    public String(byte bytes[], String charsetName)
    1.39 -//        throws UnsupportedEncodingException
    1.40 -//    {
    1.41 -//        this(bytes, 0, bytes.length, charsetName);
    1.42 -//    }
    1.43 +    public String(byte bytes[], String charsetName)
    1.44 +        throws UnsupportedEncodingException
    1.45 +    {
    1.46 +        this(bytes, 0, bytes.length, charsetName);
    1.47 +    }
    1.48  
    1.49      /**
    1.50       * Constructs a new {@code String} by decoding the specified array of
    1.51 @@ -553,10 +548,14 @@
    1.52      public String(byte bytes[], int offset, int length) {
    1.53          checkBounds(bytes, offset, length);
    1.54          char[] v  = new char[length];
    1.55 -        for (int i = 0; i < length; i++) {
    1.56 -            v[i] = (char)bytes[offset++];
    1.57 +        int[] at = { offset };
    1.58 +        int end = offset + length;
    1.59 +        int chlen = 0;
    1.60 +        while (at[0] < end) {
    1.61 +            int ch = nextChar(bytes, at);
    1.62 +            v[chlen++] = (char)ch;
    1.63          }
    1.64 -        this.r = new String(v, 0, v.length);
    1.65 +        this.r = new String(v, 0, chlen);
    1.66      }
    1.67  
    1.68      /**
    1.69 @@ -925,12 +924,12 @@
    1.70       *
    1.71       * @since  JDK1.1
    1.72       */
    1.73 -//    public byte[] getBytes(String charsetName)
    1.74 -//        throws UnsupportedEncodingException
    1.75 -//    {
    1.76 -//        if (charsetName == null) throw new NullPointerException();
    1.77 -//        return StringCoding.encode(charsetName, value, offset, count);
    1.78 -//    }
    1.79 +    public byte[] getBytes(String charsetName)
    1.80 +        throws UnsupportedEncodingException
    1.81 +    {
    1.82 +        checkUTF8(null, charsetName);
    1.83 +        return getBytes();
    1.84 +    }
    1.85  
    1.86      /**
    1.87       * Encodes this {@code String} into a sequence of bytes using the given
    1.88 @@ -1224,7 +1223,7 @@
    1.89      private static int offset() {
    1.90          return 0;
    1.91      }
    1.92 -    
    1.93 +
    1.94      private static class CaseInsensitiveComparator
    1.95                           implements Comparator<String>, java.io.Serializable {
    1.96          // use serialVersionUID from JDK 1.2.2 for interoperability
    1.97 @@ -3021,4 +3020,57 @@
    1.98       *          guaranteed to be from a pool of unique strings.
    1.99       */
   1.100      public native String intern();
   1.101 +    
   1.102 +    
   1.103 +    private static <T> T checkUTF8(T data, String charsetName)
   1.104 +        throws UnsupportedEncodingException {
   1.105 +        if (charsetName == null) {
   1.106 +            throw new NullPointerException("charsetName");
   1.107 +        }
   1.108 +        if (!charsetName.equalsIgnoreCase("UTF-8")
   1.109 +            && !charsetName.equalsIgnoreCase("UTF8")) {
   1.110 +            throw new UnsupportedEncodingException(charsetName);
   1.111 +        }
   1.112 +        return data;
   1.113 +    }
   1.114 +    
   1.115 +    private static int nextChar(byte[] arr, int[] index) throws IndexOutOfBoundsException {
   1.116 +        int c = arr[index[0]++] & 0xff;
   1.117 +        switch (c >> 4) {
   1.118 +            case 0:
   1.119 +            case 1:
   1.120 +            case 2:
   1.121 +            case 3:
   1.122 +            case 4:
   1.123 +            case 5:
   1.124 +            case 6:
   1.125 +            case 7:
   1.126 +                /* 0xxxxxxx*/
   1.127 +                return c;
   1.128 +            case 12:
   1.129 +            case 13: {
   1.130 +                /* 110x xxxx   10xx xxxx*/
   1.131 +                int char2 = (int) arr[index[0]++];
   1.132 +                if ((char2 & 0xC0) != 0x80) {
   1.133 +                    throw new IndexOutOfBoundsException("malformed input");
   1.134 +                }
   1.135 +                return (((c & 0x1F) << 6) | (char2 & 0x3F));
   1.136 +            }
   1.137 +            case 14: {
   1.138 +                /* 1110 xxxx  10xx xxxx  10xx xxxx */
   1.139 +                int char2 = arr[index[0]++];
   1.140 +                int char3 = arr[index[0]++];
   1.141 +                if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80)) {
   1.142 +                    throw new IndexOutOfBoundsException("malformed input");
   1.143 +                }
   1.144 +                return (((c & 0x0F) << 12)
   1.145 +                    | ((char2 & 0x3F) << 6)
   1.146 +                    | ((char3 & 0x3F) << 0));
   1.147 +            }
   1.148 +            default:
   1.149 +                /* 10xx xxxx,  1111 xxxx */
   1.150 +                throw new IndexOutOfBoundsException("malformed input");
   1.151 +        }
   1.152 +        
   1.153 +    }
   1.154  }
     2.1 --- a/vmtest/src/test/java/org/apidesign/bck2brwsr/tck/CompareStringsTest.java	Wed Jan 30 08:09:51 2013 +0100
     2.2 +++ b/vmtest/src/test/java/org/apidesign/bck2brwsr/tck/CompareStringsTest.java	Wed Jan 30 09:32:05 2013 +0100
     2.3 @@ -17,6 +17,7 @@
     2.4   */
     2.5  package org.apidesign.bck2brwsr.tck;
     2.6  
     2.7 +import java.io.UnsupportedEncodingException;
     2.8  import java.net.MalformedURLException;
     2.9  import java.net.URL;
    2.10  import org.apidesign.bck2brwsr.vmtest.Compare;
    2.11 @@ -120,6 +121,21 @@
    2.12          NullField nf = new NullField();
    2.13          return ("" + nf.name).toString();
    2.14      }
    2.15 +    @Compare
    2.16 +    public String toUTFString() throws UnsupportedEncodingException {
    2.17 +        byte[] arr = {
    2.18 +            (byte) -59, (byte) -67, (byte) 108, (byte) 117, (byte) -59, (byte) -91,
    2.19 +            (byte) 111, (byte) 117, (byte) -60, (byte) -115, (byte) 107, (byte) -61,
    2.20 +            (byte) -67, (byte) 32, (byte) 107, (byte) -59, (byte) -81, (byte) -59,
    2.21 +            (byte) -120
    2.22 +        };
    2.23 +        return new String(arr, "utf-8");
    2.24 +    }
    2.25 +
    2.26 +    @Compare
    2.27 +    public int stringToBytesLenght() throws UnsupportedEncodingException {
    2.28 +        return "Žluťoučký kůň".getBytes("utf8").length;
    2.29 +    }
    2.30  
    2.31      @Factory
    2.32      public static Object[] create() {