1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/emul/compact/src/main/java/java/net/URI.java	Sat Sep 07 13:51:24 2013 +0200
     1.3 @@ -0,0 +1,3524 @@
     1.4 +/*
     1.5 + * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.  Oracle designates this
    1.11 + * particular file as subject to the "Classpath" exception as provided
    1.12 + * by Oracle in the LICENSE file that accompanied this code.
    1.13 + *
    1.14 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.16 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.17 + * version 2 for more details (a copy is included in the LICENSE file that
    1.18 + * accompanied this code).
    1.19 + *
    1.20 + * You should have received a copy of the GNU General Public License version
    1.21 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.22 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.23 + *
    1.24 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    1.25 + * or visit www.oracle.com if you need additional information or have any
    1.26 + * questions.
    1.27 + */
    1.28 +
    1.29 +package java.net;
    1.30 +
    1.31 +import java.io.IOException;
    1.32 +import java.io.InvalidObjectException;
    1.33 +import java.io.ObjectInputStream;
    1.34 +import java.io.ObjectOutputStream;
    1.35 +import java.io.Serializable;
    1.36 +import java.nio.ByteBuffer;
    1.37 +import java.nio.CharBuffer;
    1.38 +import java.nio.charset.CharsetDecoder;
    1.39 +import java.nio.charset.CharsetEncoder;
    1.40 +import java.nio.charset.CoderResult;
    1.41 +import java.nio.charset.CodingErrorAction;
    1.42 +import java.nio.charset.CharacterCodingException;
    1.43 +import java.text.Normalizer;
    1.44 +import sun.nio.cs.ThreadLocalCoders;
    1.45 +
    1.46 +import java.lang.Character;             // for javadoc
    1.47 +import java.lang.NullPointerException;  // for javadoc
    1.48 +
    1.49 +
    1.50 +/**
    1.51 + * Represents a Uniform Resource Identifier (URI) reference.
    1.52 + *
    1.53 + * <p> Aside from some minor deviations noted below, an instance of this
    1.54 + * class represents a URI reference as defined by
    1.55 + * <a href="http://www.ietf.org/rfc/rfc2396.txt"><i>RFC&nbsp;2396: Uniform
    1.56 + * Resource Identifiers (URI): Generic Syntax</i></a>, amended by <a
    1.57 + * href="http://www.ietf.org/rfc/rfc2732.txt"><i>RFC&nbsp;2732: Format for
    1.58 + * Literal IPv6 Addresses in URLs</i></a>. The Literal IPv6 address format
    1.59 + * also supports scope_ids. The syntax and usage of scope_ids is described
    1.60 + * <a href="Inet6Address.html#scoped">here</a>.
    1.61 + * This class provides constructors for creating URI instances from
    1.62 + * their components or by parsing their string forms, methods for accessing the
    1.63 + * various components of an instance, and methods for normalizing, resolving,
    1.64 + * and relativizing URI instances.  Instances of this class are immutable.
    1.65 + *
    1.66 + *
    1.67 + * <h4> URI syntax and components </h4>
    1.68 + *
    1.69 + * At the highest level a URI reference (hereinafter simply "URI") in string
    1.70 + * form has the syntax
    1.71 + *
    1.72 + * <blockquote>
    1.73 + * [<i>scheme</i><tt><b>:</b></tt>]<i>scheme-specific-part</i>[<tt><b>#</b></tt><i>fragment</i>]
    1.74 + * </blockquote>
    1.75 + *
    1.76 + * where square brackets [...] delineate optional components and the characters
    1.77 + * <tt><b>:</b></tt> and <tt><b>#</b></tt> stand for themselves.
    1.78 + *
    1.79 + * <p> An <i>absolute</i> URI specifies a scheme; a URI that is not absolute is
    1.80 + * said to be <i>relative</i>.  URIs are also classified according to whether
    1.81 + * they are <i>opaque</i> or <i>hierarchical</i>.
    1.82 + *
    1.83 + * <p> An <i>opaque</i> URI is an absolute URI whose scheme-specific part does
    1.84 + * not begin with a slash character (<tt>'/'</tt>).  Opaque URIs are not
    1.85 + * subject to further parsing.  Some examples of opaque URIs are:
    1.86 + *
    1.87 + * <blockquote><table cellpadding=0 cellspacing=0 summary="layout">
    1.88 + * <tr><td><tt>mailto:java-net@java.sun.com</tt></td></tr>
    1.89 + * <tr><td><tt>news:comp.lang.java</tt></td></tr>
    1.90 + * <tr><td><tt>urn:isbn:096139210x</tt></td></tr>
    1.91 + * </table></blockquote>
    1.92 + *
    1.93 + * <p> A <i>hierarchical</i> URI is either an absolute URI whose
    1.94 + * scheme-specific part begins with a slash character, or a relative URI, that
    1.95 + * is, a URI that does not specify a scheme.  Some examples of hierarchical
    1.96 + * URIs are:
    1.97 + *
    1.98 + * <blockquote>
    1.99 + * <tt>http://java.sun.com/j2se/1.3/</tt><br>
   1.100 + * <tt>docs/guide/collections/designfaq.html#28</tt><br>
   1.101 + * <tt>../../../demo/jfc/SwingSet2/src/SwingSet2.java</tt><br>
   1.102 + * <tt>file:///~/calendar</tt>
   1.103 + * </blockquote>
   1.104 + *
   1.105 + * <p> A hierarchical URI is subject to further parsing according to the syntax
   1.106 + *
   1.107 + * <blockquote>
   1.108 + * [<i>scheme</i><tt><b>:</b></tt>][<tt><b>//</b></tt><i>authority</i>][<i>path</i>][<tt><b>?</b></tt><i>query</i>][<tt><b>#</b></tt><i>fragment</i>]
   1.109 + * </blockquote>
   1.110 + *
   1.111 + * where the characters <tt><b>:</b></tt>, <tt><b>/</b></tt>,
   1.112 + * <tt><b>?</b></tt>, and <tt><b>#</b></tt> stand for themselves.  The
   1.113 + * scheme-specific part of a hierarchical URI consists of the characters
   1.114 + * between the scheme and fragment components.
   1.115 + *
   1.116 + * <p> The authority component of a hierarchical URI is, if specified, either
   1.117 + * <i>server-based</i> or <i>registry-based</i>.  A server-based authority
   1.118 + * parses according to the familiar syntax
   1.119 + *
   1.120 + * <blockquote>
   1.121 + * [<i>user-info</i><tt><b>@</b></tt>]<i>host</i>[<tt><b>:</b></tt><i>port</i>]
   1.122 + * </blockquote>
   1.123 + *
   1.124 + * where the characters <tt><b>@</b></tt> and <tt><b>:</b></tt> stand for
   1.125 + * themselves.  Nearly all URI schemes currently in use are server-based.  An
   1.126 + * authority component that does not parse in this way is considered to be
   1.127 + * registry-based.
   1.128 + *
   1.129 + * <p> The path component of a hierarchical URI is itself said to be absolute
   1.130 + * if it begins with a slash character (<tt>'/'</tt>); otherwise it is
   1.131 + * relative.  The path of a hierarchical URI that is either absolute or
   1.132 + * specifies an authority is always absolute.
   1.133 + *
   1.134 + * <p> All told, then, a URI instance has the following nine components:
   1.135 + *
   1.136 + * <blockquote><table summary="Describes the components of a URI:scheme,scheme-specific-part,authority,user-info,host,port,path,query,fragment">
   1.137 + * <tr><th><i>Component</i></th><th><i>Type</i></th></tr>
   1.138 + * <tr><td>scheme</td><td><tt>String</tt></td></tr>
   1.139 + * <tr><td>scheme-specific-part&nbsp;&nbsp;&nbsp;&nbsp;</td><td><tt>String</tt></td></tr>
   1.140 + * <tr><td>authority</td><td><tt>String</tt></td></tr>
   1.141 + * <tr><td>user-info</td><td><tt>String</tt></td></tr>
   1.142 + * <tr><td>host</td><td><tt>String</tt></td></tr>
   1.143 + * <tr><td>port</td><td><tt>int</tt></td></tr>
   1.144 + * <tr><td>path</td><td><tt>String</tt></td></tr>
   1.145 + * <tr><td>query</td><td><tt>String</tt></td></tr>
   1.146 + * <tr><td>fragment</td><td><tt>String</tt></td></tr>
   1.147 + * </table></blockquote>
   1.148 + *
   1.149 + * In a given instance any particular component is either <i>undefined</i> or
   1.150 + * <i>defined</i> with a distinct value.  Undefined string components are
   1.151 + * represented by <tt>null</tt>, while undefined integer components are
   1.152 + * represented by <tt>-1</tt>.  A string component may be defined to have the
   1.153 + * empty string as its value; this is not equivalent to that component being
   1.154 + * undefined.
   1.155 + *
   1.156 + * <p> Whether a particular component is or is not defined in an instance
   1.157 + * depends upon the type of the URI being represented.  An absolute URI has a
   1.158 + * scheme component.  An opaque URI has a scheme, a scheme-specific part, and
   1.159 + * possibly a fragment, but has no other components.  A hierarchical URI always
   1.160 + * has a path (though it may be empty) and a scheme-specific-part (which at
   1.161 + * least contains the path), and may have any of the other components.  If the
   1.162 + * authority component is present and is server-based then the host component
   1.163 + * will be defined and the user-information and port components may be defined.
   1.164 + *
   1.165 + *
   1.166 + * <h4> Operations on URI instances </h4>
   1.167 + *
   1.168 + * The key operations supported by this class are those of
   1.169 + * <i>normalization</i>, <i>resolution</i>, and <i>relativization</i>.
   1.170 + *
   1.171 + * <p> <i>Normalization</i> is the process of removing unnecessary <tt>"."</tt>
   1.172 + * and <tt>".."</tt> segments from the path component of a hierarchical URI.
   1.173 + * Each <tt>"."</tt> segment is simply removed.  A <tt>".."</tt> segment is
   1.174 + * removed only if it is preceded by a non-<tt>".."</tt> segment.
   1.175 + * Normalization has no effect upon opaque URIs.
   1.176 + *
   1.177 + * <p> <i>Resolution</i> is the process of resolving one URI against another,
   1.178 + * <i>base</i> URI.  The resulting URI is constructed from components of both
   1.179 + * URIs in the manner specified by RFC&nbsp;2396, taking components from the
   1.180 + * base URI for those not specified in the original.  For hierarchical URIs,
   1.181 + * the path of the original is resolved against the path of the base and then
   1.182 + * normalized.  The result, for example, of resolving
   1.183 + *
   1.184 + * <blockquote>
   1.185 + * <tt>docs/guide/collections/designfaq.html#28&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</tt>(1)
   1.186 + * </blockquote>
   1.187 + *
   1.188 + * against the base URI <tt>http://java.sun.com/j2se/1.3/</tt> is the result
   1.189 + * URI
   1.190 + *
   1.191 + * <blockquote>
   1.192 + * <tt>http://java.sun.com/j2se/1.3/docs/guide/collections/designfaq.html#28</tt>
   1.193 + * </blockquote>
   1.194 + *
   1.195 + * Resolving the relative URI
   1.196 + *
   1.197 + * <blockquote>
   1.198 + * <tt>../../../demo/jfc/SwingSet2/src/SwingSet2.java&nbsp;&nbsp;&nbsp;&nbsp;</tt>(2)
   1.199 + * </blockquote>
   1.200 + *
   1.201 + * against this result yields, in turn,
   1.202 + *
   1.203 + * <blockquote>
   1.204 + * <tt>http://java.sun.com/j2se/1.3/demo/jfc/SwingSet2/src/SwingSet2.java</tt>
   1.205 + * </blockquote>
   1.206 + *
   1.207 + * Resolution of both absolute and relative URIs, and of both absolute and
   1.208 + * relative paths in the case of hierarchical URIs, is supported.  Resolving
   1.209 + * the URI <tt>file:///~calendar</tt> against any other URI simply yields the
   1.210 + * original URI, since it is absolute.  Resolving the relative URI (2) above
   1.211 + * against the relative base URI (1) yields the normalized, but still relative,
   1.212 + * URI
   1.213 + *
   1.214 + * <blockquote>
   1.215 + * <tt>demo/jfc/SwingSet2/src/SwingSet2.java</tt>
   1.216 + * </blockquote>
   1.217 + *
   1.218 + * <p> <i>Relativization</i>, finally, is the inverse of resolution: For any
   1.219 + * two normalized URIs <i>u</i> and&nbsp;<i>v</i>,
   1.220 + *
   1.221 + * <blockquote>
   1.222 + *   <i>u</i><tt>.relativize(</tt><i>u</i><tt>.resolve(</tt><i>v</i><tt>)).equals(</tt><i>v</i><tt>)</tt>&nbsp;&nbsp;and<br>
   1.223 + *   <i>u</i><tt>.resolve(</tt><i>u</i><tt>.relativize(</tt><i>v</i><tt>)).equals(</tt><i>v</i><tt>)</tt>&nbsp;&nbsp;.<br>
   1.224 + * </blockquote>
   1.225 + *
   1.226 + * This operation is often useful when constructing a document containing URIs
   1.227 + * that must be made relative to the base URI of the document wherever
   1.228 + * possible.  For example, relativizing the URI
   1.229 + *
   1.230 + * <blockquote>
   1.231 + * <tt>http://java.sun.com/j2se/1.3/docs/guide/index.html</tt>
   1.232 + * </blockquote>
   1.233 + *
   1.234 + * against the base URI
   1.235 + *
   1.236 + * <blockquote>
   1.237 + * <tt>http://java.sun.com/j2se/1.3</tt>
   1.238 + * </blockquote>
   1.239 + *
   1.240 + * yields the relative URI <tt>docs/guide/index.html</tt>.
   1.241 + *
   1.242 + *
   1.243 + * <h4> Character categories </h4>
   1.244 + *
   1.245 + * RFC&nbsp;2396 specifies precisely which characters are permitted in the
   1.246 + * various components of a URI reference.  The following categories, most of
   1.247 + * which are taken from that specification, are used below to describe these
   1.248 + * constraints:
   1.249 + *
   1.250 + * <blockquote><table cellspacing=2 summary="Describes categories alpha,digit,alphanum,unreserved,punct,reserved,escaped,and other">
   1.251 + *   <tr><th valign=top><i>alpha</i></th>
   1.252 + *       <td>The US-ASCII alphabetic characters,
   1.253 + *        <tt>'A'</tt>&nbsp;through&nbsp;<tt>'Z'</tt>
   1.254 + *        and <tt>'a'</tt>&nbsp;through&nbsp;<tt>'z'</tt></td></tr>
   1.255 + *   <tr><th valign=top><i>digit</i></th>
   1.256 + *       <td>The US-ASCII decimal digit characters,
   1.257 + *       <tt>'0'</tt>&nbsp;through&nbsp;<tt>'9'</tt></td></tr>
   1.258 + *   <tr><th valign=top><i>alphanum</i></th>
   1.259 + *       <td>All <i>alpha</i> and <i>digit</i> characters</td></tr>
   1.260 + *   <tr><th valign=top><i>unreserved</i>&nbsp;&nbsp;&nbsp;&nbsp;</th>
   1.261 + *       <td>All <i>alphanum</i> characters together with those in the string
   1.262 + *        <tt>"_-!.~'()*"</tt></td></tr>
   1.263 + *   <tr><th valign=top><i>punct</i></th>
   1.264 + *       <td>The characters in the string <tt>",;:$&amp;+="</tt></td></tr>
   1.265 + *   <tr><th valign=top><i>reserved</i></th>
   1.266 + *       <td>All <i>punct</i> characters together with those in the string
   1.267 + *        <tt>"?/[]@"</tt></td></tr>
   1.268 + *   <tr><th valign=top><i>escaped</i></th>
   1.269 + *       <td>Escaped octets, that is, triplets consisting of the percent
   1.270 + *           character (<tt>'%'</tt>) followed by two hexadecimal digits
   1.271 + *           (<tt>'0'</tt>-<tt>'9'</tt>, <tt>'A'</tt>-<tt>'F'</tt>, and
   1.272 + *           <tt>'a'</tt>-<tt>'f'</tt>)</td></tr>
   1.273 + *   <tr><th valign=top><i>other</i></th>
   1.274 + *       <td>The Unicode characters that are not in the US-ASCII character set,
   1.275 + *           are not control characters (according to the {@link
   1.276 + *           java.lang.Character#isISOControl(char) Character.isISOControl}
   1.277 + *           method), and are not space characters (according to the {@link
   1.278 + *           java.lang.Character#isSpaceChar(char) Character.isSpaceChar}
   1.279 + *           method)&nbsp;&nbsp;<i>(<b>Deviation from RFC 2396</b>, which is
   1.280 + *           limited to US-ASCII)</i></td></tr>
   1.281 + * </table></blockquote>
   1.282 + *
   1.283 + * <p><a name="legal-chars"></a> The set of all legal URI characters consists of
   1.284 + * the <i>unreserved</i>, <i>reserved</i>, <i>escaped</i>, and <i>other</i>
   1.285 + * characters.
   1.286 + *
   1.287 + *
   1.288 + * <h4> Escaped octets, quotation, encoding, and decoding </h4>
   1.289 + *
   1.290 + * RFC 2396 allows escaped octets to appear in the user-info, path, query, and
   1.291 + * fragment components.  Escaping serves two purposes in URIs:
   1.292 + *
   1.293 + * <ul>
   1.294 + *
   1.295 + *   <li><p> To <i>encode</i> non-US-ASCII characters when a URI is required to
   1.296 + *   conform strictly to RFC&nbsp;2396 by not containing any <i>other</i>
   1.297 + *   characters.  </p></li>
   1.298 + *
   1.299 + *   <li><p> To <i>quote</i> characters that are otherwise illegal in a
   1.300 + *   component.  The user-info, path, query, and fragment components differ
   1.301 + *   slightly in terms of which characters are considered legal and illegal.
   1.302 + *   </p></li>
   1.303 + *
   1.304 + * </ul>
   1.305 + *
   1.306 + * These purposes are served in this class by three related operations:
   1.307 + *
   1.308 + * <ul>
   1.309 + *
   1.310 + *   <li><p><a name="encode"></a> A character is <i>encoded</i> by replacing it
   1.311 + *   with the sequence of escaped octets that represent that character in the
   1.312 + *   UTF-8 character set.  The Euro currency symbol (<tt>'&#92;u20AC'</tt>),
   1.313 + *   for example, is encoded as <tt>"%E2%82%AC"</tt>.  <i>(<b>Deviation from
   1.314 + *   RFC&nbsp;2396</b>, which does not specify any particular character
   1.315 + *   set.)</i> </p></li>
   1.316 + *
   1.317 + *   <li><p><a name="quote"></a> An illegal character is <i>quoted</i> simply by
   1.318 + *   encoding it.  The space character, for example, is quoted by replacing it
   1.319 + *   with <tt>"%20"</tt>.  UTF-8 contains US-ASCII, hence for US-ASCII
   1.320 + *   characters this transformation has exactly the effect required by
   1.321 + *   RFC&nbsp;2396. </p></li>
   1.322 + *
   1.323 + *   <li><p><a name="decode"></a>
   1.324 + *   A sequence of escaped octets is <i>decoded</i> by
   1.325 + *   replacing it with the sequence of characters that it represents in the
   1.326 + *   UTF-8 character set.  UTF-8 contains US-ASCII, hence decoding has the
   1.327 + *   effect of de-quoting any quoted US-ASCII characters as well as that of
   1.328 + *   decoding any encoded non-US-ASCII characters.  If a <a
   1.329 + *   href="../nio/charset/CharsetDecoder.html#ce">decoding error</a> occurs
   1.330 + *   when decoding the escaped octets then the erroneous octets are replaced by
   1.331 + *   <tt>'&#92;uFFFD'</tt>, the Unicode replacement character.  </p></li>
   1.332 + *
   1.333 + * </ul>
   1.334 + *
   1.335 + * These operations are exposed in the constructors and methods of this class
   1.336 + * as follows:
   1.337 + *
   1.338 + * <ul>
   1.339 + *
   1.340 + *   <li><p> The {@link #URI(java.lang.String) <code>single-argument
   1.341 + *   constructor</code>} requires any illegal characters in its argument to be
   1.342 + *   quoted and preserves any escaped octets and <i>other</i> characters that
   1.343 + *   are present.  </p></li>
   1.344 + *
   1.345 + *   <li><p> The {@link
   1.346 + *   #URI(java.lang.String,java.lang.String,java.lang.String,int,java.lang.String,java.lang.String,java.lang.String)
   1.347 + *   <code>multi-argument constructors</code>} quote illegal characters as
   1.348 + *   required by the components in which they appear.  The percent character
   1.349 + *   (<tt>'%'</tt>) is always quoted by these constructors.  Any <i>other</i>
   1.350 + *   characters are preserved.  </p></li>
   1.351 + *
   1.352 + *   <li><p> The {@link #getRawUserInfo() getRawUserInfo}, {@link #getRawPath()
   1.353 + *   getRawPath}, {@link #getRawQuery() getRawQuery}, {@link #getRawFragment()
   1.354 + *   getRawFragment}, {@link #getRawAuthority() getRawAuthority}, and {@link
   1.355 + *   #getRawSchemeSpecificPart() getRawSchemeSpecificPart} methods return the
   1.356 + *   values of their corresponding components in raw form, without interpreting
   1.357 + *   any escaped octets.  The strings returned by these methods may contain
   1.358 + *   both escaped octets and <i>other</i> characters, and will not contain any
   1.359 + *   illegal characters.  </p></li>
   1.360 + *
   1.361 + *   <li><p> The {@link #getUserInfo() getUserInfo}, {@link #getPath()
   1.362 + *   getPath}, {@link #getQuery() getQuery}, {@link #getFragment()
   1.363 + *   getFragment}, {@link #getAuthority() getAuthority}, and {@link
   1.364 + *   #getSchemeSpecificPart() getSchemeSpecificPart} methods decode any escaped
   1.365 + *   octets in their corresponding components.  The strings returned by these
   1.366 + *   methods may contain both <i>other</i> characters and illegal characters,
   1.367 + *   and will not contain any escaped octets.  </p></li>
   1.368 + *
   1.369 + *   <li><p> The {@link #toString() toString} method returns a URI string with
   1.370 + *   all necessary quotation but which may contain <i>other</i> characters.
   1.371 + *   </p></li>
   1.372 + *
   1.373 + *   <li><p> The {@link #toASCIIString() toASCIIString} method returns a fully
   1.374 + *   quoted and encoded URI string that does not contain any <i>other</i>
   1.375 + *   characters.  </p></li>
   1.376 + *
   1.377 + * </ul>
   1.378 + *
   1.379 + *
   1.380 + * <h4> Identities </h4>
   1.381 + *
   1.382 + * For any URI <i>u</i>, it is always the case that
   1.383 + *
   1.384 + * <blockquote>
   1.385 + * <tt>new URI(</tt><i>u</i><tt>.toString()).equals(</tt><i>u</i><tt>)</tt>&nbsp;.
   1.386 + * </blockquote>
   1.387 + *
   1.388 + * For any URI <i>u</i> that does not contain redundant syntax such as two
   1.389 + * slashes before an empty authority (as in <tt>file:///tmp/</tt>&nbsp;) or a
   1.390 + * colon following a host name but no port (as in
   1.391 + * <tt>http://java.sun.com:</tt>&nbsp;), and that does not encode characters
   1.392 + * except those that must be quoted, the following identities also hold:
   1.393 + *
   1.394 + * <blockquote>
   1.395 + * <tt>new URI(</tt><i>u</i><tt>.getScheme(),<br>
   1.396 + * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</tt><i>u</i><tt>.getSchemeSpecificPart(),<br>
   1.397 + * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</tt><i>u</i><tt>.getFragment())<br>
   1.398 + * .equals(</tt><i>u</i><tt>)</tt>
   1.399 + * </blockquote>
   1.400 + *
   1.401 + * in all cases,
   1.402 + *
   1.403 + * <blockquote>
   1.404 + * <tt>new URI(</tt><i>u</i><tt>.getScheme(),<br>
   1.405 + * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</tt><i>u</i><tt>.getAuthority(),<br>
   1.406 + * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</tt><i>u</i><tt>.getPath(),&nbsp;</tt><i>u</i><tt>.getQuery(),<br>
   1.407 + * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</tt><i>u</i><tt>.getFragment())<br>
   1.408 + * .equals(</tt><i>u</i><tt>)</tt>
   1.409 + * </blockquote>
   1.410 + *
   1.411 + * if <i>u</i> is hierarchical, and
   1.412 + *
   1.413 + * <blockquote>
   1.414 + * <tt>new URI(</tt><i>u</i><tt>.getScheme(),<br>
   1.415 + * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</tt><i>u</i><tt>.getUserInfo(),&nbsp;</tt><i>u</i><tt>.getHost(),&nbsp;</tt><i>u</i><tt>.getPort(),<br>
   1.416 + * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</tt><i>u</i><tt>.getPath(),&nbsp;</tt><i>u</i><tt>.getQuery(),<br>
   1.417 + * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</tt><i>u</i><tt>.getFragment())<br>
   1.418 + * .equals(</tt><i>u</i><tt>)</tt>
   1.419 + * </blockquote>
   1.420 + *
   1.421 + * if <i>u</i> is hierarchical and has either no authority or a server-based
   1.422 + * authority.
   1.423 + *
   1.424 + *
   1.425 + * <h4> URIs, URLs, and URNs </h4>
   1.426 + *
   1.427 + * A URI is a uniform resource <i>identifier</i> while a URL is a uniform
   1.428 + * resource <i>locator</i>.  Hence every URL is a URI, abstractly speaking, but
   1.429 + * not every URI is a URL.  This is because there is another subcategory of
   1.430 + * URIs, uniform resource <i>names</i> (URNs), which name resources but do not
   1.431 + * specify how to locate them.  The <tt>mailto</tt>, <tt>news</tt>, and
   1.432 + * <tt>isbn</tt> URIs shown above are examples of URNs.
   1.433 + *
   1.434 + * <p> The conceptual distinction between URIs and URLs is reflected in the
   1.435 + * differences between this class and the {@link URL} class.
   1.436 + *
   1.437 + * <p> An instance of this class represents a URI reference in the syntactic
   1.438 + * sense defined by RFC&nbsp;2396.  A URI may be either absolute or relative.
   1.439 + * A URI string is parsed according to the generic syntax without regard to the
   1.440 + * scheme, if any, that it specifies.  No lookup of the host, if any, is
   1.441 + * performed, and no scheme-dependent stream handler is constructed.  Equality,
   1.442 + * hashing, and comparison are defined strictly in terms of the character
   1.443 + * content of the instance.  In other words, a URI instance is little more than
   1.444 + * a structured string that supports the syntactic, scheme-independent
   1.445 + * operations of comparison, normalization, resolution, and relativization.
   1.446 + *
   1.447 + * <p> An instance of the {@link URL} class, by contrast, represents the
   1.448 + * syntactic components of a URL together with some of the information required
   1.449 + * to access the resource that it describes.  A URL must be absolute, that is,
   1.450 + * it must always specify a scheme.  A URL string is parsed according to its
   1.451 + * scheme.  A stream handler is always established for a URL, and in fact it is
   1.452 + * impossible to create a URL instance for a scheme for which no handler is
   1.453 + * available.  Equality and hashing depend upon both the scheme and the
   1.454 + * Internet address of the host, if any; comparison is not defined.  In other
   1.455 + * words, a URL is a structured string that supports the syntactic operation of
   1.456 + * resolution as well as the network I/O operations of looking up the host and
   1.457 + * opening a connection to the specified resource.
   1.458 + *
   1.459 + *
   1.460 + * @author Mark Reinhold
   1.461 + * @since 1.4
   1.462 + *
   1.463 + * @see <a href="http://www.ietf.org/rfc/rfc2279.txt"><i>RFC&nbsp;2279: UTF-8, a
   1.464 + * transformation format of ISO 10646</i></a>, <br><a
   1.465 + * href="http://www.ietf.org/rfc/rfc2373.txt"><i>RFC&nbsp;2373: IPv6 Addressing
   1.466 + * Architecture</i></a>, <br><a
   1.467 + * href="http://www.ietf.org/rfc/rfc2396.txt"><i>RFC&nbsp;2396: Uniform
   1.468 + * Resource Identifiers (URI): Generic Syntax</i></a>, <br><a
   1.469 + * href="http://www.ietf.org/rfc/rfc2732.txt"><i>RFC&nbsp;2732: Format for
   1.470 + * Literal IPv6 Addresses in URLs</i></a>, <br><a
   1.471 + * href="URISyntaxException.html">URISyntaxException</a>
   1.472 + */
   1.473 +
   1.474 +public final class URI
   1.475 +    implements Comparable<URI>, Serializable
   1.476 +{
   1.477 +
   1.478 +    // Note: Comments containing the word "ASSERT" indicate places where a
   1.479 +    // throw of an InternalError should be replaced by an appropriate assertion
   1.480 +    // statement once asserts are enabled in the build.
   1.481 +
   1.482 +    static final long serialVersionUID = -6052424284110960213L;
   1.483 +
   1.484 +
   1.485 +    // -- Properties and components of this instance --
   1.486 +
   1.487 +    // Components of all URIs: [<scheme>:]<scheme-specific-part>[#<fragment>]
   1.488 +    private transient String scheme;            // null ==> relative URI
   1.489 +    private transient String fragment;          // null ==> undefined
   1.490 +
   1.491 +    // Hierarchical URI components: [//<authority>]<path>[?<query>]
   1.492 +    private transient String authority;         // Registry or server
   1.493 +
   1.494 +    // Server-based authority: [<userInfo>@]<host>[:<port>]
   1.495 +    private transient String userInfo;          // null ==> undefined
   1.496 +    private transient String host;              // null ==> registry-based
   1.497 +    private transient int port = -1;            // -1 ==> undefined
   1.498 +
   1.499 +    // Remaining components of hierarchical URIs
   1.500 +    private transient String path;              // null ==> opaque
   1.501 +    private transient String query;             // null ==> undefined
   1.502 +
   1.503 +    // The remaining fields may be computed on demand
   1.504 +
   1.505 +    private volatile transient String schemeSpecificPart;
   1.506 +    private volatile transient int hash;        // Zero ==> undefined
   1.507 +
   1.508 +    private volatile transient String decodedUserInfo = null;
   1.509 +    private volatile transient String decodedAuthority = null;
   1.510 +    private volatile transient String decodedPath = null;
   1.511 +    private volatile transient String decodedQuery = null;
   1.512 +    private volatile transient String decodedFragment = null;
   1.513 +    private volatile transient String decodedSchemeSpecificPart = null;
   1.514 +
   1.515 +    /**
   1.516 +     * The string form of this URI.
   1.517 +     *
   1.518 +     * @serial
   1.519 +     */
   1.520 +    private volatile String string;             // The only serializable field
   1.521 +
   1.522 +
   1.523 +
   1.524 +    // -- Constructors and factories --
   1.525 +
   1.526 +    private URI() { }                           // Used internally; private, so not callable from outside this class
   1.527 +
   1.528 +    /**
   1.529 +     * Constructs a URI by parsing the given string.
   1.530 +     *
   1.531 +     * <p> This constructor parses the given string exactly as specified by the
   1.532 +     * grammar in <a
   1.533 +     * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
   1.534 +     * Appendix&nbsp;A, <b><i>except for the following deviations:</i></b> </p>
   1.535 +     *
   1.536 +     * <ul type=disc>
   1.537 +     *
   1.538 +     *   <li><p> An empty authority component is permitted as long as it is
   1.539 +     *   followed by a non-empty path, a query component, or a fragment
   1.540 +     *   component.  This allows the parsing of URIs such as
   1.541 +     *   <tt>"file:///foo/bar"</tt>, which seems to be the intent of
   1.542 +     *   RFC&nbsp;2396 although the grammar does not permit it.  If the
   1.543 +     *   authority component is empty then the user-information, host, and port
   1.544 +     *   components are undefined. </p></li>
   1.545 +     *
   1.546 +     *   <li><p> Empty relative paths are permitted; this seems to be the
   1.547 +     *   intent of RFC&nbsp;2396 although the grammar does not permit it.  The
   1.548 +     *   primary consequence of this deviation is that a standalone fragment
   1.549 +     *   such as <tt>"#foo"</tt> parses as a relative URI with an empty path
   1.550 +     *   and the given fragment, and can be usefully <a
   1.551 +     *   href="#resolve-frag">resolved</a> against a base URI. </p></li>
   1.552 +     *
   1.553 +     *   <li><p> IPv4 addresses in host components are parsed rigorously, as
   1.554 +     *   specified by <a
   1.555 +     *   href="http://www.ietf.org/rfc/rfc2732.txt">RFC&nbsp;2732</a>: Each
   1.556 +     *   element of a dotted-quad address must contain no more than three
   1.557 +     *   decimal digits.  Each element is further constrained to have a value
   1.558 +     *   no greater than 255. </p></li>
   1.559 +     *
   1.560 +     *   <li> <p> Hostnames in host components that comprise only a single
   1.561 +     *   domain label are permitted to start with an <i>alphanum</i>
   1.562 +     *   character. This seems to be the intent of <a
   1.563 +     *   href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>
   1.564 +     *   section&nbsp;3.2.2 although the grammar does not permit it. The
   1.565 +     *   consequence of this deviation is that the authority component of a
   1.566 +     *   hierarchical URI such as <tt>s://123</tt>, will parse as a server-based
   1.567 +     *   authority. </p></li>
   1.568 +     *
   1.569 +     *   <li><p> IPv6 addresses are permitted for the host component.  An IPv6
   1.570 +     *   address must be enclosed in square brackets (<tt>'['</tt> and
   1.571 +     *   <tt>']'</tt>) as specified by <a
   1.572 +     *   href="http://www.ietf.org/rfc/rfc2732.txt">RFC&nbsp;2732</a>.  The
   1.573 +     *   IPv6 address itself must parse according to <a
   1.574 +     *   href="http://www.ietf.org/rfc/rfc2373.txt">RFC&nbsp;2373</a>.  IPv6
   1.575 +     *   addresses are further constrained to describe no more than sixteen
   1.576 +     *   bytes of address information, a constraint implicit in RFC&nbsp;2373
   1.577 +     *   but not expressible in the grammar. </p></li>
   1.578 +     *
   1.579 +     *   <li><p> Characters in the <i>other</i> category are permitted wherever
   1.580 +     *   RFC&nbsp;2396 permits <i>escaped</i> octets, that is, in the
   1.581 +     *   user-information, path, query, and fragment components, as well as in
   1.582 +     *   the authority component if the authority is registry-based.  This
   1.583 +     *   allows URIs to contain Unicode characters beyond those in the US-ASCII
   1.584 +     *   character set. </p></li>
   1.585 +     *
   1.586 +     * </ul>
   1.587 +     *
   1.588 +     * @param  str   The string to be parsed into a URI
   1.589 +     *
   1.590 +     * @throws  NullPointerException
   1.591 +     *          If <tt>str</tt> is <tt>null</tt>
   1.592 +     *
   1.593 +     * @throws  URISyntaxException
   1.594 +     *          If the given string violates RFC&nbsp;2396, as augmented
   1.595 +     *          by the above deviations
   1.596 +     */
   1.597 +    public URI(String str) throws URISyntaxException {
   1.598 +        new Parser(str).parse(false);  // parseServerAuthority() uses true
   1.599 +    }
   1.600 +
   1.601 +    /**
   1.602 +     * Constructs a hierarchical URI from the given components.
   1.603 +     *
   1.604 +     * <p> If a scheme is given then the path, if also given, must either be
   1.605 +     * empty or begin with a slash character (<tt>'/'</tt>).  Otherwise a
   1.606 +     * component of the new URI may be left undefined by passing <tt>null</tt>
   1.607 +     * for the corresponding parameter or, in the case of the <tt>port</tt>
   1.608 +     * parameter, by passing <tt>-1</tt>.
   1.609 +     *
   1.610 +     * <p> This constructor first builds a URI string from the given components
   1.611 +     * according to the rules specified in <a
   1.612 +     * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
   1.613 +     * section&nbsp;5.2, step&nbsp;7: </p>
   1.614 +     *
   1.615 +     * <ol>
   1.616 +     *
   1.617 +     *   <li><p> Initially, the result string is empty. </p></li>
   1.618 +     *
   1.619 +     *   <li><p> If a scheme is given then it is appended to the result,
   1.620 +     *   followed by a colon character (<tt>':'</tt>).  </p></li>
   1.621 +     *
   1.622 +     *   <li><p> If user information, a host, or a port are given then the
   1.623 +     *   string <tt>"//"</tt> is appended.  </p></li>
   1.624 +     *
   1.625 +     *   <li><p> If user information is given then it is appended, followed by
   1.626 +     *   a commercial-at character (<tt>'@'</tt>).  Any character not in the
   1.627 +     *   <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
   1.628 +     *   categories is <a href="#quote">quoted</a>.  </p></li>
   1.629 +     *
   1.630 +     *   <li><p> If a host is given then it is appended.  If the host is a
   1.631 +     *   literal IPv6 address but is not enclosed in square brackets
   1.632 +     *   (<tt>'['</tt> and <tt>']'</tt>) then the square brackets are added.
   1.633 +     *   </p></li>
   1.634 +     *
   1.635 +     *   <li><p> If a port number is given then a colon character
   1.636 +     *   (<tt>':'</tt>) is appended, followed by the port number in decimal.
   1.637 +     *   </p></li>
   1.638 +     *
   1.639 +     *   <li><p> If a path is given then it is appended.  Any character not in
   1.640 +     *   the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
   1.641 +     *   categories, and not equal to the slash character (<tt>'/'</tt>) or the
   1.642 +     *   commercial-at character (<tt>'@'</tt>), is quoted.  </p></li>
   1.643 +     *
   1.644 +     *   <li><p> If a query is given then a question-mark character
   1.645 +     *   (<tt>'?'</tt>) is appended, followed by the query.  Any character that
   1.646 +     *   is not a <a href="#legal-chars">legal URI character</a> is quoted.
   1.647 +     *   </p></li>
   1.648 +     *
   1.649 +     *   <li><p> Finally, if a fragment is given then a hash character
   1.650 +     *   (<tt>'#'</tt>) is appended, followed by the fragment.  Any character
   1.651 +     *   that is not a legal URI character is quoted.  </p></li>
   1.652 +     *
   1.653 +     * </ol>
   1.654 +     *
   1.655 +     * <p> The resulting URI string is then parsed as if by invoking the {@link
   1.656 +     * #URI(String)} constructor and then invoking the {@link
   1.657 +     * #parseServerAuthority()} method upon the result; this may cause a {@link
   1.658 +     * URISyntaxException} to be thrown.  </p>
   1.659 +     *
   1.660 +     * @param   scheme    Scheme name
   1.661 +     * @param   userInfo  User name and authorization information
   1.662 +     * @param   host      Host name
   1.663 +     * @param   port      Port number
   1.664 +     * @param   path      Path
   1.665 +     * @param   query     Query
   1.666 +     * @param   fragment  Fragment
   1.667 +     *
   1.668 +     * @throws URISyntaxException
   1.669 +     *         If both a scheme and a path are given but the path is relative,
   1.670 +     *         if the URI string constructed from the given components violates
   1.671 +     *         RFC&nbsp;2396, or if the authority component of the string is
   1.672 +     *         present but cannot be parsed as a server-based authority
   1.673 +     */
   1.674 +    public URI(String scheme,
   1.675 +               String userInfo, String host, int port,
   1.676 +               String path, String query, String fragment)
   1.677 +        throws URISyntaxException
   1.678 +    {
   1.679 +        // Assemble the string form, check the path, then parse the result.
   1.680 +        final String assembled = toString(scheme, null, null, userInfo, host,
   1.681 +                                          port, path, query, fragment);
   1.682 +        checkPath(assembled, scheme, path);
   1.683 +        new Parser(assembled).parse(true);
   1.684 +    }
   1.685 +
   1.686 +    /**
   1.687 +     * Constructs a hierarchical URI from the given components.
   1.688 +     *
   1.689 +     * <p> If a scheme is given then the path, if also given, must either be
   1.690 +     * empty or begin with a slash character (<tt>'/'</tt>).  Otherwise a
   1.691 +     * component of the new URI may be left undefined by passing <tt>null</tt>
   1.692 +     * for the corresponding parameter.
   1.693 +     *
   1.694 +     * <p> This constructor first builds a URI string from the given components
   1.695 +     * according to the rules specified in <a
   1.696 +     * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
   1.697 +     * section&nbsp;5.2, step&nbsp;7: </p>
   1.698 +     *
   1.699 +     * <ol>
   1.700 +     *
   1.701 +     *   <li><p> Initially, the result string is empty.  </p></li>
   1.702 +     *
   1.703 +     *   <li><p> If a scheme is given then it is appended to the result,
   1.704 +     *   followed by a colon character (<tt>':'</tt>).  </p></li>
   1.705 +     *
   1.706 +     *   <li><p> If an authority is given then the string <tt>"//"</tt> is
   1.707 +     *   appended, followed by the authority.  If the authority contains a
   1.708 +     *   literal IPv6 address then the address must be enclosed in square
   1.709 +     *   brackets (<tt>'['</tt> and <tt>']'</tt>).  Any character not in the
   1.710 +     *   <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
   1.711 +     *   categories, and not equal to the commercial-at character
   1.712 +     *   (<tt>'@'</tt>), is <a href="#quote">quoted</a>.  </p></li>
   1.713 +     *
   1.714 +     *   <li><p> If a path is given then it is appended.  Any character not in
   1.715 +     *   the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
   1.716 +     *   categories, and not equal to the slash character (<tt>'/'</tt>) or the
   1.717 +     *   commercial-at character (<tt>'@'</tt>), is quoted.  </p></li>
   1.718 +     *
   1.719 +     *   <li><p> If a query is given then a question-mark character
   1.720 +     *   (<tt>'?'</tt>) is appended, followed by the query.  Any character that
   1.721 +     *   is not a <a href="#legal-chars">legal URI character</a> is quoted.
   1.722 +     *   </p></li>
   1.723 +     *
   1.724 +     *   <li><p> Finally, if a fragment is given then a hash character
   1.725 +     *   (<tt>'#'</tt>) is appended, followed by the fragment.  Any character
   1.726 +     *   that is not a legal URI character is quoted.  </p></li>
   1.727 +     *
   1.728 +     * </ol>
   1.729 +     *
   1.730 +     * <p> The resulting URI string is then parsed as if by invoking the {@link
   1.731 +     * #URI(String)} constructor and then invoking the {@link
   1.732 +     * #parseServerAuthority()} method upon the result; this may cause a {@link
   1.733 +     * URISyntaxException} to be thrown.  </p>
   1.734 +     *
   1.735 +     * @param   scheme     Scheme name
   1.736 +     * @param   authority  Authority
   1.737 +     * @param   path       Path
   1.738 +     * @param   query      Query
   1.739 +     * @param   fragment   Fragment
   1.740 +     *
   1.741 +     * @throws URISyntaxException
   1.742 +     *         If both a scheme and a path are given but the path is relative,
   1.743 +     *         if the URI string constructed from the given components violates
   1.744 +     *         RFC&nbsp;2396, or if the authority component of the string is
   1.745 +     *         present but cannot be parsed as a server-based authority
   1.746 +     */
   1.747 +    public URI(String scheme,
   1.748 +               String authority,
   1.749 +               String path, String query, String fragment)
   1.750 +        throws URISyntaxException
   1.751 +    {
   1.752 +        // NOTE(review): Javadoc implies parse(true); code uses false. Confirm.
   1.753 +        final String assembled = toString(scheme, null, authority, null, null,
   1.754 +                                          -1, path, query, fragment);
   1.755 +        checkPath(assembled, scheme, path);
   1.756 +        new Parser(assembled).parse(false);
   1.757 +    }
   1.758 +
   1.759 +    /**
   1.760 +     * Constructs a hierarchical URI from the given components.
   1.761 +     *
   1.762 +     * <p> A component may be left undefined by passing <tt>null</tt>.
   1.763 +     *
   1.764 +     * <p> This convenience constructor works as if by invoking the
   1.765 +     * seven-argument constructor as follows:
   1.766 +     *
   1.767 +     * <blockquote><tt>
   1.768 +     * new&nbsp;{@link #URI(String, String, String, int, String, String, String)
   1.769 +     * URI}(scheme,&nbsp;null,&nbsp;host,&nbsp;-1,&nbsp;path,&nbsp;null,&nbsp;fragment);
   1.770 +     * </tt></blockquote>
   1.771 +     *
   1.772 +     * @param   scheme    Scheme name
   1.773 +     * @param   host      Host name
   1.774 +     * @param   path      Path
   1.775 +     * @param   fragment  Fragment
   1.776 +     *
   1.777 +     * @throws  URISyntaxException
   1.778 +     *          If the URI string constructed from the given components
   1.779 +     *          violates RFC&nbsp;2396
   1.780 +     */
   1.781 +    public URI(String scheme, String host, String path, String fragment)
   1.782 +        throws URISyntaxException {
   1.783 +        // User info and query stay undefined (null); -1 means "no port".
   1.784 +        this(scheme, null, host, -1, path, null, fragment);
   1.785 +    }
   1.786 +
   1.787 +    /**
   1.788 +     * Constructs a URI from the given components.
   1.789 +     *
   1.790 +     * <p> A component may be left undefined by passing <tt>null</tt>.
   1.791 +     *
   1.792 +     * <p> This constructor first builds a URI in string form using the given
   1.793 +     * components as follows:  </p>
   1.794 +     *
   1.795 +     * <ol>
   1.796 +     *
   1.797 +     *   <li><p> Initially, the result string is empty.  </p></li>
   1.798 +     *
   1.799 +     *   <li><p> If a scheme is given then it is appended to the result,
   1.800 +     *   followed by a colon character (<tt>':'</tt>).  </p></li>
   1.801 +     *
   1.802 +     *   <li><p> If a scheme-specific part is given then it is appended.  Any
   1.803 +     *   character that is not a <a href="#legal-chars">legal URI character</a>
   1.804 +     *   is <a href="#quote">quoted</a>.  </p></li>
   1.805 +     *
   1.806 +     *   <li><p> Finally, if a fragment is given then a hash character
   1.807 +     *   (<tt>'#'</tt>) is appended to the string, followed by the fragment.
   1.808 +     *   Any character that is not a legal URI character is quoted.  </p></li>
   1.809 +     *
   1.810 +     * </ol>
   1.811 +     *
   1.812 +     * <p> The resulting URI string is then parsed in order to create the new
   1.813 +     * URI instance as if by invoking the {@link #URI(String)} constructor;
   1.814 +     * this may cause a {@link URISyntaxException} to be thrown.  </p>
   1.815 +     *
   1.816 +     * @param   scheme    Scheme name
   1.817 +     * @param   ssp       Scheme-specific part
   1.818 +     * @param   fragment  Fragment
   1.819 +     *
   1.820 +     * @throws  URISyntaxException
   1.821 +     *          If the URI string constructed from the given components
   1.822 +     *          violates RFC&nbsp;2396
   1.823 +     */
   1.824 +    public URI(String scheme, String ssp, String fragment)
   1.825 +        throws URISyntaxException
   1.826 +    {
   1.827 +        // Only scheme, scheme-specific part and fragment are supplied.
   1.828 +        final String assembled = toString(scheme, ssp, null, null, null, -1,
   1.829 +                                          null, null, fragment);
   1.830 +        new Parser(assembled).parse(false);
   1.831 +    }
   1.832 +
   1.833 +    /**
   1.834 +     * Creates a URI by parsing the given string.
   1.835 +     *
   1.836 +     * <p> This convenience factory method works as if by invoking the {@link
   1.837 +     * #URI(String)} constructor; any {@link URISyntaxException} thrown by the
   1.838 +     * constructor is caught and wrapped in a new {@link
   1.839 +     * IllegalArgumentException} object, which is then thrown.
   1.840 +     *
   1.841 +     * <p> This method is provided for use in situations where it is known that
   1.842 +     * the given string is a legal URI, for example for URI constants declared
   1.843 +     * within a program, and so it would be considered a programming error
   1.844 +     * for the string not to parse as such.  The constructors, which throw
   1.845 +     * {@link URISyntaxException} directly, should be used in situations where
   1.846 +     * a URI is being constructed from user input or from some other source
   1.847 +     * that may be prone to errors.  </p>
   1.848 +     *
   1.849 +     * @param  str   The string to be parsed into a URI
   1.850 +     * @return The new URI
   1.851 +     *
   1.852 +     * @throws  NullPointerException
   1.853 +     *          If <tt>str</tt> is <tt>null</tt>
   1.854 +     *
   1.855 +     * @throws  IllegalArgumentException
   1.856 +     *          If the given string violates RFC&nbsp;2396
   1.857 +     */
   1.858 +    public static URI create(String str) {
   1.859 +        try {
   1.860 +            return new URI(str);
   1.861 +        } catch (URISyntaxException x) {  // rethrow unchecked, keep cause
   1.862 +            throw new IllegalArgumentException(x.getMessage(), x);
   1.863 +        }
   1.864 +    }
   1.865 +
   1.866 +
   1.867 +    // -- Operations --
   1.868 +
   1.869 +    /**
   1.870 +     * Attempts to parse this URI's authority component, if defined, into
   1.871 +     * user-information, host, and port components.
   1.872 +     *
   1.873 +     * <p> If this URI's authority component has already been recognized as
   1.874 +     * being server-based then it will already have been parsed into
   1.875 +     * user-information, host, and port components.  In this case, or if this
   1.876 +     * URI has no authority component, this method simply returns this URI.
   1.877 +     *
   1.878 +     * <p> Otherwise this method attempts once more to parse the authority
   1.879 +     * component into user-information, host, and port components, and throws
   1.880 +     * an exception describing why the authority component could not be parsed
   1.881 +     * in that way.
   1.882 +     *
   1.883 +     * <p> This method is provided because the generic URI syntax specified in
   1.884 +     * <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>
   1.885 +     * cannot always distinguish a malformed server-based authority from a
   1.886 +     * legitimate registry-based authority.  It must therefore treat some
   1.887 +     * instances of the former as instances of the latter.  The authority
   1.888 +     * component in the URI string <tt>"//foo:bar"</tt>, for example, is not a
   1.889 +     * legal server-based authority but it is legal as a registry-based
   1.890 +     * authority.
   1.891 +     *
   1.892 +     * <p> In many common situations, for example when working with URIs that
   1.893 +     * are known to be either URNs or URLs, the hierarchical URIs being used
   1.894 +     * will always be server-based.  They therefore must either be parsed as
   1.895 +     * such or treated as an error.  In these cases a statement such as
   1.896 +     *
   1.897 +     * <blockquote>
   1.898 +     * <tt>URI </tt><i>u</i><tt> = new URI(str).parseServerAuthority();</tt>
   1.899 +     * </blockquote>
   1.900 +     *
   1.901 +     * <p> can be used to ensure that <i>u</i> always refers to a URI that, if
   1.902 +     * it has an authority component, has a server-based authority with proper
   1.903 +     * user-information, host, and port components.  Invoking this method also
   1.904 +     * ensures that if the authority could not be parsed in that way then an
   1.905 +     * appropriate diagnostic message can be issued based upon the exception
   1.906 +     * that is thrown. </p>
   1.907 +     *
   1.908 +     * @return  A URI whose authority field has been parsed
   1.909 +     *          as a server-based authority
   1.910 +     *
   1.911 +     * @throws  URISyntaxException
   1.912 +     *          If the authority component of this URI is defined
   1.913 +     *          but cannot be parsed as a server-based authority
   1.914 +     *          according to RFC&nbsp;2396
   1.915 +     */
   1.916 +    public URI parseServerAuthority()
   1.917 +        throws URISyntaxException
   1.918 +    {
   1.919 +        // Re-parse only when an authority exists but no host was extracted.
   1.920 +        // The failure details of the original parse are not cached (that
   1.921 +        // would need extra fields), so the parser is simply run again.
   1.922 +        if ((host == null) && (authority != null)) {
   1.923 +            defineString();
   1.924 +            new Parser(string).parse(true);
   1.925 +        }
   1.926 +        return this;
   1.927 +    }
   1.928 +
   1.929 +    /**
   1.930 +     * Normalizes this URI's path.
   1.931 +     *
   1.932 +     * <p> If this URI is opaque, or if its path is already in normal form,
   1.933 +     * then this URI is returned.  Otherwise a new URI is constructed that is
   1.934 +     * identical to this URI except that its path is computed by normalizing
   1.935 +     * this URI's path in a manner consistent with <a
   1.936 +     * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
   1.937 +     * section&nbsp;5.2, step&nbsp;6, sub-steps&nbsp;c through&nbsp;f; that is:
   1.938 +     * </p>
   1.939 +     *
   1.940 +     * <ol>
   1.941 +     *
   1.942 +     *   <li><p> All <tt>"."</tt> segments are removed. </p></li>
   1.943 +     *
   1.944 +     *   <li><p> If a <tt>".."</tt> segment is preceded by a non-<tt>".."</tt>
   1.945 +     *   segment then both of these segments are removed.  This step is
   1.946 +     *   repeated until it is no longer applicable. </p></li>
   1.947 +     *
   1.948 +     *   <li><p> If the path is relative, and if its first segment contains a
   1.949 +     *   colon character (<tt>':'</tt>), then a <tt>"."</tt> segment is
   1.950 +     *   prepended.  This prevents a relative URI with a path such as
   1.951 +     *   <tt>"a:b/c/d"</tt> from later being re-parsed as an opaque URI with a
   1.952 +     *   scheme of <tt>"a"</tt> and a scheme-specific part of <tt>"b/c/d"</tt>.
   1.953 +     *   <b><i>(Deviation from RFC&nbsp;2396)</i></b> </p></li>
   1.954 +     *
   1.955 +     * </ol>
   1.956 +     *
   1.957 +     * <p> A normalized path will begin with one or more <tt>".."</tt> segments
   1.958 +     * if there were insufficient non-<tt>".."</tt> segments preceding them to
   1.959 +     * allow their removal.  A normalized path will begin with a <tt>"."</tt>
   1.960 +     * segment if one was inserted by step 3 above.  Otherwise, a normalized
   1.961 +     * path will not contain any <tt>"."</tt> or <tt>".."</tt> segments. </p>
   1.962 +     *
   1.963 +     * @return  A URI equivalent to this URI,
   1.964 +     *          but whose path is in normal form
   1.965 +     */
   1.966 +    public URI normalize() {
   1.967 +        return normalize(this);  // delegates to the one-argument overload
   1.968 +    }
   1.969 +
   1.970 +    /**
   1.971 +     * Resolves the given URI against this URI.
   1.972 +     *
   1.973 +     * <p> If the given URI is already absolute, or if this URI is opaque, then
   1.974 +     * the given URI is returned.
   1.975 +     *
   1.976 +     * <p><a name="resolve-frag"></a> If the given URI's fragment component is
   1.977 +     * defined, its path component is empty, and its scheme, authority, and
   1.978 +     * query components are undefined, then a URI with the given fragment but
   1.979 +     * with all other components equal to those of this URI is returned.  This
   1.980 +     * allows a URI representing a standalone fragment reference, such as
   1.981 +     * <tt>"#foo"</tt>, to be usefully resolved against a base URI.
   1.982 +     *
   1.983 +     * <p> Otherwise this method constructs a new hierarchical URI in a manner
   1.984 +     * consistent with <a
   1.985 +     * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
   1.986 +     * section&nbsp;5.2; that is: </p>
   1.987 +     *
   1.988 +     * <ol>
   1.989 +     *
   1.990 +     *   <li><p> A new URI is constructed with this URI's scheme and the given
   1.991 +     *   URI's query and fragment components. </p></li>
   1.992 +     *
   1.993 +     *   <li><p> If the given URI has an authority component then the new URI's
   1.994 +     *   authority and path are taken from the given URI. </p></li>
   1.995 +     *
   1.996 +     *   <li><p> Otherwise the new URI's authority component is copied from
   1.997 +     *   this URI, and its path is computed as follows: </p>
   1.998 +     *
   1.999 +     *   <ol type=a>
  1.1000 +     *
  1.1001 +     *     <li><p> If the given URI's path is absolute then the new URI's path
  1.1002 +     *     is taken from the given URI. </p></li>
  1.1003 +     *
  1.1004 +     *     <li><p> Otherwise the given URI's path is relative, and so the new
  1.1005 +     *     URI's path is computed by resolving the path of the given URI
  1.1006 +     *     against the path of this URI.  This is done by concatenating all but
  1.1007 +     *     the last segment of this URI's path, if any, with the given URI's
  1.1008 +     *     path and then normalizing the result as if by invoking the {@link
  1.1009 +     *     #normalize() normalize} method. </p></li>
  1.1010 +     *
  1.1011 +     *   </ol></li>
  1.1012 +     *
  1.1013 +     * </ol>
  1.1014 +     *
  1.1015 +     * <p> The result of this method is absolute if, and only if, either this
  1.1016 +     * URI is absolute or the given URI is absolute.  </p>
  1.1017 +     *
  1.1018 +     * @param  uri  The URI to be resolved against this URI
  1.1019 +     * @return The resulting URI
  1.1020 +     *
  1.1021 +     * @throws  NullPointerException
  1.1022 +     *          If <tt>uri</tt> is <tt>null</tt>
  1.1023 +     */
  1.1024 +    public URI resolve(URI uri) {
  1.1025 +        return resolve(this, uri);  // this URI is passed first, as the base
  1.1026 +    }
  1.1027 +
  1.1028 +    /**
  1.1029 +     * Constructs a new URI by parsing the given string and then resolving it
  1.1030 +     * against this URI.
  1.1031 +     *
  1.1032 +     * <p> This convenience method works as if invoking it were equivalent to
  1.1033 +     * evaluating the expression <tt>{@link #resolve(java.net.URI)
  1.1034 +     * resolve}(URI.{@link #create(String) create}(str))</tt>. </p>
  1.1035 +     *
  1.1036 +     * @param  str   The string to be parsed into a URI
  1.1037 +     * @return The resulting URI
  1.1038 +     *
  1.1039 +     * @throws  NullPointerException
  1.1040 +     *          If <tt>str</tt> is <tt>null</tt>
  1.1041 +     *
  1.1042 +     * @throws  IllegalArgumentException
  1.1043 +     *          If the given string violates RFC&nbsp;2396
  1.1044 +     */
  1.1045 +    public URI resolve(String str) {
  1.1046 +        return resolve(URI.create(str));  // create() rethrows unchecked
  1.1047 +    }
  1.1048 +
  1.1049 +    /**
  1.1050 +     * Relativizes the given URI against this URI.
  1.1051 +     *
  1.1052 +     * <p> The relativization of the given URI against this URI is computed as
  1.1053 +     * follows: </p>
  1.1054 +     *
  1.1055 +     * <ol>
  1.1056 +     *
  1.1057 +     *   <li><p> If either this URI or the given URI are opaque, or if the
  1.1058 +     *   scheme and authority components of the two URIs are not identical, or
  1.1059 +     *   if the path of this URI is not a prefix of the path of the given URI,
  1.1060 +     *   then the given URI is returned. </p></li>
  1.1061 +     *
  1.1062 +     *   <li><p> Otherwise a new relative hierarchical URI is constructed with
  1.1063 +     *   query and fragment components taken from the given URI and with a path
  1.1064 +     *   component computed by removing this URI's path from the beginning of
  1.1065 +     *   the given URI's path. </p></li>
  1.1066 +     *
  1.1067 +     * </ol>
  1.1068 +     *
  1.1069 +     * @param  uri  The URI to be relativized against this URI
  1.1070 +     * @return The resulting URI
  1.1071 +     *
  1.1072 +     * @throws  NullPointerException
  1.1073 +     *          If <tt>uri</tt> is <tt>null</tt>
  1.1074 +     */
  1.1075 +    public URI relativize(URI uri) {
  1.1076 +        return relativize(this, uri);  // this URI is passed first, as the base
  1.1077 +    }
  1.1078 +
  1.1079 +    /**
  1.1080 +     * Constructs a URL from this URI.
  1.1081 +     *
  1.1082 +     * <p> This convenience method works as if invoking it were equivalent to
  1.1083 +     * evaluating the expression <tt>new&nbsp;URL(this.toString())</tt> after
  1.1084 +     * first checking that this URI is absolute. </p>
  1.1085 +     *
  1.1086 +     * @return  A URL constructed from this URI
  1.1087 +     *
  1.1088 +     * @throws  IllegalArgumentException
  1.1089 +     *          If this URI is not absolute
  1.1090 +     *
  1.1091 +     * @throws  MalformedURLException
  1.1092 +     *          If a protocol handler for the URL could not be found,
  1.1093 +     *          or if some other error occurred while constructing the URL
  1.1094 +     */
  1.1095 +    public URL toURL() throws MalformedURLException {
  1.1096 +        if (isAbsolute()) {
  1.1097 +            return new URL(toString());
  1.1098 +        }
  1.1099 +        throw new IllegalArgumentException("URI is not absolute");
  1.1100 +    }
  1.1101 +
  1.1102 +    // -- Component access methods --
  1.1103 +
  1.1104 +    /**
  1.1105 +     * Returns the scheme component of this URI.
  1.1106 +     *
  1.1107 +     * <p> The scheme component of a URI, if defined, only contains characters
  1.1108 +     * in the <i>alphanum</i> category and in the string <tt>"-.+"</tt>.  A
  1.1109 +     * scheme always starts with an <i>alpha</i> character. </p>
  1.1110 +     *
  1.1111 +     * <p> The scheme component of a URI cannot contain escaped octets, hence
  1.1112 +     * this method does not perform any decoding. </p>
  1.1113 +     *
  1.1114 +     * @return  The scheme component of this URI,
  1.1115 +     *          or <tt>null</tt> if the scheme is undefined
  1.1116 +     */
  1.1117 +    public String getScheme() {
  1.1118 +        return scheme;  // null when no scheme is defined
  1.1119 +    }
  1.1120 +
  1.1121 +    /**
  1.1122 +     * Tells whether or not this URI is absolute.
  1.1123 +     *
  1.1124 +     * <p> A URI is absolute if, and only if, it has a scheme component. </p>
  1.1125 +     *
  1.1126 +     * @return  <tt>true</tt> if, and only if, this URI is absolute
  1.1127 +     */
  1.1128 +    public boolean isAbsolute() {
  1.1129 +        return scheme != null;  // absolute means "has a scheme"
  1.1130 +    }
  1.1131 +
  1.1132 +    /**
  1.1133 +     * Tells whether or not this URI is opaque.
  1.1134 +     *
  1.1135 +     * <p> A URI is opaque if, and only if, it is absolute and its
  1.1136 +     * scheme-specific part does not begin with a slash character ('/').
  1.1137 +     * An opaque URI has a scheme, a scheme-specific part, and possibly
  1.1138 +     * a fragment; all other components are undefined. </p>
  1.1139 +     *
  1.1140 +     * @return  <tt>true</tt> if, and only if, this URI is opaque
  1.1141 +     */
  1.1142 +    public boolean isOpaque() {
  1.1143 +        return path == null;  // a null path is what marks a URI as opaque
  1.1144 +    }
  1.1145 +
  1.1146 +    /**
  1.1147 +     * Returns the raw scheme-specific part of this URI.  The scheme-specific
  1.1148 +     * part is never undefined, though it may be empty.
  1.1149 +     *
  1.1150 +     * <p> The scheme-specific part of a URI only contains legal URI
  1.1151 +     * characters. </p>
  1.1152 +     *
  1.1153 +     * @return  The raw scheme-specific part of this URI
  1.1154 +     *          (never <tt>null</tt>)
  1.1155 +     */
  1.1156 +    public String getRawSchemeSpecificPart() {
  1.1157 +        defineSchemeSpecificPart();  // presumably fills the field -- confirm
  1.1158 +        return schemeSpecificPart;
  1.1159 +    }
  1.1160 +
  1.1161 +    /**
  1.1162 +     * Returns the decoded scheme-specific part of this URI.
  1.1163 +     *
  1.1164 +     * <p> The string returned by this method is equal to that returned by the
  1.1165 +     * {@link #getRawSchemeSpecificPart() getRawSchemeSpecificPart} method
  1.1166 +     * except that all sequences of escaped octets are <a
  1.1167 +     * href="#decode">decoded</a>.  </p>
  1.1168 +     *
  1.1169 +     * @return  The decoded scheme-specific part of this URI
  1.1170 +     *          (never <tt>null</tt>)
  1.1171 +     */
  1.1172 +    public String getSchemeSpecificPart() {
  1.1173 +        if (decodedSchemeSpecificPart == null)  // nothing cached yet
  1.1174 +            decodedSchemeSpecificPart = decode(getRawSchemeSpecificPart());
  1.1175 +        return decodedSchemeSpecificPart;
  1.1176 +    }
  1.1177 +
  1.1178 +    /**
  1.1179 +     * Returns the raw authority component of this URI.
  1.1180 +     *
  1.1181 +     * <p> The authority component of a URI, if defined, only contains the
  1.1182 +     * commercial-at character (<tt>'@'</tt>) and characters in the
  1.1183 +     * <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, and <i>other</i>
  1.1184 +     * categories.  If the authority is server-based then it is further
  1.1185 +     * constrained to have valid user-information, host, and port
  1.1186 +     * components. </p>
  1.1187 +     *
  1.1188 +     * @return  The raw authority component of this URI,
  1.1189 +     *          or <tt>null</tt> if the authority is undefined
  1.1190 +     */
  1.1191 +    public String getRawAuthority() {
  1.1192 +        return authority;  // undecoded form; null when undefined
  1.1193 +    }
  1.1194 +
  1.1195 +    /**
  1.1196 +     * Returns the decoded authority component of this URI.
  1.1197 +     *
  1.1198 +     * <p> The string returned by this method is equal to that returned by the
  1.1199 +     * {@link #getRawAuthority() getRawAuthority} method except that all
  1.1200 +     * sequences of escaped octets are <a href="#decode">decoded</a>.  </p>
  1.1201 +     *
  1.1202 +     * @return  The decoded authority component of this URI,
  1.1203 +     *          or <tt>null</tt> if the authority is undefined
  1.1204 +     */
  1.1205 +    public String getAuthority() {
  1.1206 +        if (decodedAuthority == null)
  1.1207 +            decodedAuthority = decode(authority);
  1.1208 +        return decodedAuthority;
  1.1209 +    }
  1.1210 +
  1.1211 +    /**
  1.1212 +     * Returns the raw user-information component of this URI.
  1.1213 +     *
  1.1214 +     * <p> The user-information component of a URI, if defined, only contains
  1.1215 +     * characters in the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, and
  1.1216 +     * <i>other</i> categories. </p>
  1.1217 +     *
  1.1218 +     * @return  The raw user-information component of this URI,
  1.1219 +     *          or <tt>null</tt> if the user information is undefined
  1.1220 +     */
  1.1221 +    public String getRawUserInfo() {
  1.1222 +        return userInfo;
  1.1223 +    }
  1.1224 +
  1.1225 +    /**
  1.1226 +     * Returns the decoded user-information component of this URI.
  1.1227 +     *
  1.1228 +     * <p> The string returned by this method is equal to that returned by the
  1.1229 +     * {@link #getRawUserInfo() getRawUserInfo} method except that all
  1.1230 +     * sequences of escaped octets are <a href="#decode">decoded</a>.  </p>
  1.1231 +     *
  1.1232 +     * @return  The decoded user-information component of this URI,
  1.1233 +     *          or <tt>null</tt> if the user information is undefined
  1.1234 +     */
  1.1235 +    public String getUserInfo() {
  1.1236 +        if ((decodedUserInfo == null) && (userInfo != null))
  1.1237 +            decodedUserInfo = decode(userInfo);
  1.1238 +        return decodedUserInfo;
  1.1239 +    }
  1.1240 +
  1.1241 +    /**
  1.1242 +     * Returns the host component of this URI.
  1.1243 +     *
  1.1244 +     * <p> The host component of a URI, if defined, will have one of the
  1.1245 +     * following forms: </p>
  1.1246 +     *
  1.1247 +     * <ul type=disc>
  1.1248 +     *
  1.1249 +     *   <li><p> A domain name consisting of one or more <i>labels</i>
  1.1250 +     *   separated by period characters (<tt>'.'</tt>), optionally followed by
  1.1251 +     *   a period character.  Each label consists of <i>alphanum</i> characters
  1.1252 +     *   as well as hyphen characters (<tt>'-'</tt>), though hyphens never
  1.1253 +     *   occur as the first or last characters in a label. The rightmost
  1.1254 +     *   label of a domain name consisting of two or more labels, begins
  1.1255 +     *   with an <i>alpha</i> character. </li>
  1.1256 +     *
  1.1257 +     *   <li><p> A dotted-quad IPv4 address of the form
  1.1258 +     *   <i>digit</i><tt>+.</tt><i>digit</i><tt>+.</tt><i>digit</i><tt>+.</tt><i>digit</i><tt>+</tt>,
  1.1259 +     *   where no <i>digit</i> sequence is longer than three characters and no
  1.1260 +     *   sequence has a value larger than 255. </p></li>
  1.1261 +     *
  1.1262 +     *   <li><p> An IPv6 address enclosed in square brackets (<tt>'['</tt> and
  1.1263 +     *   <tt>']'</tt>) and consisting of hexadecimal digits, colon characters
  1.1264 +     *   (<tt>':'</tt>), and possibly an embedded IPv4 address.  The full
  1.1265 +     *   syntax of IPv6 addresses is specified in <a
  1.1266 +     *   href="http://www.ietf.org/rfc/rfc2373.txt"><i>RFC&nbsp;2373: IPv6
  1.1267 +     *   Addressing Architecture</i></a>.  </p></li>
  1.1268 +     *
  1.1269 +     * </ul>
  1.1270 +     *
  1.1271 +     * The host component of a URI cannot contain escaped octets, hence this
  1.1272 +     * method does not perform any decoding.
  1.1273 +     *
  1.1274 +     * @return  The host component of this URI,
  1.1275 +     *          or <tt>null</tt> if the host is undefined
  1.1276 +     */
  1.1277 +    public String getHost() {
  1.1278 +        return host;
  1.1279 +    }
  1.1280 +
  1.1281 +    /**
  1.1282 +     * Returns the port number of this URI.
  1.1283 +     *
  1.1284 +     * <p> The port component of a URI, if defined, is a non-negative
  1.1285 +     * integer. </p>
  1.1286 +     *
  1.1287 +     * @return  The port component of this URI,
  1.1288 +     *          or <tt>-1</tt> if the port is undefined
  1.1289 +     */
  1.1290 +    public int getPort() {
  1.1291 +        return port;
  1.1292 +    }
  1.1293 +
  1.1294 +    /**
  1.1295 +     * Returns the raw path component of this URI.
  1.1296 +     *
  1.1297 +     * <p> The path component of a URI, if defined, only contains the slash
  1.1298 +     * character (<tt>'/'</tt>), the commercial-at character (<tt>'@'</tt>),
  1.1299 +     * and characters in the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>,
  1.1300 +     * and <i>other</i> categories. </p>
  1.1301 +     *
  1.1302 +     * @return  The path component of this URI,
  1.1303 +     *          or <tt>null</tt> if the path is undefined
  1.1304 +     */
  1.1305 +    public String getRawPath() {
  1.1306 +        return path;
  1.1307 +    }
  1.1308 +
  1.1309 +    /**
  1.1310 +     * Returns the decoded path component of this URI.
  1.1311 +     *
  1.1312 +     * <p> The string returned by this method is equal to that returned by the
  1.1313 +     * {@link #getRawPath() getRawPath} method except that all sequences of
  1.1314 +     * escaped octets are <a href="#decode">decoded</a>.  </p>
  1.1315 +     *
  1.1316 +     * @return  The decoded path component of this URI,
  1.1317 +     *          or <tt>null</tt> if the path is undefined
  1.1318 +     */
  1.1319 +    public String getPath() {
  1.1320 +        if ((decodedPath == null) && (path != null))
  1.1321 +            decodedPath = decode(path);
  1.1322 +        return decodedPath;
  1.1323 +    }
  1.1324 +
  1.1325 +    /**
  1.1326 +     * Returns the raw query component of this URI.
  1.1327 +     *
  1.1328 +     * <p> The query component of a URI, if defined, only contains legal URI
  1.1329 +     * characters. </p>
  1.1330 +     *
  1.1331 +     * @return  The raw query component of this URI,
  1.1332 +     *          or <tt>null</tt> if the query is undefined
  1.1333 +     */
  1.1334 +    public String getRawQuery() {
  1.1335 +        return query;
  1.1336 +    }
  1.1337 +
  1.1338 +    /**
  1.1339 +     * Returns the decoded query component of this URI.
  1.1340 +     *
  1.1341 +     * <p> The string returned by this method is equal to that returned by the
  1.1342 +     * {@link #getRawQuery() getRawQuery} method except that all sequences of
  1.1343 +     * escaped octets are <a href="#decode">decoded</a>.  </p>
  1.1344 +     *
  1.1345 +     * @return  The decoded query component of this URI,
  1.1346 +     *          or <tt>null</tt> if the query is undefined
  1.1347 +     */
  1.1348 +    public String getQuery() {
  1.1349 +        if ((decodedQuery == null) && (query != null))
  1.1350 +            decodedQuery = decode(query);
  1.1351 +        return decodedQuery;
  1.1352 +    }
  1.1353 +
  1.1354 +    /**
  1.1355 +     * Returns the raw fragment component of this URI.
  1.1356 +     *
  1.1357 +     * <p> The fragment component of a URI, if defined, only contains legal URI
  1.1358 +     * characters. </p>
  1.1359 +     *
  1.1360 +     * @return  The raw fragment component of this URI,
  1.1361 +     *          or <tt>null</tt> if the fragment is undefined
  1.1362 +     */
  1.1363 +    public String getRawFragment() {
  1.1364 +        return fragment;
  1.1365 +    }
  1.1366 +
  1.1367 +    /**
  1.1368 +     * Returns the decoded fragment component of this URI.
  1.1369 +     *
  1.1370 +     * <p> The string returned by this method is equal to that returned by the
  1.1371 +     * {@link #getRawFragment() getRawFragment} method except that all
  1.1372 +     * sequences of escaped octets are <a href="#decode">decoded</a>.  </p>
  1.1373 +     *
  1.1374 +     * @return  The decoded fragment component of this URI,
  1.1375 +     *          or <tt>null</tt> if the fragment is undefined
  1.1376 +     */
  1.1377 +    public String getFragment() {
  1.1378 +        if ((decodedFragment == null) && (fragment != null))
  1.1379 +            decodedFragment = decode(fragment);
  1.1380 +        return decodedFragment;
  1.1381 +    }
  1.1382 +
  1.1383 +
  1.1384 +    // -- Equality, comparison, hash code, toString, and serialization --
  1.1385 +
  1.1386 +    /**
  1.1387 +     * Tests this URI for equality with another object.
  1.1388 +     *
  1.1389 +     * <p> If the given object is not a URI then this method immediately
  1.1390 +     * returns <tt>false</tt>.
  1.1391 +     *
  1.1392 +     * <p> For two URIs to be considered equal requires that either both are
  1.1393 +     * opaque or both are hierarchical.  Their schemes must either both be
  1.1394 +     * undefined or else be equal without regard to case. Their fragments
  1.1395 +     * must either both be undefined or else be equal.
  1.1396 +     *
  1.1397 +     * <p> For two opaque URIs to be considered equal, their scheme-specific
  1.1398 +     * parts must be equal.
  1.1399 +     *
  1.1400 +     * <p> For two hierarchical URIs to be considered equal, their paths must
  1.1401 +     * be equal and their queries must either both be undefined or else be
  1.1402 +     * equal.  Their authorities must either both be undefined, or both be
  1.1403 +     * registry-based, or both be server-based.  If their authorities are
  1.1404 +     * defined and are registry-based, then they must be equal.  If their
  1.1405 +     * authorities are defined and are server-based, then their hosts must be
  1.1406 +     * equal without regard to case, their port numbers must be equal, and
  1.1407 +     * their user-information components must be equal.
  1.1408 +     *
  1.1409 +     * <p> When testing the user-information, path, query, fragment, authority,
  1.1410 +     * or scheme-specific parts of two URIs for equality, the raw forms rather
  1.1411 +     * than the decoded forms of these components are compared and the
  1.1412 +     * hexadecimal digits of escaped octets are compared without regard to
  1.1413 +     * case.
  1.1414 +     *
  1.1415 +     * <p> This method satisfies the general contract of the {@link
  1.1416 +     * java.lang.Object#equals(Object) Object.equals} method. </p>
  1.1417 +     *
  1.1418 +     * @param   ob   The object to which this object is to be compared
  1.1419 +     *
  1.1420 +     * @return  <tt>true</tt> if, and only if, the given object is a URI that
  1.1421 +     *          is identical to this URI
  1.1422 +     */
  1.1423 +    public boolean equals(Object ob) {
  1.1424 +        if (ob == this)
  1.1425 +            return true;
  1.1426 +        if (!(ob instanceof URI))
  1.1427 +            return false;
  1.1428 +        URI that = (URI)ob;
  1.1429 +        if (this.isOpaque() != that.isOpaque()) return false;
  1.1430 +        if (!equalIgnoringCase(this.scheme, that.scheme)) return false;
  1.1431 +        if (!equal(this.fragment, that.fragment)) return false;
  1.1432 +
  1.1433 +        // Opaque
  1.1434 +        if (this.isOpaque())
  1.1435 +            return equal(this.schemeSpecificPart, that.schemeSpecificPart);
  1.1436 +
  1.1437 +        // Hierarchical
  1.1438 +        if (!equal(this.path, that.path)) return false;
  1.1439 +        if (!equal(this.query, that.query)) return false;
  1.1440 +
  1.1441 +        // Authorities
  1.1442 +        if (this.authority == that.authority) return true;
  1.1443 +        if (this.host != null) {
  1.1444 +            // Server-based
  1.1445 +            if (!equal(this.userInfo, that.userInfo)) return false;
  1.1446 +            if (!equalIgnoringCase(this.host, that.host)) return false;
  1.1447 +            if (this.port != that.port) return false;
  1.1448 +        } else if (this.authority != null) {
  1.1449 +            // Registry-based
  1.1450 +            if (!equal(this.authority, that.authority)) return false;
  1.1451 +        } else if (this.authority != that.authority) {
  1.1452 +            return false;
  1.1453 +        }
  1.1454 +
  1.1455 +        return true;
  1.1456 +    }
  1.1457 +
  1.1458 +    /**
  1.1459 +     * Returns a hash-code value for this URI.  The hash code is based upon all
  1.1460 +     * of the URI's components, and satisfies the general contract of the
  1.1461 +     * {@link java.lang.Object#hashCode() Object.hashCode} method.
  1.1462 +     *
  1.1463 +     * @return  A hash-code value for this URI
  1.1464 +     */
  1.1465 +    public int hashCode() {
  1.1466 +        if (hash != 0)
  1.1467 +            return hash;
  1.1468 +        int h = hashIgnoringCase(0, scheme);
  1.1469 +        h = hash(h, fragment);
  1.1470 +        if (isOpaque()) {
  1.1471 +            h = hash(h, schemeSpecificPart);
  1.1472 +        } else {
  1.1473 +            h = hash(h, path);
  1.1474 +            h = hash(h, query);
  1.1475 +            if (host != null) {
  1.1476 +                h = hash(h, userInfo);
  1.1477 +                h = hashIgnoringCase(h, host);
  1.1478 +                h += 1949 * port;
  1.1479 +            } else {
  1.1480 +                h = hash(h, authority);
  1.1481 +            }
  1.1482 +        }
  1.1483 +        hash = h;
  1.1484 +        return h;
  1.1485 +    }
  1.1486 +
  1.1487 +    /**
  1.1488 +     * Compares this URI to another object, which must be a URI.
  1.1489 +     *
  1.1490 +     * <p> When comparing corresponding components of two URIs, if one
  1.1491 +     * component is undefined but the other is defined then the first is
  1.1492 +     * considered to be less than the second.  Unless otherwise noted, string
  1.1493 +     * components are ordered according to their natural, case-sensitive
  1.1494 +     * ordering as defined by the {@link java.lang.String#compareTo(Object)
  1.1495 +     * String.compareTo} method.  String components that are subject to
  1.1496 +     * encoding are compared by comparing their raw forms rather than their
  1.1497 +     * decoded forms.
  1.1498 +     *
  1.1499 +     * <p> The ordering of URIs is defined as follows: </p>
  1.1500 +     *
  1.1501 +     * <ul type=disc>
  1.1502 +     *
  1.1503 +     *   <li><p> Two URIs with different schemes are ordered according to the
  1.1504 +     *   ordering of their schemes, without regard to case. </p></li>
  1.1505 +     *
  1.1506 +     *   <li><p> A hierarchical URI is considered to be less than an opaque URI
  1.1507 +     *   with an identical scheme. </p></li>
  1.1508 +     *
  1.1509 +     *   <li><p> Two opaque URIs with identical schemes are ordered according
  1.1510 +     *   to the ordering of their scheme-specific parts. </p></li>
  1.1511 +     *
  1.1512 +     *   <li><p> Two opaque URIs with identical schemes and scheme-specific
  1.1513 +     *   parts are ordered according to the ordering of their
  1.1514 +     *   fragments. </p></li>
  1.1515 +     *
  1.1516 +     *   <li><p> Two hierarchical URIs with identical schemes are ordered
  1.1517 +     *   according to the ordering of their authority components: </p>
  1.1518 +     *
  1.1519 +     *   <ul type=disc>
  1.1520 +     *
  1.1521 +     *     <li><p> If both authority components are server-based then the URIs
  1.1522 +     *     are ordered according to their user-information components; if these
  1.1523 +     *     components are identical then the URIs are ordered according to the
  1.1524 +     *     ordering of their hosts, without regard to case; if the hosts are
  1.1525 +     *     identical then the URIs are ordered according to the ordering of
  1.1526 +     *     their ports. </p></li>
  1.1527 +     *
  1.1528 +     *     <li><p> If one or both authority components are registry-based then
  1.1529 +     *     the URIs are ordered according to the ordering of their authority
  1.1530 +     *     components. </p></li>
  1.1531 +     *
  1.1532 +     *   </ul></li>
  1.1533 +     *
  1.1534 +     *   <li><p> Finally, two hierarchical URIs with identical schemes and
  1.1535 +     *   authority components are ordered according to the ordering of their
  1.1536 +     *   paths; if their paths are identical then they are ordered according to
  1.1537 +     *   the ordering of their queries; if the queries are identical then they
  1.1538 +     *   are ordered according to the order of their fragments. </p></li>
  1.1539 +     *
  1.1540 +     * </ul>
  1.1541 +     *
  1.1542 +     * <p> This method satisfies the general contract of the {@link
  1.1543 +     * java.lang.Comparable#compareTo(Object) Comparable.compareTo}
  1.1544 +     * method. </p>
  1.1545 +     *
  1.1546 +     * @param   that
  1.1547 +     *          The object to which this URI is to be compared
  1.1548 +     *
  1.1549 +     * @return  A negative integer, zero, or a positive integer as this URI is
  1.1550 +     *          less than, equal to, or greater than the given URI
  1.1551 +     *
  1.1552 +     * @throws  ClassCastException
  1.1553 +     *          If the given object is not a URI
  1.1554 +     */
  1.1555 +    public int compareTo(URI that) {
  1.1556 +        int c;
  1.1557 +
  1.1558 +        if ((c = compareIgnoringCase(this.scheme, that.scheme)) != 0)
  1.1559 +            return c;
  1.1560 +
  1.1561 +        if (this.isOpaque()) {
  1.1562 +            if (that.isOpaque()) {
  1.1563 +                // Both opaque
  1.1564 +                if ((c = compare(this.schemeSpecificPart,
  1.1565 +                                 that.schemeSpecificPart)) != 0)
  1.1566 +                    return c;
  1.1567 +                return compare(this.fragment, that.fragment);
  1.1568 +            }
  1.1569 +            return +1;                  // Opaque > hierarchical
  1.1570 +        } else if (that.isOpaque()) {
  1.1571 +            return -1;                  // Hierarchical < opaque
  1.1572 +        }
  1.1573 +
  1.1574 +        // Hierarchical
  1.1575 +        if ((this.host != null) && (that.host != null)) {
  1.1576 +            // Both server-based
  1.1577 +            if ((c = compare(this.userInfo, that.userInfo)) != 0)
  1.1578 +                return c;
  1.1579 +            if ((c = compareIgnoringCase(this.host, that.host)) != 0)
  1.1580 +                return c;
  1.1581 +            if ((c = this.port - that.port) != 0)
  1.1582 +                return c;
  1.1583 +        } else {
  1.1584 +            // If one or both authorities are registry-based then we simply
  1.1585 +            // compare them in the usual, case-sensitive way.  If one is
  1.1586 +            // registry-based and one is server-based then the strings are
  1.1587 +            // guaranteed to be unequal, hence the comparison will never return
  1.1588 +            // zero and the compareTo and equals methods will remain
  1.1589 +            // consistent.
  1.1590 +            if ((c = compare(this.authority, that.authority)) != 0) return c;
  1.1591 +        }
  1.1592 +
  1.1593 +        if ((c = compare(this.path, that.path)) != 0) return c;
  1.1594 +        if ((c = compare(this.query, that.query)) != 0) return c;
  1.1595 +        return compare(this.fragment, that.fragment);
  1.1596 +    }
  1.1597 +
  1.1598 +    /**
  1.1599 +     * Returns the content of this URI as a string.
  1.1600 +     *
  1.1601 +     * <p> If this URI was created by invoking one of the constructors in this
  1.1602 +     * class then a string equivalent to the original input string, or to the
  1.1603 +     * string computed from the originally-given components, as appropriate, is
  1.1604 +     * returned.  Otherwise this URI was created by normalization, resolution,
  1.1605 +     * or relativization, and so a string is constructed from this URI's
  1.1606 +     * components according to the rules specified in <a
  1.1607 +     * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
  1.1608 +     * section&nbsp;5.2, step&nbsp;7. </p>
  1.1609 +     *
  1.1610 +     * @return  The string form of this URI
  1.1611 +     */
  1.1612 +    public String toString() {
  1.1613 +        defineString();
  1.1614 +        return string;
  1.1615 +    }
  1.1616 +
  1.1617 +    /**
  1.1618 +     * Returns the content of this URI as a US-ASCII string.
  1.1619 +     *
  1.1620 +     * <p> If this URI does not contain any characters in the <i>other</i>
  1.1621 +     * category then an invocation of this method will return the same value as
  1.1622 +     * an invocation of the {@link #toString() toString} method.  Otherwise
  1.1623 +     * this method works as if by invoking that method and then <a
  1.1624 +     * href="#encode">encoding</a> the result.  </p>
  1.1625 +     *
  1.1626 +     * @return  The string form of this URI, encoded as needed
  1.1627 +     *          so that it only contains characters in the US-ASCII
  1.1628 +     *          charset
  1.1629 +     */
  1.1630 +    public String toASCIIString() {
  1.1631 +        defineString();
  1.1632 +        return encode(string);
  1.1633 +    }
  1.1634 +
  1.1635 +
  1.1636 +    // -- Serialization support --
  1.1637 +
  1.1638 +    /**
  1.1639 +     * Saves the content of this URI to the given serial stream.
  1.1640 +     *
  1.1641 +     * <p> The only serializable field of a URI instance is its <tt>string</tt>
  1.1642 +     * field.  That field is given a value, if it does not have one already,
  1.1643 +     * and then the {@link java.io.ObjectOutputStream#defaultWriteObject()}
  1.1644 +     * method of the given object-output stream is invoked. </p>
  1.1645 +     *
  1.1646 +     * @param  os  The object-output stream to which this object
  1.1647 +     *             is to be written
  1.1648 +     */
  1.1649 +    private void writeObject(ObjectOutputStream os)
  1.1650 +        throws IOException
  1.1651 +    {
  1.1652 +        defineString();
  1.1653 +        os.defaultWriteObject();        // Writes the string field only
  1.1654 +    }
  1.1655 +
  1.1656 +    /**
  1.1657 +     * Reconstitutes a URI from the given serial stream.
  1.1658 +     *
  1.1659 +     * <p> The {@link java.io.ObjectInputStream#defaultReadObject()} method is
  1.1660 +     * invoked to read the value of the <tt>string</tt> field.  The result is
  1.1661 +     * then parsed in the usual way.
  1.1662 +     *
  1.1663 +     * @param  is  The object-input stream from which this object
  1.1664 +     *             is being read
  1.1665 +     */
  1.1666 +    private void readObject(ObjectInputStream is)
  1.1667 +        throws ClassNotFoundException, IOException
  1.1668 +    {
  1.1669 +        port = -1;                      // Argh
  1.1670 +        is.defaultReadObject();
  1.1671 +        try {
  1.1672 +            new Parser(string).parse(false);
  1.1673 +        } catch (URISyntaxException x) {
  1.1674 +            IOException y = new InvalidObjectException("Invalid URI");
  1.1675 +            y.initCause(x);
  1.1676 +            throw y;
  1.1677 +        }
  1.1678 +    }
  1.1679 +
  1.1680 +
  1.1681 +    // -- End of public methods --
  1.1682 +
  1.1683 +
  1.1684 +    // -- Utility methods for string-field comparison and hashing --
  1.1685 +
  1.1686 +    // These methods return appropriate values for null string arguments,
  1.1687 +    // thereby simplifying the equals, hashCode, and compareTo methods.
  1.1688 +    //
  1.1689 +    // The case-ignoring methods should only be applied to strings whose
  1.1690 +    // characters are all known to be US-ASCII.  Because of this restriction,
  1.1691 +    // these methods are faster than the similar methods in the String class.
  1.1692 +
  1.1693 +    // US-ASCII only
  1.1694 +    private static int toLower(char c) {
  1.1695 +        if ((c >= 'A') && (c <= 'Z'))
  1.1696 +            return c + ('a' - 'A');
  1.1697 +        return c;
  1.1698 +    }
  1.1699 +
  1.1700 +    private static boolean equal(String s, String t) {
  1.1701 +        if (s == t) return true;
  1.1702 +        if ((s != null) && (t != null)) {
  1.1703 +            if (s.length() != t.length())
  1.1704 +                return false;
  1.1705 +            if (s.indexOf('%') < 0)
  1.1706 +                return s.equals(t);
  1.1707 +            int n = s.length();
  1.1708 +            for (int i = 0; i < n;) {
  1.1709 +                char c = s.charAt(i);
  1.1710 +                char d = t.charAt(i);
  1.1711 +                if (c != '%') {
  1.1712 +                    if (c != d)
  1.1713 +                        return false;
  1.1714 +                    i++;
  1.1715 +                    continue;
  1.1716 +                }
  1.1717 +                i++;
  1.1718 +                if (toLower(s.charAt(i)) != toLower(t.charAt(i)))
  1.1719 +                    return false;
  1.1720 +                i++;
  1.1721 +                if (toLower(s.charAt(i)) != toLower(t.charAt(i)))
  1.1722 +                    return false;
  1.1723 +                i++;
  1.1724 +            }
  1.1725 +            return true;
  1.1726 +        }
  1.1727 +        return false;
  1.1728 +    }
  1.1729 +
  1.1730 +    // US-ASCII only
  1.1731 +    private static boolean equalIgnoringCase(String s, String t) {
  1.1732 +        if (s == t) return true;
  1.1733 +        if ((s != null) && (t != null)) {
  1.1734 +            int n = s.length();
  1.1735 +            if (t.length() != n)
  1.1736 +                return false;
  1.1737 +            for (int i = 0; i < n; i++) {
  1.1738 +                if (toLower(s.charAt(i)) != toLower(t.charAt(i)))
  1.1739 +                    return false;
  1.1740 +            }
  1.1741 +            return true;
  1.1742 +        }
  1.1743 +        return false;
  1.1744 +    }
  1.1745 +
  1.1746 +    private static int hash(int hash, String s) {
  1.1747 +        if (s == null) return hash;
  1.1748 +        return hash * 127 + s.hashCode();
  1.1749 +    }
  1.1750 +
  1.1751 +    // US-ASCII only
  1.1752 +    private static int hashIgnoringCase(int hash, String s) {
  1.1753 +        if (s == null) return hash;
  1.1754 +        int h = hash;
  1.1755 +        int n = s.length();
  1.1756 +        for (int i = 0; i < n; i++)
  1.1757 +            h = 31 * h + toLower(s.charAt(i));
  1.1758 +        return h;
  1.1759 +    }
  1.1760 +
  1.1761 +    private static int compare(String s, String t) {
  1.1762 +        if (s == t) return 0;
  1.1763 +        if (s != null) {
  1.1764 +            if (t != null)
  1.1765 +                return s.compareTo(t);
  1.1766 +            else
  1.1767 +                return +1;
  1.1768 +        } else {
  1.1769 +            return -1;
  1.1770 +        }
  1.1771 +    }
  1.1772 +
  1.1773 +    // US-ASCII only
  1.1774 +    private static int compareIgnoringCase(String s, String t) {
  1.1775 +        if (s == t) return 0;
  1.1776 +        if (s != null) {
  1.1777 +            if (t != null) {
  1.1778 +                int sn = s.length();
  1.1779 +                int tn = t.length();
  1.1780 +                int n = sn < tn ? sn : tn;
  1.1781 +                for (int i = 0; i < n; i++) {
  1.1782 +                    int c = toLower(s.charAt(i)) - toLower(t.charAt(i));
  1.1783 +                    if (c != 0)
  1.1784 +                        return c;
  1.1785 +                }
  1.1786 +                return sn - tn;
  1.1787 +            }
  1.1788 +            return +1;
  1.1789 +        } else {
  1.1790 +            return -1;
  1.1791 +        }
  1.1792 +    }
  1.1793 +
  1.1794 +
  1.1795 +    // -- String construction --
  1.1796 +
  1.1797 +    // If a scheme is given then the path, if given, must be absolute
  1.1798 +    //
  1.1799 +    private static void checkPath(String s, String scheme, String path)
  1.1800 +        throws URISyntaxException
  1.1801 +    {
  1.1802 +        if (scheme != null) {
  1.1803 +            if ((path != null)
  1.1804 +                && ((path.length() > 0) && (path.charAt(0) != '/')))
  1.1805 +                throw new URISyntaxException(s,
  1.1806 +                                             "Relative path in absolute URI");
  1.1807 +        }
  1.1808 +    }
  1.1809 +
  1.1810 +    private void appendAuthority(StringBuffer sb,
  1.1811 +                                 String authority,
  1.1812 +                                 String userInfo,
  1.1813 +                                 String host,
  1.1814 +                                 int port)
  1.1815 +    {
  1.1816 +        if (host != null) {
  1.1817 +            sb.append("//");
  1.1818 +            if (userInfo != null) {
  1.1819 +                sb.append(quote(userInfo, L_USERINFO, H_USERINFO));
  1.1820 +                sb.append('@');
  1.1821 +            }
  1.1822 +            boolean needBrackets = ((host.indexOf(':') >= 0)
  1.1823 +                                    && !host.startsWith("[")
  1.1824 +                                    && !host.endsWith("]"));
  1.1825 +            if (needBrackets) sb.append('[');
  1.1826 +            sb.append(host);
  1.1827 +            if (needBrackets) sb.append(']');
  1.1828 +            if (port != -1) {
  1.1829 +                sb.append(':');
  1.1830 +                sb.append(port);
  1.1831 +            }
  1.1832 +        } else if (authority != null) {
  1.1833 +            sb.append("//");
  1.1834 +            if (authority.startsWith("[")) {
  1.1835 +                // authority should (but may not) contain an embedded IPv6 address
  1.1836 +                int end = authority.indexOf("]");
  1.1837 +                String doquote = authority, dontquote = "";
  1.1838 +                if (end != -1 && authority.indexOf(":") != -1) {
  1.1839 +                    // the authority contains an IPv6 address
  1.1840 +                    if (end == authority.length()) {
  1.1841 +                        dontquote = authority;
  1.1842 +                        doquote = "";
  1.1843 +                    } else {
  1.1844 +                        dontquote = authority.substring(0 , end + 1);
  1.1845 +                        doquote = authority.substring(end + 1);
  1.1846 +                    }
  1.1847 +                }
  1.1848 +                sb.append(dontquote);
  1.1849 +                sb.append(quote(doquote,
  1.1850 +                            L_REG_NAME | L_SERVER,
  1.1851 +                            H_REG_NAME | H_SERVER));
  1.1852 +            } else {
  1.1853 +                sb.append(quote(authority,
  1.1854 +                            L_REG_NAME | L_SERVER,
  1.1855 +                            H_REG_NAME | H_SERVER));
  1.1856 +            }
  1.1857 +        }
  1.1858 +    }
  1.1859 +
  1.1860 +    private void appendSchemeSpecificPart(StringBuffer sb,      // Appends the scheme-specific part to sb:
  1.1861 +                                          String opaquePart,    // the opaque part when it is non-null, otherwise
  1.1862 +                                          String authority,     // appendAuthority(...) output, then quoted path and query.
  1.1863 +                                          String userInfo,
  1.1864 +                                          String host,
  1.1865 +                                          int port,
  1.1866 +                                          String path,
  1.1867 +                                          String query)
  1.1868 +    {
  1.1869 +        if (opaquePart != null) {
  1.1870 +            /* check if SSP begins with an IPv6 address
  1.1871 +             * because we must not quote a literal IPv6 address
  1.1872 +             */
  1.1873 +            if (opaquePart.startsWith("//[")) {
  1.1874 +                int end =  opaquePart.indexOf("]");
  1.1875 +                if (end != -1 && opaquePart.indexOf(":")!=-1) {   // a ']' exists and a ':' occurs somewhere
  1.1876 +                    String doquote, dontquote;
  1.1877 +                    if (end == opaquePart.length()) {   // NOTE(review): indexOf() is always < length(), so this branch looks unreachable
  1.1878 +                        dontquote = opaquePart;
  1.1879 +                        doquote = "";
  1.1880 +                    } else {   // also covers ']' being the last char: doquote becomes ""
  1.1881 +                        dontquote = opaquePart.substring(0,end+1);
  1.1882 +                        doquote = opaquePart.substring(end+1);
  1.1883 +                    }
  1.1884 +                    sb.append (dontquote);   // text up to and including ']' is emitted verbatim
  1.1885 +                    sb.append(quote(doquote, L_URIC, H_URIC));   // remainder is quoted against the uric masks
  1.1886 +                }   // NOTE(review): no else -- when ']' or ':' is missing nothing is appended for opaquePart
  1.1887 +            } else {
  1.1888 +                sb.append(quote(opaquePart, L_URIC, H_URIC));
  1.1889 +            }
  1.1890 +        } else {
  1.1891 +            appendAuthority(sb, authority, userInfo, host, port);
  1.1892 +            if (path != null)
  1.1893 +                sb.append(quote(path, L_PATH, H_PATH));
  1.1894 +            if (query != null) {
  1.1895 +                sb.append('?');
  1.1896 +                sb.append(quote(query, L_URIC, H_URIC));
  1.1897 +            }
  1.1898 +        }
  1.1899 +    }
  1.1900 +
  1.1901 +    private void appendFragment(StringBuffer sb, String fragment) {   // Appends '#' and the quoted fragment to sb
  1.1902 +        if (fragment != null) {   // a null fragment appends nothing
  1.1903 +            sb.append('#');
  1.1904 +            sb.append(quote(fragment, L_URIC, H_URIC));
  1.1905 +        }
  1.1906 +    }
  1.1907 +
  1.1908 +    private String toString(String scheme,          // Builds a URI string from the given components:
  1.1909 +                            String opaquePart,      // "scheme:" (when scheme is non-null), then the
  1.1910 +                            String authority,       // scheme-specific part, then the fragment.
  1.1911 +                            String userInfo,
  1.1912 +                            String host,
  1.1913 +                            int port,
  1.1914 +                            String path,
  1.1915 +                            String query,
  1.1916 +                            String fragment)
  1.1917 +    {
  1.1918 +        StringBuffer sb = new StringBuffer();
  1.1919 +        if (scheme != null) {
  1.1920 +            sb.append(scheme);
  1.1921 +            sb.append(':');
  1.1922 +        }
  1.1923 +        appendSchemeSpecificPart(sb, opaquePart,
  1.1924 +                                 authority, userInfo, host, port,
  1.1925 +                                 path, query);
  1.1926 +        appendFragment(sb, fragment);
  1.1927 +        return sb.toString();
  1.1928 +    }
  1.1929 +
  1.1930 +    private void defineSchemeSpecificPart() {   // Fills in the schemeSpecificPart field on first use
  1.1931 +        if (schemeSpecificPart != null) return;   // already defined
  1.1932 +        StringBuffer sb = new StringBuffer();
  1.1933 +        appendSchemeSpecificPart(sb, null, getAuthority(), getUserInfo(),   // null opaquePart selects the hierarchical branch
  1.1934 +                                 host, port, getPath(), getQuery());
  1.1935 +        if (sb.length() == 0) return;   // nothing was appended: leave the field unset
  1.1936 +        schemeSpecificPart = sb.toString();
  1.1937 +    }
  1.1938 +
  1.1939 +    private void defineString() {   // Fills in the string field on first use by concatenating this URI's fields as-is
  1.1940 +        if (string != null) return;   // already defined
  1.1941 +
  1.1942 +        StringBuffer sb = new StringBuffer();
  1.1943 +        if (scheme != null) {
  1.1944 +            sb.append(scheme);
  1.1945 +            sb.append(':');
  1.1946 +        }
  1.1947 +        if (isOpaque()) {
  1.1948 +            sb.append(schemeSpecificPart);
  1.1949 +        } else {
  1.1950 +            if (host != null) {   // host takes precedence over the raw authority field
  1.1951 +                sb.append("//");
  1.1952 +                if (userInfo != null) {
  1.1953 +                    sb.append(userInfo);
  1.1954 +                    sb.append('@');
  1.1955 +                }
  1.1956 +                boolean needBrackets = ((host.indexOf(':') >= 0)   // host has a ':' and neither starts
  1.1957 +                                    && !host.startsWith("[")     // with '[' nor ends with ']', so it
  1.1958 +                                    && !host.endsWith("]"));     // gets wrapped in brackets below
  1.1959 +                if (needBrackets) sb.append('[');
  1.1960 +                sb.append(host);
  1.1961 +                if (needBrackets) sb.append(']');
  1.1962 +                if (port != -1) {   // -1 means no port
  1.1963 +                    sb.append(':');
  1.1964 +                    sb.append(port);
  1.1965 +                }
  1.1966 +            } else if (authority != null) {
  1.1967 +                sb.append("//");
  1.1968 +                sb.append(authority);
  1.1969 +            }
  1.1970 +            if (path != null)
  1.1971 +                sb.append(path);
  1.1972 +            if (query != null) {
  1.1973 +                sb.append('?');
  1.1974 +                sb.append(query);
  1.1975 +            }
  1.1976 +        }
  1.1977 +        if (fragment != null) {
  1.1978 +            sb.append('#');
  1.1979 +            sb.append(fragment);
  1.1980 +        }
  1.1981 +        string = sb.toString();
  1.1982 +    }
  1.1983 +
  1.1984 +
  1.1985 +    // -- Normalization, resolution, and relativization --
  1.1986 +
  1.1987 +    // RFC2396 5.2 (6): keep base up to its last '/', append child, then normalize
  1.1988 +    private static String resolvePath(String base, String child,
  1.1989 +                                      boolean absolute)   // NOTE(review): 'absolute' is never read in this method
  1.1990 +    {
  1.1991 +        int i = base.lastIndexOf('/');   // -1 when base contains no slash
  1.1992 +        int cn = child.length();
  1.1993 +        String path = "";
  1.1994 +
  1.1995 +        if (cn == 0) {
  1.1996 +            // 5.2 (6a)
  1.1997 +            if (i >= 0)
  1.1998 +                path = base.substring(0, i + 1);
  1.1999 +        } else {
  1.2000 +            StringBuffer sb = new StringBuffer(base.length() + cn);
  1.2001 +            // 5.2 (6a)
  1.2002 +            if (i >= 0)
  1.2003 +                sb.append(base.substring(0, i + 1));
  1.2004 +            // 5.2 (6b)
  1.2005 +            sb.append(child);
  1.2006 +            path = sb.toString();
  1.2007 +        }
  1.2008 +
  1.2009 +        // 5.2 (6c-f)
  1.2010 +        String np = normalize(path);
  1.2011 +
  1.2012 +        // 5.2 (6g): If the result is absolute but the path begins with "../",
  1.2013 +        // then we simply leave the path as-is
  1.2014 +
  1.2015 +        return np;
  1.2016 +    }
  1.2017 +
  1.2018 +    // RFC2396 5.2: resolve child against base
  1.2019 +    private static URI resolve(URI base, URI child) {
  1.2020 +        // check if child is opaque first so that NPE is thrown
  1.2021 +        // if child is null.
  1.2022 +        if (child.isOpaque() || base.isOpaque())
  1.2023 +            return child;   // opaque URIs are not resolved; child is returned unchanged
  1.2024 +
  1.2025 +        // 5.2 (2): Reference to current document (lone fragment)
  1.2026 +        if ((child.scheme == null) && (child.authority == null)
  1.2027 +            && child.path.equals("") && (child.fragment != null)
  1.2028 +            && (child.query == null)) {
  1.2029 +            if ((base.fragment != null)
  1.2030 +                && child.fragment.equals(base.fragment)) {
  1.2031 +                return base;   // same fragment: base itself is the result
  1.2032 +            }
  1.2033 +            URI ru = new URI();   // copy of base's components with the child's fragment
  1.2034 +            ru.scheme = base.scheme;
  1.2035 +            ru.authority = base.authority;
  1.2036 +            ru.userInfo = base.userInfo;
  1.2037 +            ru.host = base.host;
  1.2038 +            ru.port = base.port;
  1.2039 +            ru.path = base.path;
  1.2040 +            ru.fragment = child.fragment;
  1.2041 +            ru.query = base.query;
  1.2042 +            return ru;
  1.2043 +        }
  1.2044 +
  1.2045 +        // 5.2 (3): Child is absolute
  1.2046 +        if (child.scheme != null)
  1.2047 +            return child;
  1.2048 +
  1.2049 +        URI ru = new URI();             // Resolved URI
  1.2050 +        ru.scheme = base.scheme;
  1.2051 +        ru.query = child.query;
  1.2052 +        ru.fragment = child.fragment;
  1.2053 +
  1.2054 +        // 5.2 (4): Authority
  1.2055 +        if (child.authority == null) {
  1.2056 +            ru.authority = base.authority;
  1.2057 +            ru.host = base.host;
  1.2058 +            ru.userInfo = base.userInfo;
  1.2059 +            ru.port = base.port;
  1.2060 +
  1.2061 +            String cp = (child.path == null) ? "" : child.path;   // treat a null child path as empty
  1.2062 +            if ((cp.length() > 0) && (cp.charAt(0) == '/')) {
  1.2063 +                // 5.2 (5): Child path is absolute
  1.2064 +                ru.path = child.path;
  1.2065 +            } else {
  1.2066 +                // 5.2 (6): Resolve relative path
  1.2067 +                ru.path = resolvePath(base.path, cp, base.isAbsolute());
  1.2068 +            }
  1.2069 +        } else {   // child has its own authority: take authority and path from the child
  1.2070 +            ru.authority = child.authority;
  1.2071 +            ru.host = child.host;
  1.2072 +            ru.userInfo = child.userInfo;
  1.2073 +            ru.host = child.host;   // NOTE(review): repeats the assignment made two lines above
  1.2074 +            ru.port = child.port;
  1.2075 +            ru.path = child.path;
  1.2076 +        }
  1.2077 +
  1.2078 +        // 5.2 (7): Recombine (nothing to do here)
  1.2079 +        return ru;
  1.2080 +    }
  1.2081 +
  1.2082 +    // If the given URI's path is normal then return the URI;
  1.2083 +    // o.w., return a new URI containing the normalized path.
  1.2084 +    // Opaque URIs and URIs with a null or empty path are returned as-is.
  1.2085 +    private static URI normalize(URI u) {
  1.2086 +        if (u.isOpaque() || (u.path == null) || (u.path.length() == 0))
  1.2087 +            return u;
  1.2088 +
  1.2089 +        String np = normalize(u.path);
  1.2090 +        if (np == u.path)   // identity test on purpose: normalize(String) returns its argument when nothing changed
  1.2091 +            return u;
  1.2092 +
  1.2093 +        URI v = new URI();   // same components as u, except for the path
  1.2094 +        v.scheme = u.scheme;
  1.2095 +        v.fragment = u.fragment;
  1.2096 +        v.authority = u.authority;
  1.2097 +        v.userInfo = u.userInfo;
  1.2098 +        v.host = u.host;
  1.2099 +        v.port = u.port;
  1.2100 +        v.path = np;
  1.2101 +        v.query = u.query;
  1.2102 +        return v;
  1.2103 +    }
  1.2104 +
  1.2105 +    // If both URIs are hierarchical, their scheme and authority components are
  1.2106 +    // identical, and the base path is a prefix of the child's path, then
  1.2107 +    // return a relative URI that, when resolved against the base, yields the
  1.2108 +    // child; otherwise, return the child.
  1.2109 +    //
  1.2110 +    private static URI relativize(URI base, URI child) {
  1.2111 +        // check if child is opaque first so that NPE is thrown
  1.2112 +        // if child is null.
  1.2113 +        if (child.isOpaque() || base.isOpaque())
  1.2114 +            return child;
  1.2115 +        if (!equalIgnoringCase(base.scheme, child.scheme)   // schemes are compared ignoring case,
  1.2116 +            || !equal(base.authority, child.authority))     // authorities with equal()
  1.2117 +            return child;
  1.2118 +
  1.2119 +        String bp = normalize(base.path);
  1.2120 +        String cp = normalize(child.path);
  1.2121 +        if (!bp.equals(cp)) {
  1.2122 +            if (!bp.endsWith("/"))
  1.2123 +                bp = bp + "/";   // so the prefix test below only matches at a '/' boundary
  1.2124 +            if (!cp.startsWith(bp))
  1.2125 +                return child;
  1.2126 +        }
  1.2127 +
  1.2128 +        URI v = new URI();   // only path, query and fragment are set on the result
  1.2129 +        v.path = cp.substring(bp.length());   // the empty string when bp equals cp
  1.2130 +        v.query = child.query;
  1.2131 +        v.fragment = child.fragment;
  1.2132 +        return v;
  1.2133 +    }
  1.2134 +
  1.2135 +
  1.2136 +
  1.2137 +    // -- Path normalization --
  1.2138 +
  1.2139 +    // The following algorithm for path normalization avoids the creation of a
  1.2140 +    // string object for each segment, as well as the use of a string buffer to
  1.2141 +    // compute the final result, by using a single char array and editing it in
  1.2142 +    // place.  The array is first split into segments, replacing each slash
  1.2143 +    // with '\0' and creating a segment-index array, each element of which is
  1.2144 +    // the index of the first char in the corresponding segment.  We then walk
  1.2145 +    // through both arrays, removing ".", "..", and other segments as necessary
  1.2146 +    // by setting their entries in the index array to -1.  Finally, the two
  1.2147 +    // arrays are used to rejoin the segments and compute the final result.
  1.2148 +    //
  1.2149 +    // This code is based upon src/solaris/native/java/io/canonicalize_md.c
  1.2150 +
  1.2151 +
  1.2152 +    // Check the given path to see if it might need normalization.  A path
  1.2153 +    // might need normalization if it contains duplicate slashes, a "."
  1.2154 +    // segment, or a ".." segment.  Return -1 if no further normalization is
  1.2155 +    // possible, otherwise return the number of segments found.
  1.2156 +    //
  1.2157 +    // This method takes a string argument rather than a char array so that
  1.2158 +    // this test can be performed without invoking path.toCharArray().
  1.2159 +    //
  1.2160 +    static private int needsNormalization(String path) {
  1.2161 +        boolean normal = true;
  1.2162 +        int ns = 0;                     // Number of segments
  1.2163 +        int end = path.length() - 1;    // Index of last char in path
  1.2164 +        int p = 0;                      // Index of next char in path
  1.2165 +
  1.2166 +        // Skip initial slashes
  1.2167 +        while (p <= end) {
  1.2168 +            if (path.charAt(p) != '/') break;
  1.2169 +            p++;
  1.2170 +        }
  1.2171 +        if (p > 1) normal = false;      // More than one leading slash
  1.2172 +
  1.2173 +        // Scan segments
  1.2174 +        while (p <= end) {
  1.2175 +
  1.2176 +            // Looking at "." or ".." ?
  1.2177 +            if ((path.charAt(p) == '.')
  1.2178 +                && ((p == end)
  1.2179 +                    || ((path.charAt(p + 1) == '/')
  1.2180 +                        || ((path.charAt(p + 1) == '.')
  1.2181 +                            && ((p + 1 == end)
  1.2182 +                                || (path.charAt(p + 2) == '/')))))) {
  1.2183 +                normal = false;
  1.2184 +            }
  1.2185 +            ns++;                       // Every segment is counted, dotted or not
  1.2186 +
  1.2187 +            // Find beginning of next segment
  1.2188 +            while (p <= end) {
  1.2189 +                if (path.charAt(p++) != '/')
  1.2190 +                    continue;
  1.2191 +
  1.2192 +                // Skip redundant slashes
  1.2193 +                while (p <= end) {
  1.2194 +                    if (path.charAt(p) != '/') break;
  1.2195 +                    normal = false;
  1.2196 +                    p++;
  1.2197 +                }
  1.2198 +
  1.2199 +                break;
  1.2200 +            }
  1.2201 +        }
  1.2202 +
  1.2203 +        return normal ? -1 : ns;
  1.2204 +    }
  1.2205 +
  1.2206 +
  1.2207 +    // Split the given path into segments, replacing slashes with nulls and
  1.2208 +    // filling in the given segment-index array.
  1.2209 +    //
  1.2210 +    // Preconditions:
  1.2211 +    //   segs.length == Number of segments in path
  1.2212 +    //
  1.2213 +    // Postconditions:
  1.2214 +    //   All slashes in path replaced by '\0'
  1.2215 +    //   segs[i] == Index of first char in segment i (0 <= i < segs.length)
  1.2216 +    //
  1.2217 +    static private void split(char[] path, int[] segs) {
  1.2218 +        int end = path.length - 1;      // Index of last char in path
  1.2219 +        int p = 0;                      // Index of next char in path
  1.2220 +        int i = 0;                      // Index of current segment
  1.2221 +
  1.2222 +        // Skip initial slashes
  1.2223 +        while (p <= end) {
  1.2224 +            if (path[p] != '/') break;
  1.2225 +            path[p] = '\0';
  1.2226 +            p++;
  1.2227 +        }
  1.2228 +
  1.2229 +        while (p <= end) {
  1.2230 +
  1.2231 +            // Note start of segment
  1.2232 +            segs[i++] = p++;
  1.2233 +
  1.2234 +            // Find beginning of next segment
  1.2235 +            while (p <= end) {
  1.2236 +                if (path[p++] != '/')
  1.2237 +                    continue;
  1.2238 +                path[p - 1] = '\0';     // p was already advanced past the slash
  1.2239 +
  1.2240 +                // Skip redundant slashes
  1.2241 +                while (p <= end) {
  1.2242 +                    if (path[p] != '/') break;
  1.2243 +                    path[p++] = '\0';
  1.2244 +                }
  1.2245 +                break;
  1.2246 +            }
  1.2247 +        }
  1.2248 +
  1.2249 +        if (i != segs.length)           // Segments found must match the array size (see precondition)
  1.2250 +            throw new InternalError();  // ASSERT
  1.2251 +    }
  1.2252 +
  1.2253 +
  1.2254 +    // Join the segments in the given path according to the given segment-index
  1.2255 +    // array, ignoring those segments whose index entries have been set to -1,
  1.2256 +    // and inserting slashes as needed.  Return the length of the resulting
  1.2257 +    // path.
  1.2258 +    //
  1.2259 +    // Preconditions:
  1.2260 +    //   segs[i] == -1 implies segment i is to be ignored
  1.2261 +    //   path computed by split, as above, with '\0' having replaced '/'
  1.2262 +    //
  1.2263 +    // Postconditions:
  1.2264 +    //   path[0] .. path[return value - 1] == Resulting path
  1.2265 +    //
  1.2266 +    static private int join(char[] path, int[] segs) {
  1.2267 +        int ns = segs.length;           // Number of segments
  1.2268 +        int end = path.length - 1;      // Index of last char in path
  1.2269 +        int p = 0;                      // Index of next path char to write
  1.2270 +
  1.2271 +        if (path[p] == '\0') {
  1.2272 +            // Restore initial slash for absolute paths
  1.2273 +            path[p++] = '/';
  1.2274 +        }
  1.2275 +
  1.2276 +        for (int i = 0; i < ns; i++) {
  1.2277 +            int q = segs[i];            // Current segment
  1.2278 +            if (q == -1)
  1.2279 +                // Ignore this segment
  1.2280 +                continue;
  1.2281 +
  1.2282 +            if (p == q) {
  1.2283 +                // We're already at this segment, so just skip to its end
  1.2284 +                while ((p <= end) && (path[p] != '\0'))
  1.2285 +                    p++;
  1.2286 +                if (p <= end) {
  1.2287 +                    // Preserve trailing slash
  1.2288 +                    path[p++] = '/';
  1.2289 +                }
  1.2290 +            } else if (p < q) {
  1.2291 +                // Copy q down to p
  1.2292 +                while ((q <= end) && (path[q] != '\0'))
  1.2293 +                    path[p++] = path[q++];
  1.2294 +                if (q <= end) {
  1.2295 +                    // Preserve trailing slash
  1.2296 +                    path[p++] = '/';
  1.2297 +                }
  1.2298 +            } else                      // p > q: the write index has passed the segment start
  1.2299 +                throw new InternalError(); // ASSERT false
  1.2300 +        }
  1.2301 +
  1.2302 +        return p;
  1.2303 +    }
  1.2304 +
  1.2305 +
  1.2306 +    // Remove "." segments from the given path, and remove segment pairs
  1.2307 +    // consisting of a non-".." segment followed by a ".." segment.
  1.2308 +    // Removal is done by setting the segment's entry in segs to -1.
  1.2309 +    private static void removeDots(char[] path, int[] segs) {
  1.2310 +        int ns = segs.length;
  1.2311 +        int end = path.length - 1;
  1.2312 +
  1.2313 +        for (int i = 0; i < ns; i++) {
  1.2314 +            int dots = 0;               // Number of dots found (0, 1, or 2)
  1.2315 +
  1.2316 +            // Find next occurrence of "." or ".."
  1.2317 +            do {
  1.2318 +                int p = segs[i];
  1.2319 +                if (path[p] == '.') {
  1.2320 +                    if (p == end) {
  1.2321 +                        dots = 1;
  1.2322 +                        break;
  1.2323 +                    } else if (path[p + 1] == '\0') {
  1.2324 +                        dots = 1;
  1.2325 +                        break;
  1.2326 +                    } else if ((path[p + 1] == '.')
  1.2327 +                               && ((p + 1 == end)
  1.2328 +                                   || (path[p + 2] == '\0'))) {
  1.2329 +                        dots = 2;
  1.2330 +                        break;
  1.2331 +                    }
  1.2332 +                }
  1.2333 +                i++;
  1.2334 +            } while (i < ns);
  1.2335 +            if ((i > ns) || (dots == 0))   // NOTE(review): i never exceeds ns here; the exit relies on dots == 0
  1.2336 +                break;
  1.2337 +
  1.2338 +            if (dots == 1) {
  1.2339 +                // Remove this occurrence of "."
  1.2340 +                segs[i] = -1;
  1.2341 +            } else {
  1.2342 +                // If there is a preceding non-".." segment, remove both that
  1.2343 +                // segment and this occurrence of ".."; otherwise, leave this
  1.2344 +                // ".." segment as-is.
  1.2345 +                int j;
  1.2346 +                for (j = i - 1; j >= 0; j--) {   // nearest earlier segment not already removed
  1.2347 +                    if (segs[j] != -1) break;
  1.2348 +                }
  1.2349 +                if (j >= 0) {
  1.2350 +                    int q = segs[j];
  1.2351 +                    if (!((path[q] == '.')          // segment j is not exactly ".."
  1.2352 +                          && (path[q + 1] == '.')
  1.2353 +                          && (path[q + 2] == '\0'))) {
  1.2354 +                        segs[i] = -1;
  1.2355 +                        segs[j] = -1;
  1.2356 +                    }
  1.2357 +                }
  1.2358 +            }
  1.2359 +        }
  1.2360 +    }
  1.2361 +
  1.2362 +
  1.2363 +    // DEVIATION: If the normalized path is relative, and if the first
  1.2364 +    // remaining segment contains a ':' (so it could be read as a scheme
  1.2365 +    // name), then prepend a "." segment
  1.2366 +    private static void maybeAddLeadingDot(char[] path, int[] segs) {
  1.2367 +
  1.2368 +        if (path[0] == '\0')
  1.2369 +            // The path is absolute
  1.2370 +            return;
  1.2371 +
  1.2372 +        int ns = segs.length;
  1.2373 +        int f = 0;                      // Index of first segment
  1.2374 +        while (f < ns) {
  1.2375 +            if (segs[f] >= 0)           // First entry not set to -1
  1.2376 +                break;
  1.2377 +            f++;
  1.2378 +        }
  1.2379 +        if ((f >= ns) || (f == 0))
  1.2380 +            // The path is empty, or else the original first segment survived,
  1.2381 +            // in which case we already know that no leading "." is needed
  1.2382 +            return;
  1.2383 +
  1.2384 +        int p = segs[f];
  1.2385 +        while ((p < path.length) && (path[p] != ':') && (path[p] != '\0')) p++;   // scan segment f for a colon
  1.2386 +        if (p >= path.length || path[p] == '\0')
  1.2387 +            // No colon in first segment, so no "." needed
  1.2388 +            return;
  1.2389 +
  1.2390 +        // At this point we know that the first segment is unused,
  1.2391 +        // hence we can insert a "." segment at that position
  1.2392 +        path[0] = '.';
  1.2393 +        path[1] = '\0';
  1.2394 +        segs[0] = 0;                    // Re-enable entry 0 so that join() emits the "." segment
  1.2395 +    }
  1.2396 +
  1.2397 +
  1.2398 +    // Normalize the given path string.  A normal path string has no empty
  1.2399 +    // segments (i.e., occurrences of "//"), no segments equal to ".", and no
  1.2400 +    // segments equal to ".." that are preceded by a segment not equal to "..".
  1.2401 +    // In contrast to Unix-style pathname normalization, for URI paths we
  1.2402 +    // always retain trailing slashes.
  1.2403 +    // Returns the argument itself (same object) when the result is unchanged.
  1.2404 +    private static String normalize(String ps) {
  1.2405 +
  1.2406 +        // Does this path need normalization?
  1.2407 +        int ns = needsNormalization(ps);        // Number of segments
  1.2408 +        if (ns < 0)
  1.2409 +            // Nope -- just return it
  1.2410 +            return ps;
  1.2411 +
  1.2412 +        char[] path = ps.toCharArray();         // Path in char-array form
  1.2413 +
  1.2414 +        // Split path into segments
  1.2415 +        int[] segs = new int[ns];               // Segment-index array
  1.2416 +        split(path, segs);
  1.2417 +
  1.2418 +        // Remove dots
  1.2419 +        removeDots(path, segs);
  1.2420 +
  1.2421 +        // Prevent scheme-name confusion
  1.2422 +        maybeAddLeadingDot(path, segs);
  1.2423 +
  1.2424 +        // Join the remaining segments and return the result
  1.2425 +        String s = new String(path, 0, join(path, segs));   // join() returns the result length
  1.2426 +        if (s.equals(ps)) {
  1.2427 +            // string was already normalized; hand back the original object
  1.2428 +            return ps;
  1.2429 +        }
  1.2430 +        return s;
  1.2431 +    }
  1.2432 +
  1.2433 +
  1.2434 +
  1.2435 +    // -- Character classes for parsing --
  1.2436 +
  1.2437 +    // RFC2396 precisely specifies which characters in the US-ASCII charset are
  1.2438 +    // permissible in the various components of a URI reference.  We here
  1.2439 +    // define a set of mask pairs to aid in enforcing these restrictions.  Each
  1.2440 +    // mask pair consists of two longs, a low mask and a high mask.  Taken
  1.2441 +    // together they represent a 128-bit mask, where bit i is set iff the
  1.2442 +    // character with value i is permitted.
  1.2443 +    //
  1.2444 +    // This approach is more efficient than sequentially searching arrays of
  1.2445 +    // permitted characters.  It could be made still more efficient by
  1.2446 +    // precompiling the mask information so that a character's presence in a
  1.2447 +    // given mask could be determined by a single table lookup.
  1.2448 +
  1.2449 +    // Compute the low-order mask for the characters in the given string
  1.2450 +    private static long lowMask(String chars) {
  1.2451 +        int n = chars.length();
  1.2452 +        long m = 0;
  1.2453 +        for (int i = 0; i < n; i++) {
  1.2454 +            char c = chars.charAt(i);
  1.2455 +            if (c < 64)                 // chars 64 and above are skipped here
  1.2456 +                m |= (1L << c);         // bit c stands for character value c
  1.2457 +        }
  1.2458 +        return m;
  1.2459 +    }
  1.2460 +
  1.2461 +    // Compute the high-order mask for the characters in the given string
  1.2462 +    private static long highMask(String chars) {
  1.2463 +        int n = chars.length();
  1.2464 +        long m = 0;
  1.2465 +        for (int i = 0; i < n; i++) {
  1.2466 +            char c = chars.charAt(i);
  1.2467 +            if ((c >= 64) && (c < 128))     // chars outside 64..127 are skipped here
  1.2468 +                m |= (1L << (c - 64));      // bit (c - 64) stands for character value c
  1.2469 +        }
  1.2470 +        return m;
  1.2471 +    }
  1.2472 +
  1.2473 +    // Compute a low-order mask for the characters
  1.2474 +    // between first and last, inclusive
  1.2475 +    private static long lowMask(char first, char last) {
  1.2476 +        long m = 0;
  1.2477 +        int f = Math.max(Math.min(first, 63), 0);   // both bounds are clamped into 0..63
  1.2478 +        int l = Math.max(Math.min(last, 63), 0);
  1.2479 +        for (int i = f; i <= l; i++)   // NOTE(review): if first > 63, f == l == 63 and bit 63 is still set
  1.2480 +            m |= 1L << i;
  1.2481 +        return m;
  1.2482 +    }
  1.2483 +
  1.2484 +    // Compute a high-order mask for the characters
  1.2485 +    // between first and last, inclusive
  1.2486 +    private static long highMask(char first, char last) {
  1.2487 +        long m = 0;
  1.2488 +        int f = Math.max(Math.min(first, 127), 64) - 64;   // both bounds are clamped into 64..127, then rebased to 0..63
  1.2489 +        int l = Math.max(Math.min(last, 127), 64) - 64;
  1.2490 +        for (int i = f; i <= l; i++)   // NOTE(review): if last < 64, f == l == 0 and bit 0 is still set
  1.2491 +            m |= 1L << i;
  1.2492 +        return m;
  1.2493 +    }
  1.2494 +
  1.2495 +    // Tell whether the given character is permitted by the given mask pair
  1.2496 +    private static boolean match(char c, long lowMask, long highMask) {
  1.2497 +        if (c == 0) // 0 doesn't have a slot in the mask. So, it never matches.
  1.2498 +            return false;
  1.2499 +        if (c < 64)
  1.2500 +            return ((1L << c) & lowMask) != 0;
  1.2501 +        if (c < 128)
  1.2502 +            return ((1L << (c - 64)) & highMask) != 0;
  1.2503 +        return false;   // characters 128 and above never match
  1.2504 +    }
  1.2505 +
  1.2506 +    // Character-class masks, in reverse order from RFC2396 because
  1.2507 +    // initializers for static fields cannot make forward references.
  1.2508 +
  1.2509 +    // digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
  1.2510 +    //            "8" | "9"
  1.2511 +    private static final long L_DIGIT = lowMask('0', '9');
  1.2512 +    private static final long H_DIGIT = 0L;   // all digits are below 64, so the high half is empty
  1.2513 +
  1.2514 +    // upalpha  = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
  1.2515 +    //            "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
  1.2516 +    //            "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
  1.2517 +    private static final long L_UPALPHA = 0L;   // all letters are 64 or above, so the low half is empty
  1.2518 +    private static final long H_UPALPHA = highMask('A', 'Z');
  1.2519 +
  1.2520 +    // lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
  1.2521 +    //            "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
  1.2522 +    //            "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
  1.2523 +    private static final long L_LOWALPHA = 0L;   // all letters are 64 or above, so the low half is empty
  1.2524 +    private static final long H_LOWALPHA = highMask('a', 'z');
  1.2525 +
  1.2526 +    // alpha         = lowalpha | upalpha
  1.2527 +    private static final long L_ALPHA = L_LOWALPHA | L_UPALPHA;
  1.2528 +    private static final long H_ALPHA = H_LOWALPHA | H_UPALPHA;
  1.2529 +
  1.2530 +    // alphanum      = alpha | digit
  1.2531 +    private static final long L_ALPHANUM = L_DIGIT | L_ALPHA;
  1.2532 +    private static final long H_ALPHANUM = H_DIGIT | H_ALPHA;
  1.2533 +
  1.2534 +    // hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
  1.2535 +    //                         "a" | "b" | "c" | "d" | "e" | "f"
  1.2536 +    private static final long L_HEX = L_DIGIT;
  1.2537 +    private static final long H_HEX = highMask('A', 'F') | highMask('a', 'f');
  1.2538 +
  1.2539 +    // mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
  1.2540 +    //                 "(" | ")"
  1.2541 +    private static final long L_MARK = lowMask("-_.!~*'()");
  1.2542 +    private static final long H_MARK = highMask("-_.!~*'()");
  1.2543 +
  1.2544 +    // unreserved    = alphanum | mark
  1.2545 +    private static final long L_UNRESERVED = L_ALPHANUM | L_MARK;
  1.2546 +    private static final long H_UNRESERVED = H_ALPHANUM | H_MARK;
  1.2547 +
  1.2548 +    // reserved      = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
  1.2549 +    //                 "$" | "," | "[" | "]"
  1.2550 +    // Added per RFC2732: "[", "]"
  1.2551 +    private static final long L_RESERVED = lowMask(";/?:@&=+$,[]");
  1.2552 +    private static final long H_RESERVED = highMask(";/?:@&=+$,[]");
  1.2553 +
  1.2554 +    // The zero'th bit is used to indicate that escape pairs and non-US-ASCII
  1.2555 +    // characters are allowed; this is handled by the scanEscape method below.
  1.2556 +    private static final long L_ESCAPED = 1L;   // bit 0 only; match() never tests it because it rejects c == 0 up front
  1.2557 +    private static final long H_ESCAPED = 0L;
  1.2558 +
  1.2559 +    // uric          = reserved | unreserved | escaped
  1.2560 +    private static final long L_URIC = L_RESERVED | L_UNRESERVED | L_ESCAPED;
  1.2561 +    private static final long H_URIC = H_RESERVED | H_UNRESERVED | H_ESCAPED;
  1.2562 +
  1.2563 +    // pchar         = unreserved | escaped |
  1.2564 +    //                 ":" | "@" | "&" | "=" | "+" | "$" | ","
  1.2565 +    private static final long L_PCHAR
  1.2566 +        = L_UNRESERVED | L_ESCAPED | lowMask(":@&=+$,");
  1.2567 +    private static final long H_PCHAR
  1.2568 +        = H_UNRESERVED | H_ESCAPED | highMask(":@&=+$,");
  1.2569 +
  1.2570 +    // All valid path characters
  1.2571 +    private static final long L_PATH = L_PCHAR | lowMask(";/");
  1.2572 +    private static final long H_PATH = H_PCHAR | highMask(";/");
  1.2573 +
  1.2574 +    // Dash, for use in domainlabel and toplabel
  1.2575 +    private static final long L_DASH = lowMask("-");
  1.2576 +    private static final long H_DASH = highMask("-");
  1.2577 +
  1.2578 +    // Dot, for use in hostnames
  1.2579 +    private static final long L_DOT = lowMask(".");
  1.2580 +    private static final long H_DOT = highMask(".");
  1.2581 +
  1.2582 +    // userinfo      = *( unreserved | escaped |
  1.2583 +    //                    ";" | ":" | "&" | "=" | "+" | "$" | "," )
  1.2584 +    private static final long L_USERINFO
  1.2585 +        = L_UNRESERVED | L_ESCAPED | lowMask(";:&=+$,");
  1.2586 +    private static final long H_USERINFO
  1.2587 +        = H_UNRESERVED | H_ESCAPED | highMask(";:&=+$,");
  1.2588 +
  1.2589 +    // reg_name      = 1*( unreserved | escaped | "$" | "," |
  1.2590 +    //                     ";" | ":" | "@" | "&" | "=" | "+" )
  1.2591 +    private static final long L_REG_NAME
  1.2592 +        = L_UNRESERVED | L_ESCAPED | lowMask("$,;:@&=+");
  1.2593 +    private static final long H_REG_NAME
  1.2594 +        = H_UNRESERVED | H_ESCAPED | highMask("$,;:@&=+");
  1.2595 +
  1.2596 +    // All valid characters for server-based authorities
  1.2597 +    private static final long L_SERVER
  1.2598 +        = L_USERINFO | L_ALPHANUM | L_DASH | lowMask(".:@[]");
  1.2599 +    private static final long H_SERVER
  1.2600 +        = H_USERINFO | H_ALPHANUM | H_DASH | highMask(".:@[]");
  1.2601 +
  1.2602 +    // Special case of server authority that represents an IPv6 address
  1.2603 +    // In this case, a % does not signify an escape sequence
  1.2604 +    private static final long L_SERVER_PERCENT
  1.2605 +        = L_SERVER | lowMask("%");
  1.2606 +    private static final long H_SERVER_PERCENT
  1.2607 +        = H_SERVER | highMask("%");
  1.2608 +    private static final long L_LEFT_BRACKET = lowMask("[");
  1.2609 +    private static final long H_LEFT_BRACKET = highMask("[");
  1.2610 +
  1.2611 +    // scheme        = alpha *( alpha | digit | "+" | "-" | "." )
  1.2612 +    private static final long L_SCHEME = L_ALPHA | L_DIGIT | lowMask("+-.");
  1.2613 +    private static final long H_SCHEME = H_ALPHA | H_DIGIT | highMask("+-.");
  1.2614 +
  1.2615 +    // uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
  1.2616 +    //                 "&" | "=" | "+" | "$" | ","
  1.2617 +    private static final long L_URIC_NO_SLASH
  1.2618 +        = L_UNRESERVED | L_ESCAPED | lowMask(";?:@&=+$,");
  1.2619 +    private static final long H_URIC_NO_SLASH
  1.2620 +        = H_UNRESERVED | H_ESCAPED | highMask(";?:@&=+$,");
  1.2621 +
  1.2622 +
  1.2623 +    // -- Escaping and encoding --
  1.2624 +
  1.2625 +    // Upper-case hexadecimal digits indexed by nibble value (0 to 15);
  1.2626 +    // appendEscape uses them to spell the two digits of a %XX escape
  1.2627 +    private final static char[] hexDigits
  1.2628 +        = "0123456789ABCDEF".toCharArray();
  1.2629 +
  1.2630 +    private static void appendEscape(StringBuffer sb, byte b) {
  1.2631 +        int high = (b >> 4) & 0x0f;     // upper nibble of the octet
  1.2632 +        int low = b & 0x0f;             // lower nibble of the octet
  1.2633 +        sb.append('%').append(hexDigits[high]).append(hexDigits[low]);
  1.2634 +    }
  1.2635 +
  1.2636 +    private static void appendEncoded(StringBuffer sb, char c) {
  1.2637 +        ByteBuffer octets = null;           // UTF-8 encoding of c
  1.2638 +        try {
  1.2639 +            CharBuffer single = CharBuffer.wrap(String.valueOf(c));
  1.2640 +            octets = ThreadLocalCoders.encoderFor("UTF-8").encode(single);
  1.2641 +        } catch (CharacterCodingException x) {
  1.2642 +            assert false;
  1.2643 +        }
  1.2644 +        while (octets.hasRemaining()) {
  1.2645 +            byte octet = octets.get();
  1.2646 +            if (octet >= 0)                 // US-ASCII octet: append as is
  1.2647 +                sb.append((char)octet);
  1.2648 +            else                            // octet >= 0x80: append as %XX
  1.2649 +                appendEscape(sb, octet);
  1.2650 +        }
  1.2651 +    }
  1.2652 +
  1.2653 +    // Quote any characters in s that are not permitted by the given mask
  1.2654 +    // pair.  Returns s itself when no character needs quoting.
  1.2655 +    //
  1.2656 +    private static String quote(String s, long lowMask, long highMask) {
  1.2657 +        int n = s.length();
  1.2658 +        StringBuffer sb = null;         // created on the first quoted char
  1.2659 +        boolean allowNonASCII = ((lowMask & L_ESCAPED) != 0);
  1.2660 +        for (int i = 0; i < n; i++) {
  1.2661 +            char c = s.charAt(i);
  1.2662 +            if (c < '\u0080') {
  1.2663 +                if (!match(c, lowMask, highMask)) {
  1.2664 +                    if (sb == null) {       // first change: copy the prefix
  1.2665 +                        sb = new StringBuffer();
  1.2666 +                        sb.append(s.substring(0, i));
  1.2667 +                    }
  1.2668 +                    appendEscape(sb, (byte)c);
  1.2669 +                } else {
  1.2670 +                    if (sb != null)
  1.2671 +                        sb.append(c);
  1.2672 +                }
  1.2673 +            } else if (allowNonASCII
  1.2674 +                       && (Character.isSpaceChar(c)
  1.2675 +                           || Character.isISOControl(c))) {
  1.2676 +                if (sb == null) {           // first change: copy the prefix
  1.2677 +                    sb = new StringBuffer();
  1.2678 +                    sb.append(s.substring(0, i));
  1.2679 +                }
  1.2680 +                appendEncoded(sb, c);
  1.2681 +            } else {
  1.2682 +                if (sb != null)             // other non-ASCII chars stay as is
  1.2683 +                    sb.append(c);
  1.2684 +            }
  1.2685 +        }
  1.2686 +        return (sb == null) ? s : sb.toString();
  1.2687 +    }
  1.2688 +
  1.2689 +    // Encodes every char >= \u0080 of s as escaped UTF-8 octets of its NFC
  1.2690 +    // normalization, assuming that s is otherwise legal.  A string with no
  1.2691 +    // such char (including the empty string) is returned unchanged.
  1.2692 +    private static String encode(String s) {
  1.2693 +        // Fast path: nothing to do unless some char is outside US-ASCII
  1.2694 +        boolean needsEncoding = false;
  1.2695 +        for (int i = 0, n = s.length(); i < n; i++) {
  1.2696 +            if (s.charAt(i) >= '\u0080') {
  1.2697 +                needsEncoding = true;
  1.2698 +                break;
  1.2699 +            }
  1.2700 +        }
  1.2701 +        if (!needsEncoding)
  1.2702 +            return s;
  1.2703 +
  1.2704 +        // Normalize to NFC, then encode as UTF-8
  1.2705 +        ByteBuffer octets = null;
  1.2706 +        try {
  1.2707 +            String nfc = Normalizer.normalize(s, Normalizer.Form.NFC);
  1.2708 +            octets = ThreadLocalCoders.encoderFor("UTF-8")
  1.2709 +                .encode(CharBuffer.wrap(nfc));
  1.2710 +        } catch (CharacterCodingException x) {
  1.2711 +            assert false;
  1.2712 +        }
  1.2713 +
  1.2714 +        StringBuffer out = new StringBuffer();
  1.2715 +        while (octets.hasRemaining()) {
  1.2716 +            byte octet = octets.get();
  1.2717 +            if (octet >= 0)                 // US-ASCII passes through
  1.2718 +                out.append((char)octet);
  1.2719 +            else                            // octet >= 0x80 is escaped
  1.2720 +                appendEscape(out, octet);
  1.2721 +        }
  1.2722 +        return out.toString();
  1.2723 +    }
  1.2724 +
  1.2725 +    private static int decode(char c) {     // value of one hex digit
  1.2726 +        if ('0' <= c && c <= '9')
  1.2727 +            return c - '0';
  1.2728 +        if ('A' <= c && c <= 'F')
  1.2729 +            return 10 + (c - 'A');
  1.2730 +        if ('a' <= c && c <= 'f')
  1.2731 +            return 10 + (c - 'a');
  1.2732 +        assert false;                       // not a hexadecimal digit
  1.2733 +        return -1;
  1.2734 +    }
  1.2735 +
  1.2736 +    private static byte decode(char c1, char c2) {
  1.2737 +        int high = decode(c1) & 0xf;        // upper nibble from first digit
  1.2738 +        return (byte)((high << 4) | (decode(c2) & 0xf));
  1.2739 +    }
  1.2740 +
  1.2741 +    // Evaluates all escapes in s, applying UTF-8 decoding to each run of
  1.2742 +    // consecutive %XX escapes.  Assumes that escapes are well-formed
  1.2743 +    // syntactically, i.e., of the form %XX.  If a run of escaped octets is
  1.2744 +    // not valid UTF-8 then the erroneous octets are replaced with '\uFFFD'.
  1.2745 +    // A null, empty or "%"-free s is returned as is.
  1.2746 +    // Exception: any "%" found between "[" and "]" is left alone, since it
  1.2747 +    //            belongs to an IPv6 literal with a scope_id
  1.2748 +    private static String decode(String s) {
  1.2749 +        if (s == null)
  1.2750 +            return s;
  1.2751 +        int n = s.length();
  1.2752 +        if (n == 0)
  1.2753 +            return s;
  1.2754 +        if (s.indexOf('%') < 0)
  1.2755 +            return s;                   // no escapes: nothing to decode
  1.2756 +
  1.2757 +        StringBuffer sb = new StringBuffer(n);
  1.2758 +        ByteBuffer bb = ByteBuffer.allocate(n);
  1.2759 +        CharBuffer cb = CharBuffer.allocate(n);
  1.2760 +        CharsetDecoder dec = ThreadLocalCoders.decoderFor("UTF-8")
  1.2761 +            .onMalformedInput(CodingErrorAction.REPLACE)
  1.2762 +            .onUnmappableCharacter(CodingErrorAction.REPLACE);
  1.2763 +
  1.2764 +        // This is not horribly efficient, but it will do for now
  1.2765 +        char c = s.charAt(0);
  1.2766 +        boolean betweenBrackets = false;
  1.2767 +
  1.2768 +        for (int i = 0; i < n;) {
  1.2769 +            assert c == s.charAt(i);    // Loop invariant
  1.2770 +            if (c == '[') {
  1.2771 +                betweenBrackets = true;
  1.2772 +            } else if (betweenBrackets && c == ']') {
  1.2773 +                betweenBrackets = false;
  1.2774 +            }
  1.2775 +            if (c != '%' || betweenBrackets) {  // literal char: copy through
  1.2776 +                sb.append(c);
  1.2777 +                if (++i >= n)
  1.2778 +                    break;
  1.2779 +                c = s.charAt(i);
  1.2780 +                continue;
  1.2781 +            }
  1.2782 +            bb.clear();                 // start collecting a run of octets
  1.2783 +            int ui = i;                 // NOTE(review): ui is never read
  1.2784 +            for (;;) {
  1.2785 +                assert (n - i >= 2);
  1.2786 +                bb.put(decode(s.charAt(++i), s.charAt(++i)));   // two digits
  1.2787 +                if (++i >= n)
  1.2788 +                    break;
  1.2789 +                c = s.charAt(i);
  1.2790 +                if (c != '%')
  1.2791 +                    break;
  1.2792 +            }
  1.2793 +            bb.flip();
  1.2794 +            cb.clear();
  1.2795 +            dec.reset();
  1.2796 +            CoderResult cr = dec.decode(bb, cb, true);  // whole run at once
  1.2797 +            assert cr.isUnderflow();
  1.2798 +            cr = dec.flush(cb);
  1.2799 +            assert cr.isUnderflow();
  1.2800 +            sb.append(cb.flip().toString());
  1.2801 +        }
  1.2802 +
  1.2803 +        return sb.toString();
  1.2804 +    }
  1.2805 +
  1.2806 +
  1.2807 +    // -- Parsing --
  1.2808 +
  1.2809 +    // For convenience we wrap the input URI string in a new instance of the
  1.2810 +    // following internal class.  This saves always having to pass the input
  1.2811 +    // string as an argument to each internal scan/parse method.
  1.2812 +
  1.2813 +    private class Parser {
  1.2814 +
  1.2815 +        private String input;           // The URI string being parsed
  1.2816 +        private boolean requireServerAuthority = false;
  1.2817 +
  1.2818 +        Parser(String s) {
  1.2819 +            string = s;                 // the same text is kept in "string"
  1.2820 +            this.input = s;
  1.2821 +        }
  1.2822 +
  1.2823 +        // -- Methods for throwing URISyntaxException in various ways --
  1.2824 +
  1.2825 +        private void fail(String reason) throws URISyntaxException {
  1.2826 +            throw new URISyntaxException(this.input, reason);  // no index
  1.2827 +        }
  1.2828 +
  1.2829 +        private void fail(String reason, int p) throws URISyntaxException {
  1.2830 +            throw new URISyntaxException(this.input, reason, p); // index p
  1.2831 +        }
  1.2832 +
  1.2833 +        private void failExpecting(String expected, int p)
  1.2834 +            throws URISyntaxException {
  1.2835 +            String reason = "Expected " + expected;
  1.2836 +            fail(reason, p);
  1.2837 +        }
  1.2838 +
  1.2839 +        private void failExpecting(String expected, String prior, int p)
  1.2840 +            throws URISyntaxException {
  1.2841 +            String reason = "Expected " + expected + " following " + prior;
  1.2842 +            fail(reason, p);
  1.2843 +        }
  1.2844 +
  1.2845 +
  1.2846 +        // -- Simple access to the input string --
  1.2847 +
  1.2848 +        // Return the chars of the input string from start (inclusive)
  1.2849 +        // up to end (exclusive)
  1.2850 +        private String substring(int start, int end) {
  1.2851 +            return this.input.substring(start, end);
  1.2852 +        }
  1.2853 +
  1.2854 +        // Return the char of the input string at position p;
  1.2855 +        // the caller must ensure that p < input.length()
  1.2856 +        //
  1.2857 +        private char charAt(int p) {
  1.2858 +            return this.input.charAt(p);
  1.2859 +        }
  1.2860 +
  1.2861 +        // Tells whether position start lies before end and holds the char c
  1.2862 +        //
  1.2863 +        private boolean at(int start, int end, char c) {
  1.2864 +            return (start >= end) ? false : (charAt(start) == c);
  1.2865 +        }
  1.2866 +
  1.2867 +        // Tells whether s fits within [start, end), i.e., whether
  1.2868 +        // start + s.length() <= end, and, if so, whether the chars at the
  1.2869 +        // start position match s exactly.
  1.2870 +        private boolean at(int start, int end, String s) {
  1.2871 +            int len = s.length();
  1.2872 +            if (end - start < len) {
  1.2873 +                // Not enough room left for s
  1.2874 +                return false;
  1.2875 +            }
  1.2876 +            for (int k = 0; k < len; k++) {
  1.2877 +                if (charAt(start + k) != s.charAt(k)) {
  1.2878 +                    // First mismatch
  1.2879 +                    return false;
  1.2880 +                }
  1.2881 +            }
  1.2882 +            return true;
  1.2883 +        }
  1.2884 +
  1.2885 +
  1.2886 +        // -- Scanning --
  1.2887 +
  1.2888 +        // The various scan and parse methods that follow use a uniform
  1.2889 +        // convention of taking the current start position and end index as
  1.2890 +        // their first two arguments.  The start is inclusive while the end is
  1.2891 +        // exclusive, just as in the String class, i.e., a start/end pair
  1.2892 +        // denotes the half-open interval [start, end) of the input string.
  1.2893 +        //
  1.2894 +        // These methods never proceed past the end position.  They may return
  1.2895 +        // -1 to indicate outright failure, but more often they simply return
  1.2896 +        // the position of the first char after the last char scanned.  Thus
  1.2897 +        // a typical idiom is
  1.2898 +        //
  1.2899 +        //     int p = start;
  1.2900 +        //     int q = scan(p, end, ...);
  1.2901 +        //     if (q > p)
  1.2902 +        //         // We scanned something
  1.2903 +        //         ...;
  1.2904 +        //     else if (q == p)
  1.2905 +        //         // We scanned nothing
  1.2906 +        //         ...;
  1.2907 +        //     else if (q == -1)
  1.2908 +        //         // Something went wrong
  1.2909 +        //         ...;
  1.2910 +
  1.2911 +
  1.2912 +        // Scan a specific char: If start lies before end and the char at the
  1.2913 +        // start position is equal to c, return the index of the next char;
  1.2914 +        // otherwise, return the start position.
  1.2915 +        //
  1.2916 +        private int scan(int start, int end, char c) {
  1.2917 +            if (start >= end || charAt(start) != c)
  1.2918 +                return start;
  1.2919 +            return start + 1;
  1.2920 +        }
  1.2921 +
  1.2922 +        // Scan forward from the given start position.  Stop at the first char
  1.2923 +        // in the err string (in which case -1 is returned), or the first char
  1.2924 +        // in the stop string (in which case the index of that char is
  1.2925 +        // returned), or the end position (in which case end is returned).
  1.2926 +        // Returns the start position if the very first char is a stop char
  1.2927 +        // or if start >= end.
  1.2928 +        //
  1.2929 +        private int scan(int start, int end, String err, String stop) {
  1.2930 +            int p;
  1.2931 +            for (p = start; p < end; p++) {
  1.2932 +                char c = charAt(p);
  1.2933 +                if (err.indexOf(c) >= 0)
  1.2934 +                    return -1;              // hit an error char
  1.2935 +                if (stop.indexOf(c) >= 0)
  1.2936 +                    return p;               // hit a stop char
  1.2937 +            }
  1.2938 +            // Neither kind of char was found before the end position
  1.2939 +            return p;
  1.2940 +        }
  1.2941 +
  1.2942 +        // Scan a potential escape sequence, starting at the given position,
  1.2943 +        // with the given first char (i.e., charAt(start) == first).  Returns
  1.2944 +        // start + 3 after a %XX pair, start + 1 after a visible non-US-ASCII
  1.2945 +        // char, and start otherwise; fails on a '%' without two hex digits.
  1.2946 +        //
  1.2947 +        // This method assumes that if escapes are allowed then visible
  1.2948 +        // non-US-ASCII chars are also allowed.
  1.2949 +        //
  1.2950 +        private int scanEscape(int start, int n, char first)
  1.2951 +            throws URISyntaxException
  1.2952 +        {
  1.2953 +            if (first == '%') {
  1.2954 +                boolean pair = (start + 3 <= n)
  1.2955 +                    && match(charAt(start + 1), L_HEX, H_HEX)
  1.2956 +                    && match(charAt(start + 2), L_HEX, H_HEX);
  1.2957 +                if (pair)
  1.2958 +                    return start + 3;
  1.2959 +                fail("Malformed escape pair", start);
  1.2960 +                return start;
  1.2961 +            }
  1.2962 +            if (first <= 128)
  1.2963 +                return start;
  1.2964 +            // Allow unescaped but visible non-US-ASCII chars
  1.2965 +            boolean visible = !Character.isSpaceChar(first)
  1.2966 +                && !Character.isISOControl(first);
  1.2967 +            return visible ? start + 1 : start;
  1.2968 +        }
  1.2969 +
  1.2970 +        // Scan chars that match the given mask pair.  When the low mask has
  1.2971 +        // the L_ESCAPED bit set, whatever scanEscape accepts is scanned too.
  1.2972 +        // Returns the position at which scanning stopped.
  1.2973 +        private int scan(int start, int n, long lowMask, long highMask)
  1.2974 +            throws URISyntaxException
  1.2975 +        {
  1.2976 +            boolean escapesAllowed = ((lowMask & L_ESCAPED) != 0);
  1.2977 +            int p = start;
  1.2978 +            while (p < n) {
  1.2979 +                char c = charAt(p);
  1.2980 +                if (match(c, lowMask, highMask)) {
  1.2981 +                    p++;
  1.2982 +                } else if (escapesAllowed) {
  1.2983 +                    int q = scanEscape(p, n, c);
  1.2984 +                    if (q <= p)
  1.2985 +                        return p;           // scanEscape accepted nothing
  1.2986 +                    p = q;
  1.2987 +                } else {
  1.2988 +                    return p;               // first char outside the mask
  1.2989 +                }
  1.2990 +            }
  1.2991 +            return p;
  1.2992 +        }
  1.2993 +
  1.2994 +        // Check that each of the chars in [start, end) matches the given mask;
  1.2995 +        // otherwise fail at the index where the scan over the interval stopped
  1.2996 +        private void checkChars(int start, int end,
  1.2997 +                                long lowMask, long highMask, String what)
  1.2998 +            throws URISyntaxException
  1.2999 +        {
  1.3000 +            int stoppedAt = scan(start, end, lowMask, highMask);
  1.3001 +            if (stoppedAt >= end)
  1.3002 +                return;
  1.3003 +            fail("Illegal character in " + what, stoppedAt);
  1.3004 +        }
  1.3005 +
  1.3006 +        // Check that the char at position p matches the given mask, by
  1.3007 +        // checking the one-char interval [p, p + 1)
  1.3008 +        //
  1.3009 +        private void checkChar(int p, long lowMask, long highMask, String what)
  1.3010 +            throws URISyntaxException
  1.3011 +        {
  1.3012 +            int next = p + 1;
  1.3013 +            checkChars(p, next, lowMask, highMask, what);
  1.3014 +        }
  1.3015 +
  1.3016 +
  1.3017 +        // -- Parsing --
  1.3018 +
  1.3019 +        // [<scheme>:]<scheme-specific-part>[#<fragment>]
  1.3020 +        // Parses all of input; rsa is recorded in requireServerAuthority.
  1.3021 +        void parse(boolean rsa) throws URISyntaxException {
  1.3022 +            requireServerAuthority = rsa;
  1.3023 +            int ssp;                    // Start of scheme-specific part
  1.3024 +            int n = input.length();
  1.3025 +            int p = scan(0, n, "/?#", ":");     // -1 if '/', '?' or '#' is first
  1.3026 +            if ((p >= 0) && at(p, n, ':')) {
  1.3027 +                if (p == 0)
  1.3028 +                    failExpecting("scheme name", 0);
  1.3029 +                checkChar(0, L_ALPHA, H_ALPHA, "scheme name");
  1.3030 +                checkChars(1, p, L_SCHEME, H_SCHEME, "scheme name");
  1.3031 +                scheme = substring(0, p);
  1.3032 +                p++;                    // Skip ':'
  1.3033 +                ssp = p;
  1.3034 +                if (at(p, n, '/')) {
  1.3035 +                    p = parseHierarchical(p, n);
  1.3036 +                } else {                // opaque part: up to '#' or the end
  1.3037 +                    int q = scan(p, n, "", "#");
  1.3038 +                    if (q <= p)
  1.3039 +                        failExpecting("scheme-specific part", p);
  1.3040 +                    checkChars(p, q, L_URIC, H_URIC, "opaque part");
  1.3041 +                    p = q;
  1.3042 +                }
  1.3043 +            } else {                    // no scheme prefix
  1.3044 +                ssp = 0;
  1.3045 +                p = parseHierarchical(0, n);
  1.3046 +            }
  1.3047 +            schemeSpecificPart = substring(ssp, p);
  1.3048 +            if (at(p, n, '#')) {
  1.3049 +                checkChars(p + 1, n, L_URIC, H_URIC, "fragment");
  1.3050 +                fragment = substring(p + 1, n);
  1.3051 +                p = n;
  1.3052 +            }
  1.3053 +            if (p < n)                  // input remains after the last part
  1.3054 +                fail("end of URI", p);
  1.3055 +        }
  1.3056 +
  1.3057 +        // [//authority]<path>[?<query>]
  1.3058 +        //
  1.3059 +        // DEVIATION from RFC2396: We allow an empty authority component as
  1.3060 +        // long as it's followed by a non-empty path, query component, or
  1.3061 +        // fragment component.  This is so that URIs such as "file:///foo/bar"
  1.3062 +        // will parse.  This seems to be the intent of RFC2396, though the
  1.3063 +        // grammar does not permit it.  If the authority is empty then the
  1.3064 +        // userInfo, host, and port components are undefined.
  1.3065 +        //
  1.3066 +        // DEVIATION from RFC2396: We allow empty relative paths.  This seems
  1.3067 +        // to be the intent of RFC2396, but the grammar does not permit it.
  1.3068 +        // The primary consequence of this deviation is that "#f" parses as a
  1.3069 +        // relative URI with an empty path.
  1.3070 +        // Sets path and, if present, query; returns where parsing stopped.
  1.3071 +        private int parseHierarchical(int start, int n)
  1.3072 +            throws URISyntaxException
  1.3073 +        {
  1.3074 +            int p = start;
  1.3075 +            if (at(p, n, '/') && at(p + 1, n, '/')) {   // "//" precedes authority
  1.3076 +                p += 2;
  1.3077 +                int q = scan(p, n, "", "/?#");  // authority ends at '/', '?', '#'
  1.3078 +                if (q > p) {
  1.3079 +                    p = parseAuthority(p, q);
  1.3080 +                } else if (q < n) {
  1.3081 +                    // DEVIATION: Allow empty authority prior to non-empty
  1.3082 +                    // path, query component or fragment identifier
  1.3083 +                } else                  // nothing at all follows the "//"
  1.3084 +                    failExpecting("authority", p);
  1.3085 +            }
  1.3086 +            int q = scan(p, n, "", "?#"); // DEVIATION: May be empty
  1.3087 +            checkChars(p, q, L_PATH, H_PATH, "path");
  1.3088 +            path = substring(p, q);
  1.3089 +            p = q;
  1.3090 +            if (at(p, n, '?')) {
  1.3091 +                p++;                    // Skip '?'
  1.3092 +                q = scan(p, n, "", "#");
  1.3093 +                checkChars(p, q, L_URIC, H_URIC, "query");
  1.3094 +                query = substring(p, q);
  1.3095 +                p = q;
  1.3096 +            }
  1.3097 +            return p;
  1.3098 +        }
  1.3099 +
  1.3100 +        // authority     = server | reg_name
  1.3101 +        //
  1.3102 +        // Ambiguity: An authority that is a registry name rather than a server
  1.3103 +        // might have a prefix that parses as a server.  We use the fact that
  1.3104 +        // the authority component is always followed by '/' or the end of the
  1.3105 +        // input string to resolve this: If the complete authority did not
  1.3106 +        // parse as a server then we try to parse it as a registry name.
  1.3107 +        // Sets authority and returns n, unless an exception is thrown.
  1.3108 +        private int parseAuthority(int start, int n)
  1.3109 +            throws URISyntaxException
  1.3110 +        {
  1.3111 +            int p = start;
  1.3112 +            int q = p;
  1.3113 +            URISyntaxException ex = null;
  1.3114 +
  1.3115 +            boolean serverChars;
  1.3116 +            boolean regChars;
  1.3117 +
  1.3118 +            if (scan(p, n, "", "]") > p) {  // NOTE(review): also > p with no ']'
  1.3119 +                // contains a literal IPv6 address, therefore % is allowed
  1.3120 +                serverChars = (scan(p, n, L_SERVER_PERCENT, H_SERVER_PERCENT) == n);
  1.3121 +            } else {
  1.3122 +                serverChars = (scan(p, n, L_SERVER, H_SERVER) == n);
  1.3123 +            }
  1.3124 +            regChars = (scan(p, n, L_REG_NAME, H_REG_NAME) == n);
  1.3125 +
  1.3126 +            if (regChars && !serverChars) {
  1.3127 +                // Must be a registry-based authority
  1.3128 +                authority = substring(p, n);
  1.3129 +                return n;
  1.3130 +            }
  1.3131 +
  1.3132 +            if (serverChars) {
  1.3133 +                // Might be (probably is) a server-based authority, so attempt
  1.3134 +                // to parse it as such.  If the attempt fails, try to treat it
  1.3135 +                // as a registry-based authority.
  1.3136 +                try {
  1.3137 +                    q = parseServer(p, n);
  1.3138 +                    if (q < n)
  1.3139 +                        failExpecting("end of authority", q);
  1.3140 +                    authority = substring(p, n);
  1.3141 +                } catch (URISyntaxException x) {
  1.3142 +                    // Undo results of failed parse
  1.3143 +                    userInfo = null;
  1.3144 +                    host = null;
  1.3145 +                    port = -1;
  1.3146 +                    if (requireServerAuthority) {
  1.3147 +                        // If we're insisting upon a server-based authority,
  1.3148 +                        // then just re-throw the exception
  1.3149 +                        throw x;
  1.3150 +                    } else {
  1.3151 +                        // Save the exception in case it doesn't parse as a
  1.3152 +                        // registry either
  1.3153 +                        ex = x;
  1.3154 +                        q = p;
  1.3155 +                    }
  1.3156 +                }
  1.3157 +            }
  1.3158 +
  1.3159 +            if (q < n) {                // not parsed as a server-based authority
  1.3160 +                if (regChars) {
  1.3161 +                    // Registry-based authority
  1.3162 +                    authority = substring(p, n);
  1.3163 +                } else if (ex != null) {
  1.3164 +                    // Re-throw exception; it was probably due to
  1.3165 +                    // a malformed IPv6 address
  1.3166 +                    throw ex;
  1.3167 +                } else {
  1.3168 +                    fail("Illegal character in authority", q);
  1.3169 +                }
  1.3170 +            }
  1.3171 +
  1.3172 +            return n;
  1.3173 +        }
  1.3174 +
  1.3175 +
  1.3176 +        // [<userinfo>@]<host>[:<port>]
  1.3177 +        // Sets userInfo, host and port as found; fails unless all is consumed.
  1.3178 +        private int parseServer(int start, int n)
  1.3179 +            throws URISyntaxException
  1.3180 +        {
  1.3181 +            int p = start;
  1.3182 +            int q;
  1.3183 +
  1.3184 +            // userinfo
  1.3185 +            q = scan(p, n, "/?#", "@");         // -1 if '/', '?' or '#' is first
  1.3186 +            if ((q >= p) && at(q, n, '@')) {
  1.3187 +                checkChars(p, q, L_USERINFO, H_USERINFO, "user info");
  1.3188 +                userInfo = substring(p, q);
  1.3189 +                p = q + 1;              // Skip '@'
  1.3190 +            }
  1.3191 +
  1.3192 +            // hostname, IPv4 address, or IPv6 address
  1.3193 +            if (at(p, n, '[')) {
  1.3194 +                // DEVIATION from RFC2396: Support IPv6 addresses, per RFC2732
  1.3195 +                p++;
  1.3196 +                q = scan(p, n, "/?#", "]");
  1.3197 +                if ((q > p) && at(q, n, ']')) {
  1.3198 +                    // look for a "%" scope id
  1.3199 +                    int r = scan (p, q, "", "%");       // r == q if there is no '%'
  1.3200 +                    if (r > p) {                        // also taken when r == q
  1.3201 +                        parseIPv6Reference(p, r);
  1.3202 +                        if (r+1 == q) {
  1.3203 +                            fail ("scope id expected");
  1.3204 +                        }
  1.3205 +                        checkChars (r+1, q, L_ALPHANUM, H_ALPHANUM,
  1.3206 +                                                "scope id");
  1.3207 +                    } else {
  1.3208 +                        parseIPv6Reference(p, q);
  1.3209 +                    }
  1.3210 +                    host = substring(p-1, q+1);         // keeps the brackets
  1.3211 +                    p = q + 1;
  1.3212 +                } else {
  1.3213 +                    failExpecting("closing bracket for IPv6 address", q);
  1.3214 +                }
  1.3215 +            } else {
  1.3216 +                q = parseIPv4Address(p, n);
  1.3217 +                if (q <= p)
  1.3218 +                    q = parseHostname(p, n);
  1.3219 +                p = q;
  1.3220 +            }
  1.3221 +
  1.3222 +            // port
  1.3223 +            if (at(p, n, ':')) {
  1.3224 +                p++;
  1.3225 +                q = scan(p, n, "", "/");
  1.3226 +                if (q > p) {            // an empty port leaves port unset
  1.3227 +                    checkChars(p, q, L_DIGIT, H_DIGIT, "port number");
  1.3228 +                    try {
  1.3229 +                        port = Integer.parseInt(substring(p, q));
  1.3230 +                    } catch (NumberFormatException x) {
  1.3231 +                        fail("Malformed port number", p);
  1.3232 +                    }
  1.3233 +                    p = q;
  1.3234 +                }
  1.3235 +            }
  1.3236 +            if (p < n)
  1.3237 +                failExpecting("port number", p);
  1.3238 +
  1.3239 +            return p;
  1.3240 +        }
  1.3241 +
  1.3242 +        // Scan a string of decimal digits whose value fits in a byte; a run too
  1.3243 +        // long for Integer.parseInt is out of range, not a NumberFormatException
  1.3244 +        private int scanByte(int start, int n) throws URISyntaxException {
  1.3245 +            int q = scan(start, n, L_DIGIT, H_DIGIT);
  1.3246 +            if (q <= start) return q;       // no digits at the start position
  1.3247 +            try {
  1.3248 +                return (Integer.parseInt(substring(start, q)) > 255) ? start : q;
  1.3249 +            } catch (NumberFormatException x) {
  1.3250 +                return start;               // overflows int, so cannot fit a byte
  1.3251 +            }
  1.3252 +        }
  1.3253 +
  1.3254 +        // Scan an IPv4 address.
  1.3255 +        //
  1.3256 +        // If the strict argument is true then we require that the given
  1.3257 +        // interval contain nothing besides an IPv4 address; if it is false
  1.3258 +        // then we only require that it start with an IPv4 address.
  1.3259 +        //
  1.3260 +        // If the interval does not contain or start with (depending upon the
  1.3261 +        // strict argument) legal IPv4 address characters then we return -1
  1.3262 +        // immediately; otherwise we insist that these characters parse as a
  1.3263 +        // legal IPv4 address and throw an exception on failure.
  1.3264 +        //
  1.3265 +        // We assume that any string of decimal digits and dots must be an IPv4
  1.3266 +        // address.  It won't parse as a hostname anyway, so making that
  1.3267 +        // assumption here allows more meaningful exceptions to be thrown.
  1.3268 +        //
  1.3269 +        private int scanIPv4Address(int start, int n, boolean strict)
  1.3270 +            throws URISyntaxException
  1.3271 +        {
  1.3272 +            int p = start;
  1.3273 +            int q;
  1.3274 +            int m = scan(p, n, L_DIGIT | L_DOT, H_DIGIT | H_DOT);   // end of run
  1.3275 +            if ((m <= p) || (strict && (m != n)))
  1.3276 +                return -1;
  1.3277 +            for (;;) {
  1.3278 +                // Per RFC2732: At most three digits per byte
  1.3279 +                // Further constraint: Each element fits in a byte
  1.3280 +                if ((q = scanByte(p, m)) <= p) break;   p = q;
  1.3281 +                if ((q = scan(p, m, '.')) <= p) break;  p = q;
  1.3282 +                if ((q = scanByte(p, m)) <= p) break;   p = q;
  1.3283 +                if ((q = scan(p, m, '.')) <= p) break;  p = q;
  1.3284 +                if ((q = scanByte(p, m)) <= p) break;   p = q;
  1.3285 +                if ((q = scan(p, m, '.')) <= p) break;  p = q;
  1.3286 +                if ((q = scanByte(p, m)) <= p) break;   p = q;
  1.3287 +                if (q < m) break;       // digits or dots left after fourth byte
  1.3288 +                return q;
  1.3289 +            }
  1.3290 +            fail("Malformed IPv4 address", q);
  1.3291 +            return -1;
  1.3292 +        }
  1.3293 +
  1.3294 +        // Take an IPv4 address: Throw an exception if the given interval
  1.3295 +        // contains anything except an IPv4 address
  1.3296 +        //
  1.3297 +        private int takeIPv4Address(int start, int n, String expected)
  1.3298 +            throws URISyntaxException
  1.3299 +        {
  1.3300 +            int p = scanIPv4Address(start, n, true);    // strict: [start, n) must hold nothing but the address
  1.3301 +            if (p <= start)                             // also covers the -1 "not an IPv4 address" result
  1.3302 +                failExpecting(expected, start);         // reports the failure at start, passing expected along
  1.3303 +            return p;                                   // index returned by the successful scan
  1.3304 +        }
  1.3305 +
  1.3306 +        // Attempt to parse an IPv4 address, returning -1 on failure but
  1.3307 +        // allowing the given interval to contain [:<characters>] after
  1.3308 +        // the IPv4 address.
  1.3309 +        //
  1.3310 +        private int parseIPv4Address(int start, int n) {
  1.3311 +            int p;
  1.3312 +
  1.3313 +            try {
  1.3314 +                p = scanIPv4Address(start, n, false);   // non-strict: characters may follow the address
  1.3315 +            } catch (URISyntaxException x) {            // malformed address: reported as -1, not rethrown
  1.3316 +                return -1;
  1.3317 +            } catch (NumberFormatException nfe) {       // a number-format failure during the scan is also just -1
  1.3318 +                return -1;
  1.3319 +            }
  1.3320 +
  1.3321 +            if (p > start && p < n) {
  1.3322 +                // IPv4 address is followed by something - check that
  1.3323 +                // it's a ":" as this is the only valid character to
  1.3324 +                // follow an address.
  1.3325 +                if (charAt(p) != ':') {
  1.3326 +                    p = -1;
  1.3327 +                }
  1.3328 +            }
  1.3329 +
  1.3330 +            if (p > start)
  1.3331 +                host = substring(start, p);             // side effect: stores the address text in host
  1.3332 +
  1.3333 +            return p;                                   // index after the address, or -1 when none was parsed
  1.3334 +        }
  1.3335 +
  1.3336 +        // hostname      = domainlabel [ "." ] | 1*( domainlabel "." ) toplabel [ "." ]
  1.3337 +        // domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
  1.3338 +        // toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
  1.3339 +        //
  1.3340 +        private int parseHostname(int start, int n)
  1.3341 +            throws URISyntaxException
  1.3342 +        {
  1.3343 +            int p = start;
  1.3344 +            int q;
  1.3345 +            int l = -1;                 // Start of last parsed label
  1.3346 +
  1.3347 +            do {
  1.3348 +                // domainlabel = alphanum [ *( alphanum | "-" ) alphanum ]
  1.3349 +                q = scan(p, n, L_ALPHANUM, H_ALPHANUM);
  1.3350 +                if (q <= p)                             // no label begins at p
  1.3351 +                    break;
  1.3352 +                l = p;
  1.3353 +                if (q > p) {                            // always true here: the q <= p case broke out above
  1.3354 +                    p = q;
  1.3355 +                    q = scan(p, n, L_ALPHANUM | L_DASH, H_ALPHANUM | H_DASH);
  1.3356 +                    if (q > p) {
  1.3357 +                        if (charAt(q - 1) == '-')       // a label must not end with a dash
  1.3358 +                            fail("Illegal character in hostname", q - 1);
  1.3359 +                        p = q;
  1.3360 +                    }
  1.3361 +                }
  1.3362 +                q = scan(p, n, '.');                    // look for the '.' that separates labels
  1.3363 +                if (q <= p)                             // no dot: the hostname ends with this label
  1.3364 +                    break;
  1.3365 +                p = q;
  1.3366 +            } while (p < n);
  1.3367 +
  1.3368 +            if ((p < n) && !at(p, n, ':'))              // before n, only ':' may follow the hostname
  1.3369 +                fail("Illegal character in hostname", p);
  1.3370 +
  1.3371 +            if (l < 0)                                  // not a single label was parsed
  1.3372 +                failExpecting("hostname", start);
  1.3373 +
  1.3374 +            // for a fully qualified hostname check that the rightmost
  1.3375 +            // label starts with an alpha character.
  1.3376 +            if (l > start && !match(charAt(l), L_ALPHA, H_ALPHA)) {     // l > start: more than one label was parsed
  1.3377 +                fail("Illegal character in hostname", l);
  1.3378 +            }
  1.3379 +
  1.3380 +            host = substring(start, p);                 // side effect: stores the hostname text in host
  1.3381 +            return p;
  1.3382 +        }
  1.3383 +
  1.3384 +
  1.3385 +        // IPv6 address parsing, from RFC2373: IPv6 Addressing Architecture
  1.3386 +        //
  1.3387 +        // Bug: The grammar in RFC2373 Appendix B does not allow addresses of
  1.3388 +        // the form ::12.34.56.78, which are clearly shown in the examples
  1.3389 +        // earlier in the document.  Here is the original grammar:
  1.3390 +        //
  1.3391 +        //   IPv6address = hexpart [ ":" IPv4address ]
  1.3392 +        //   hexpart     = hexseq | hexseq "::" [ hexseq ] | "::" [ hexseq ]
  1.3393 +        //   hexseq      = hex4 *( ":" hex4)
  1.3394 +        //   hex4        = 1*4HEXDIG
  1.3395 +        //
  1.3396 +        // We therefore use the following revised grammar:
  1.3397 +        //
  1.3398 +        //   IPv6address = hexseq [ ":" IPv4address ]
  1.3399 +        //                 | hexseq [ "::" [ hexpost ] ]
  1.3400 +        //                 | "::" [ hexpost ]
  1.3401 +        //   hexpost     = hexseq | hexseq ":" IPv4address | IPv4address
  1.3402 +        //   hexseq      = hex4 *( ":" hex4)
  1.3403 +        //   hex4        = 1*4HEXDIG
  1.3404 +        //
  1.3405 +        // This covers all and only the following cases:
  1.3406 +        //
  1.3407 +        //   hexseq
  1.3408 +        //   hexseq : IPv4address
  1.3409 +        //   hexseq ::
  1.3410 +        //   hexseq :: hexseq
  1.3411 +        //   hexseq :: hexseq : IPv4address
  1.3412 +        //   hexseq :: IPv4address
  1.3413 +        //   :: hexseq
  1.3414 +        //   :: hexseq : IPv4address
  1.3415 +        //   :: IPv4address
  1.3416 +        //   ::
  1.3417 +        //
  1.3418 +        // Additionally we constrain the IPv6 address as follows :-
  1.3419 +        //
  1.3420 +        //  i.  IPv6 addresses without compressed zeros should contain
  1.3421 +        //      exactly 16 bytes.
  1.3422 +        //
  1.3423 +        //  ii. IPv6 addresses with compressed zeros should contain
  1.3424 +        //      fewer than 16 bytes.
  1.3425 +
  1.3426 +        private int ipv6byteCount = 0;          // address bytes seen so far: +2 per hex group, +4 per embedded IPv4 address
  1.3427 +
  1.3428 +        private int parseIPv6Reference(int start, int n)
  1.3429 +            throws URISyntaxException
  1.3430 +        {
  1.3431 +            int p = start;
  1.3432 +            int q;
  1.3433 +            boolean compressedZeros = false;            // set once a "::" has been seen
  1.3434 +
  1.3435 +            q = scanHexSeq(p, n);                       // leading hexseq; -1 when none starts here
  1.3436 +
  1.3437 +            if (q > p) {
  1.3438 +                p = q;
  1.3439 +                if (at(p, n, "::")) {                   // hexseq "::" [ hexpost ]
  1.3440 +                    compressedZeros = true;
  1.3441 +                    p = scanHexPost(p + 2, n);
  1.3442 +                } else if (at(p, n, ':')) {             // hexseq ":" IPv4address
  1.3443 +                    p = takeIPv4Address(p + 1,  n, "IPv4 address");
  1.3444 +                    ipv6byteCount += 4;                 // the embedded IPv4 address counts as four bytes
  1.3445 +                }
  1.3446 +            } else if (at(p, n, "::")) {                // "::" [ hexpost ]
  1.3447 +                compressedZeros = true;
  1.3448 +                p = scanHexPost(p + 2, n);
  1.3449 +            }
  1.3450 +            if (p < n)                                  // part of [start, n) was left unconsumed
  1.3451 +                fail("Malformed IPv6 address", start);
  1.3452 +            if (ipv6byteCount > 16)
  1.3453 +                fail("IPv6 address too long", start);
  1.3454 +            if (!compressedZeros && ipv6byteCount < 16) // without "::" all 16 bytes must be written out
  1.3455 +                fail("IPv6 address too short", start);
  1.3456 +            if (compressedZeros && ipv6byteCount == 16) // "::" present although no bytes are missing
  1.3457 +                fail("Malformed IPv6 address", start);
  1.3458 +
  1.3459 +            return p;
  1.3460 +        }
  1.3461 +
  1.3462 +        private int scanHexPost(int start, int n)   // scan the optional hexpost that may follow a "::"
  1.3463 +            throws URISyntaxException
  1.3464 +        {
  1.3465 +            int p = start;
  1.3466 +            int q;
  1.3467 +
  1.3468 +            if (p == n)                                 // nothing after the "::": empty hexpost
  1.3469 +                return p;
  1.3470 +
  1.3471 +            q = scanHexSeq(p, n);                       // -1 when no hexseq starts here
  1.3472 +            if (q > p) {
  1.3473 +                p = q;
  1.3474 +                if (at(p, n, ':')) {                    // hexseq ":" IPv4address
  1.3475 +                    p++;
  1.3476 +                    p = takeIPv4Address(p, n, "hex digits or IPv4 address");
  1.3477 +                    ipv6byteCount += 4;
  1.3478 +                }
  1.3479 +            } else {                                    // no hexseq: the rest must be an IPv4address
  1.3480 +                p = takeIPv4Address(p, n, "hex digits or IPv4 address");
  1.3481 +                ipv6byteCount += 4;
  1.3482 +            }
  1.3483 +            return p;
  1.3484 +        }
  1.3485 +
  1.3486 +        // Scan a hex sequence; return -1 if one could not be scanned
  1.3487 +        //
  1.3488 +        private int scanHexSeq(int start, int n)
  1.3489 +            throws URISyntaxException
  1.3490 +        {
  1.3491 +            int p = start;
  1.3492 +            int q;
  1.3493 +
  1.3494 +            q = scan(p, n, L_HEX, H_HEX);               // first hex4 group
  1.3495 +            if (q <= p)                                 // no hex digits at start
  1.3496 +                return -1;
  1.3497 +            if (at(q, n, '.'))          // Beginning of IPv4 address
  1.3498 +                return -1;
  1.3499 +            if (q > p + 4)                              // a hex4 group has at most four digits
  1.3500 +                fail("IPv6 hexadecimal digit sequence too long", p);
  1.3501 +            ipv6byteCount += 2;                         // each hex4 group counts as two bytes
  1.3502 +            p = q;
  1.3503 +            while (p < n) {                             // further ":" hex4 groups
  1.3504 +                if (!at(p, n, ':'))
  1.3505 +                    break;
  1.3506 +                if (at(p + 1, n, ':'))
  1.3507 +                    break;              // "::"
  1.3508 +                p++;
  1.3509 +                q = scan(p, n, L_HEX, H_HEX);
  1.3510 +                if (q <= p)                             // a single ':' must be followed by hex digits
  1.3511 +                    failExpecting("digits for an IPv6 address", p);
  1.3512 +                if (at(q, n, '.')) {    // Beginning of IPv4 address
  1.3513 +                    p--;                                // step back onto the ':' so the result points at it
  1.3514 +                    break;
  1.3515 +                }
  1.3516 +                if (q > p + 4)
  1.3517 +                    fail("IPv6 hexadecimal digit sequence too long", p);
  1.3518 +                ipv6byteCount += 2;
  1.3519 +                p = q;
  1.3520 +            }
  1.3521 +
  1.3522 +            return p;                                   // index just past the last hex4 group consumed
  1.3523 +        }
  1.3524 +
  1.3525 +    }
  1.3526 +
  1.3527 +}