hg/bck2brwsr: rt/emul/compact/src/main/java/java/net/URI.java@9926996eca2d (annotated)

jaroslav@1258	1	/*
jaroslav@1258	2	* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
jaroslav@1258	3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
jaroslav@1258	4	*
jaroslav@1258	5	* This code is free software; you can redistribute it and/or modify it
jaroslav@1258	6	* under the terms of the GNU General Public License version 2 only, as
jaroslav@1258	7	* published by the Free Software Foundation. Oracle designates this
jaroslav@1258	8	* particular file as subject to the "Classpath" exception as provided
jaroslav@1258	9	* by Oracle in the LICENSE file that accompanied this code.
jaroslav@1258	10	*
jaroslav@1258	11	* This code is distributed in the hope that it will be useful, but WITHOUT
jaroslav@1258	12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
jaroslav@1258	13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
jaroslav@1258	14	* version 2 for more details (a copy is included in the LICENSE file that
jaroslav@1258	15	* accompanied this code).
jaroslav@1258	16	*
jaroslav@1258	17	* You should have received a copy of the GNU General Public License version
jaroslav@1258	18	* 2 along with this work; if not, write to the Free Software Foundation,
jaroslav@1258	19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
jaroslav@1258	20	*
jaroslav@1258	21	* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
jaroslav@1258	22	* or visit www.oracle.com if you need additional information or have any
jaroslav@1258	23	* questions.
jaroslav@1258	24	*/
jaroslav@1258	25
jaroslav@1258	26	package java.net;
jaroslav@1258	27
jaroslav@1258	28	import java.io.IOException;
jaroslav@1258	29	import java.io.InvalidObjectException;
jaroslav@1258	30	import java.io.ObjectInputStream;
jaroslav@1258	31	import java.io.ObjectOutputStream;
jaroslav@1258	32	import java.io.Serializable;
jaroslav@1258	33
jaroslav@1258	34	import java.lang.Character; // for javadoc
jaroslav@1258	35	import java.lang.NullPointerException; // for javadoc
jaroslav@1258	36
jaroslav@1258	37
jaroslav@1258	38	/**
jaroslav@1258	39	* Represents a Uniform Resource Identifier (URI) reference.
jaroslav@1258	40	*
jaroslav@1258	41	* <p> Aside from some minor deviations noted below, an instance of this
jaroslav@1258	42	* class represents a URI reference as defined by
jaroslav@1258	43	* <a href="http://www.ietf.org/rfc/rfc2396.txt"><i>RFC 2396: Uniform
jaroslav@1258	44	* Resource Identifiers (URI): Generic Syntax</i></a>, amended by <a
jaroslav@1258	45	* href="http://www.ietf.org/rfc/rfc2732.txt"><i>RFC 2732: Format for
jaroslav@1258	46	* Literal IPv6 Addresses in URLs</i></a>. The Literal IPv6 address format
jaroslav@1258	47	* also supports scope_ids. The syntax and usage of scope_ids is described
jaroslav@1258	48	* <a href="Inet6Address.html#scoped">here</a>.
jaroslav@1258	49	* This class provides constructors for creating URI instances from
jaroslav@1258	50	* their components or by parsing their string forms, methods for accessing the
jaroslav@1258	51	* various components of an instance, and methods for normalizing, resolving,
jaroslav@1258	52	* and relativizing URI instances. Instances of this class are immutable.
jaroslav@1258	53	*
jaroslav@1258	54	*
jaroslav@1258	55	* <h4> URI syntax and components </h4>
jaroslav@1258	56	*
jaroslav@1258	57	* At the highest level a URI reference (hereinafter simply "URI") in string
jaroslav@1258	58	* form has the syntax
jaroslav@1258	59	*
jaroslav@1258	60	* <blockquote>
jaroslav@1258	61	* [<i>scheme</i><tt><b>:</b></tt>]<i>scheme-specific-part</i>[<tt><b>#</b></tt><i>fragment</i>]
jaroslav@1258	62	* </blockquote>
jaroslav@1258	63	*
jaroslav@1258	64	* where square brackets [...] delineate optional components and the characters
jaroslav@1258	65	* <tt><b>:</b></tt> and <tt><b>#</b></tt> stand for themselves.
jaroslav@1258	66	*
jaroslav@1258	67	* <p> An <i>absolute</i> URI specifies a scheme; a URI that is not absolute is
jaroslav@1258	68	* said to be <i>relative</i>. URIs are also classified according to whether
jaroslav@1258	69	* they are <i>opaque</i> or <i>hierarchical</i>.
jaroslav@1258	70	*
jaroslav@1258	71	* <p> An <i>opaque</i> URI is an absolute URI whose scheme-specific part does
jaroslav@1258	72	* not begin with a slash character (<tt>'/'</tt>). Opaque URIs are not
jaroslav@1258	73	* subject to further parsing. Some examples of opaque URIs are:
jaroslav@1258	74	*
jaroslav@1258	75	* <blockquote><table cellpadding=0 cellspacing=0 summary="layout">
jaroslav@1258	76	* <tr><td><tt>mailto:java-net@java.sun.com</tt></td></tr>
jaroslav@1258	77	* <tr><td><tt>news:comp.lang.java</tt></td></tr>
jaroslav@1258	78	* <tr><td><tt>urn:isbn:096139210x</tt></td></tr>
jaroslav@1258	79	* </table></blockquote>
jaroslav@1258	80	*
jaroslav@1258	81	* <p> A <i>hierarchical</i> URI is either an absolute URI whose
jaroslav@1258	82	* scheme-specific part begins with a slash character, or a relative URI, that
jaroslav@1258	83	* is, a URI that does not specify a scheme. Some examples of hierarchical
jaroslav@1258	84	* URIs are:
jaroslav@1258	85	*
jaroslav@1258	86	* <blockquote>
jaroslav@1258	87	* <tt>http://java.sun.com/j2se/1.3/</tt><br>
jaroslav@1258	88	* <tt>docs/guide/collections/designfaq.html#28</tt><br>
jaroslav@1258	89	* <tt>../../../demo/jfc/SwingSet2/src/SwingSet2.java</tt><br>
jaroslav@1258	90	* <tt>file:///~/calendar</tt>
jaroslav@1258	91	* </blockquote>
jaroslav@1258	92	*
jaroslav@1258	93	* <p> A hierarchical URI is subject to further parsing according to the syntax
jaroslav@1258	94	*
jaroslav@1258	95	* <blockquote>
jaroslav@1258	96	* [<i>scheme</i><tt><b>:</b></tt>][<tt><b>//</b></tt><i>authority</i>][<i>path</i>][<tt><b>?</b></tt><i>query</i>][<tt><b>#</b></tt><i>fragment</i>]
jaroslav@1258	97	* </blockquote>
jaroslav@1258	98	*
jaroslav@1258	99	* where the characters <tt><b>:</b></tt>, <tt><b>/</b></tt>,
jaroslav@1258	100	* <tt><b>?</b></tt>, and <tt><b>#</b></tt> stand for themselves. The
jaroslav@1258	101	* scheme-specific part of a hierarchical URI consists of the characters
jaroslav@1258	102	* between the scheme and fragment components.
jaroslav@1258	103	*
jaroslav@1258	104	* <p> The authority component of a hierarchical URI is, if specified, either
jaroslav@1258	105	* <i>server-based</i> or <i>registry-based</i>. A server-based authority
jaroslav@1258	106	* parses according to the familiar syntax
jaroslav@1258	107	*
jaroslav@1258	108	* <blockquote>
jaroslav@1258	109	* [<i>user-info</i><tt><b>@</b></tt>]<i>host</i>[<tt><b>:</b></tt><i>port</i>]
jaroslav@1258	110	* </blockquote>
jaroslav@1258	111	*
jaroslav@1258	112	* where the characters <tt><b>@</b></tt> and <tt><b>:</b></tt> stand for
jaroslav@1258	113	* themselves. Nearly all URI schemes currently in use are server-based. An
jaroslav@1258	114	* authority component that does not parse in this way is considered to be
jaroslav@1258	115	* registry-based.
jaroslav@1258	116	*
jaroslav@1258	117	* <p> The path component of a hierarchical URI is itself said to be absolute
jaroslav@1258	118	* if it begins with a slash character (<tt>'/'</tt>); otherwise it is
jaroslav@1258	119	* relative. The path of a hierarchical URI that is either absolute or
jaroslav@1258	120	* specifies an authority is always absolute.
jaroslav@1258	121	*
jaroslav@1258	122	* <p> All told, then, a URI instance has the following nine components:
jaroslav@1258	123	*
jaroslav@1258	124	* <blockquote><table summary="Describes the components of a URI:scheme,scheme-specific-part,authority,user-info,host,port,path,query,fragment">
jaroslav@1258	125	* <tr><th><i>Component</i></th><th><i>Type</i></th></tr>
jaroslav@1258	126	* <tr><td>scheme</td><td><tt>String</tt></td></tr>
jaroslav@1258	127	* <tr><td>scheme-specific-part    </td><td><tt>String</tt></td></tr>
jaroslav@1258	128	* <tr><td>authority</td><td><tt>String</tt></td></tr>
jaroslav@1258	129	* <tr><td>user-info</td><td><tt>String</tt></td></tr>
jaroslav@1258	130	* <tr><td>host</td><td><tt>String</tt></td></tr>
jaroslav@1258	131	* <tr><td>port</td><td><tt>int</tt></td></tr>
jaroslav@1258	132	* <tr><td>path</td><td><tt>String</tt></td></tr>
jaroslav@1258	133	* <tr><td>query</td><td><tt>String</tt></td></tr>
jaroslav@1258	134	* <tr><td>fragment</td><td><tt>String</tt></td></tr>
jaroslav@1258	135	* </table></blockquote>
jaroslav@1258	136	*
jaroslav@1258	137	* In a given instance any particular component is either <i>undefined</i> or
jaroslav@1258	138	* <i>defined</i> with a distinct value. Undefined string components are
jaroslav@1258	139	* represented by <tt>null</tt>, while undefined integer components are
jaroslav@1258	140	* represented by <tt>-1</tt>. A string component may be defined to have the
jaroslav@1258	141	* empty string as its value; this is not equivalent to that component being
jaroslav@1258	142	* undefined.
jaroslav@1258	143	*
jaroslav@1258	144	* <p> Whether a particular component is or is not defined in an instance
jaroslav@1258	145	* depends upon the type of the URI being represented. An absolute URI has a
jaroslav@1258	146	* scheme component. An opaque URI has a scheme, a scheme-specific part, and
jaroslav@1258	147	* possibly a fragment, but has no other components. A hierarchical URI always
jaroslav@1258	148	* has a path (though it may be empty) and a scheme-specific-part (which at
jaroslav@1258	149	* least contains the path), and may have any of the other components. If the
jaroslav@1258	150	* authority component is present and is server-based then the host component
jaroslav@1258	151	* will be defined and the user-information and port components may be defined.
jaroslav@1258	152	*
jaroslav@1258	153	*
jaroslav@1258	154	* <h4> Operations on URI instances </h4>
jaroslav@1258	155	*
jaroslav@1258	156	* The key operations supported by this class are those of
jaroslav@1258	157	* <i>normalization</i>, <i>resolution</i>, and <i>relativization</i>.
jaroslav@1258	158	*
jaroslav@1258	159	* <p> <i>Normalization</i> is the process of removing unnecessary <tt>"."</tt>
jaroslav@1258	160	* and <tt>".."</tt> segments from the path component of a hierarchical URI.
jaroslav@1258	161	* Each <tt>"."</tt> segment is simply removed. A <tt>".."</tt> segment is
jaroslav@1258	162	* removed only if it is preceded by a non-<tt>".."</tt> segment.
jaroslav@1258	163	* Normalization has no effect upon opaque URIs.
jaroslav@1258	164	*
jaroslav@1258	165	* <p> <i>Resolution</i> is the process of resolving one URI against another,
jaroslav@1258	166	* <i>base</i> URI. The resulting URI is constructed from components of both
jaroslav@1258	167	* URIs in the manner specified by RFC 2396, taking components from the
jaroslav@1258	168	* base URI for those not specified in the original. For hierarchical URIs,
jaroslav@1258	169	* the path of the original is resolved against the path of the base and then
jaroslav@1258	170	* normalized. The result, for example, of resolving
jaroslav@1258	171	*
jaroslav@1258	172	* <blockquote>
jaroslav@1258	173	* <tt>docs/guide/collections/designfaq.html#28          </tt>(1)
jaroslav@1258	174	* </blockquote>
jaroslav@1258	175	*
jaroslav@1258	176	* against the base URI <tt>http://java.sun.com/j2se/1.3/</tt> is the result
jaroslav@1258	177	* URI
jaroslav@1258	178	*
jaroslav@1258	179	* <blockquote>
jaroslav@1258	180	* <tt>http://java.sun.com/j2se/1.3/docs/guide/collections/designfaq.html#28</tt>
jaroslav@1258	181	* </blockquote>
jaroslav@1258	182	*
jaroslav@1258	183	* Resolving the relative URI
jaroslav@1258	184	*
jaroslav@1258	185	* <blockquote>
jaroslav@1258	186	* <tt>../../../demo/jfc/SwingSet2/src/SwingSet2.java    </tt>(2)
jaroslav@1258	187	* </blockquote>
jaroslav@1258	188	*
jaroslav@1258	189	* against this result yields, in turn,
jaroslav@1258	190	*
jaroslav@1258	191	* <blockquote>
jaroslav@1258	192	* <tt>http://java.sun.com/j2se/1.3/demo/jfc/SwingSet2/src/SwingSet2.java</tt>
jaroslav@1258	193	* </blockquote>
jaroslav@1258	194	*
jaroslav@1258	195	* Resolution of both absolute and relative URIs, and of both absolute and
jaroslav@1258	196	* relative paths in the case of hierarchical URIs, is supported. Resolving
jaroslav@1258	197	* the URI <tt>file:///~/calendar</tt> against any other URI simply yields the
jaroslav@1258	198	* original URI, since it is absolute. Resolving the relative URI (2) above
jaroslav@1258	199	* against the relative base URI (1) yields the normalized, but still relative,
jaroslav@1258	200	* URI
jaroslav@1258	201	*
jaroslav@1258	202	* <blockquote>
jaroslav@1258	203	* <tt>demo/jfc/SwingSet2/src/SwingSet2.java</tt>
jaroslav@1258	204	* </blockquote>
jaroslav@1258	205	*
jaroslav@1258	206	* <p> <i>Relativization</i>, finally, is the inverse of resolution: For any
jaroslav@1258	207	* two normalized URIs <i>u</i> and <i>v</i>,
jaroslav@1258	208	*
jaroslav@1258	209	* <blockquote>
jaroslav@1258	210	* <i>u</i><tt>.relativize(</tt><i>u</i><tt>.resolve(</tt><i>v</i><tt>)).equals(</tt><i>v</i><tt>)</tt>  and<br>
jaroslav@1258	211	* <i>u</i><tt>.resolve(</tt><i>u</i><tt>.relativize(</tt><i>v</i><tt>)).equals(</tt><i>v</i><tt>)</tt>  .<br>
jaroslav@1258	212	* </blockquote>
jaroslav@1258	213	*
jaroslav@1258	214	* This operation is often useful when constructing a document containing URIs
jaroslav@1258	215	* that must be made relative to the base URI of the document wherever
jaroslav@1258	216	* possible. For example, relativizing the URI
jaroslav@1258	217	*
jaroslav@1258	218	* <blockquote>
jaroslav@1258	219	* <tt>http://java.sun.com/j2se/1.3/docs/guide/index.html</tt>
jaroslav@1258	220	* </blockquote>
jaroslav@1258	221	*
jaroslav@1258	222	* against the base URI
jaroslav@1258	223	*
jaroslav@1258	224	* <blockquote>
jaroslav@1258	225	* <tt>http://java.sun.com/j2se/1.3</tt>
jaroslav@1258	226	* </blockquote>
jaroslav@1258	227	*
jaroslav@1258	228	* yields the relative URI <tt>docs/guide/index.html</tt>.
jaroslav@1258	229	*
jaroslav@1258	230	*
jaroslav@1258	231	* <h4> Character categories </h4>
jaroslav@1258	232	*
jaroslav@1258	233	* RFC 2396 specifies precisely which characters are permitted in the
jaroslav@1258	234	* various components of a URI reference. The following categories, most of
jaroslav@1258	235	* which are taken from that specification, are used below to describe these
jaroslav@1258	236	* constraints:
jaroslav@1258	237	*
jaroslav@1258	238	* <blockquote><table cellspacing=2 summary="Describes categories alpha,digit,alphanum,unreserved,punct,reserved,escaped,and other">
jaroslav@1258	239	* <tr><th valign=top><i>alpha</i></th>
jaroslav@1258	240	* <td>The US-ASCII alphabetic characters,
jaroslav@1258	241	* <tt>'A'</tt> through <tt>'Z'</tt>
jaroslav@1258	242	* and <tt>'a'</tt> through <tt>'z'</tt></td></tr>
jaroslav@1258	243	* <tr><th valign=top><i>digit</i></th>
jaroslav@1258	244	* <td>The US-ASCII decimal digit characters,
jaroslav@1258	245	* <tt>'0'</tt> through <tt>'9'</tt></td></tr>
jaroslav@1258	246	* <tr><th valign=top><i>alphanum</i></th>
jaroslav@1258	247	* <td>All <i>alpha</i> and <i>digit</i> characters</td></tr>
jaroslav@1258	248	* <tr><th valign=top><i>unreserved</i>    </th>
jaroslav@1258	249	* <td>All <i>alphanum</i> characters together with those in the string
jaroslav@1258	250	* <tt>"_-!.~'()*"</tt></td></tr>
jaroslav@1258	251	* <tr><th valign=top><i>punct</i></th>
jaroslav@1258	252	* <td>The characters in the string <tt>",;:$&amp;+="</tt></td></tr>
jaroslav@1258	253	* <tr><th valign=top><i>reserved</i></th>
jaroslav@1258	254	* <td>All <i>punct</i> characters together with those in the string
jaroslav@1258	255	* <tt>"?/[]@"</tt></td></tr>
jaroslav@1258	256	* <tr><th valign=top><i>escaped</i></th>
jaroslav@1258	257	* <td>Escaped octets, that is, triplets consisting of the percent
jaroslav@1258	258	* character (<tt>'%'</tt>) followed by two hexadecimal digits
jaroslav@1258	259	* (<tt>'0'</tt>-<tt>'9'</tt>, <tt>'A'</tt>-<tt>'F'</tt>, and
jaroslav@1258	260	* <tt>'a'</tt>-<tt>'f'</tt>)</td></tr>
jaroslav@1258	261	* <tr><th valign=top><i>other</i></th>
jaroslav@1258	262	* <td>The Unicode characters that are not in the US-ASCII character set,
jaroslav@1258	263	* are not control characters (according to the {@link
jaroslav@1258	264	* java.lang.Character#isISOControl(char) Character.isISOControl}
jaroslav@1258	265	* method), and are not space characters (according to the {@link
jaroslav@1258	266	* java.lang.Character#isSpaceChar(char) Character.isSpaceChar}
jaroslav@1258	267	* method)  <i>(<b>Deviation from RFC 2396</b>, which is
jaroslav@1258	268	* limited to US-ASCII)</i></td></tr>
jaroslav@1258	269	* </table></blockquote>
jaroslav@1258	270	*
jaroslav@1258	271	* <p><a name="legal-chars"></a> The set of all legal URI characters consists of
jaroslav@1258	272	* the <i>unreserved</i>, <i>reserved</i>, <i>escaped</i>, and <i>other</i>
jaroslav@1258	273	* characters.
jaroslav@1258	274	*
jaroslav@1258	275	*
jaroslav@1258	276	* <h4> Escaped octets, quotation, encoding, and decoding </h4>
jaroslav@1258	277	*
jaroslav@1258	278	* RFC 2396 allows escaped octets to appear in the user-info, path, query, and
jaroslav@1258	279	* fragment components. Escaping serves two purposes in URIs:
jaroslav@1258	280	*
jaroslav@1258	281	* <ul>
jaroslav@1258	282	*
jaroslav@1258	283	* <li><p> To <i>encode</i> non-US-ASCII characters when a URI is required to
jaroslav@1258	284	* conform strictly to RFC 2396 by not containing any <i>other</i>
jaroslav@1258	285	* characters. </p></li>
jaroslav@1258	286	*
jaroslav@1258	287	* <li><p> To <i>quote</i> characters that are otherwise illegal in a
jaroslav@1258	288	* component. The user-info, path, query, and fragment components differ
jaroslav@1258	289	* slightly in terms of which characters are considered legal and illegal.
jaroslav@1258	290	* </p></li>
jaroslav@1258	291	*
jaroslav@1258	292	* </ul>
jaroslav@1258	293	*
jaroslav@1258	294	* These purposes are served in this class by three related operations:
jaroslav@1258	295	*
jaroslav@1258	296	* <ul>
jaroslav@1258	297	*
jaroslav@1258	298	* <li><p><a name="encode"></a> A character is <i>encoded</i> by replacing it
jaroslav@1258	299	* with the sequence of escaped octets that represent that character in the
jaroslav@1258	300	* UTF-8 character set. The Euro currency symbol (<tt>'&#92;u20AC'</tt>),
jaroslav@1258	301	* for example, is encoded as <tt>"%E2%82%AC"</tt>. <i>(<b>Deviation from
jaroslav@1258	302	* RFC 2396</b>, which does not specify any particular character
jaroslav@1258	303	* set.)</i> </p></li>
jaroslav@1258	304	*
jaroslav@1258	305	* <li><p><a name="quote"></a> An illegal character is <i>quoted</i> simply by
jaroslav@1258	306	* encoding it. The space character, for example, is quoted by replacing it
jaroslav@1258	307	* with <tt>"%20"</tt>. UTF-8 contains US-ASCII, hence for US-ASCII
jaroslav@1258	308	* characters this transformation has exactly the effect required by
jaroslav@1258	309	* RFC 2396. </p></li>
jaroslav@1258	310	*
jaroslav@1258	311	* <li><p><a name="decode"></a>
jaroslav@1258	312	* A sequence of escaped octets is <i>decoded</i> by
jaroslav@1258	313	* replacing it with the sequence of characters that it represents in the
jaroslav@1258	314	* UTF-8 character set. UTF-8 contains US-ASCII, hence decoding has the
jaroslav@1258	315	* effect of de-quoting any quoted US-ASCII characters as well as that of
jaroslav@1258	316	* decoding any encoded non-US-ASCII characters. If a <a
jaroslav@1258	317	* href="../nio/charset/CharsetDecoder.html#ce">decoding error</a> occurs
jaroslav@1258	318	* when decoding the escaped octets then the erroneous octets are replaced by
jaroslav@1258	319	* <tt>'&#92;uFFFD'</tt>, the Unicode replacement character. </p></li>
jaroslav@1258	320	*
jaroslav@1258	321	* </ul>
jaroslav@1258	322	*
jaroslav@1258	323	* These operations are exposed in the constructors and methods of this class
jaroslav@1258	324	* as follows:
jaroslav@1258	325	*
jaroslav@1258	326	* <ul>
jaroslav@1258	327	*
jaroslav@1258	328	* <li><p> The {@link #URI(java.lang.String) <code>single-argument
jaroslav@1258	329	* constructor</code>} requires any illegal characters in its argument to be
jaroslav@1258	330	* quoted and preserves any escaped octets and <i>other</i> characters that
jaroslav@1258	331	* are present. </p></li>
jaroslav@1258	332	*
jaroslav@1258	333	* <li><p> The {@link
jaroslav@1258	334	* #URI(java.lang.String,java.lang.String,java.lang.String,int,java.lang.String,java.lang.String,java.lang.String)
jaroslav@1258	335	* <code>multi-argument constructors</code>} quote illegal characters as
jaroslav@1258	336	* required by the components in which they appear. The percent character
jaroslav@1258	337	* (<tt>'%'</tt>) is always quoted by these constructors. Any <i>other</i>
jaroslav@1258	338	* characters are preserved. </p></li>
jaroslav@1258	339	*
jaroslav@1258	340	* <li><p> The {@link #getRawUserInfo() getRawUserInfo}, {@link #getRawPath()
jaroslav@1258	341	* getRawPath}, {@link #getRawQuery() getRawQuery}, {@link #getRawFragment()
jaroslav@1258	342	* getRawFragment}, {@link #getRawAuthority() getRawAuthority}, and {@link
jaroslav@1258	343	* #getRawSchemeSpecificPart() getRawSchemeSpecificPart} methods return the
jaroslav@1258	344	* values of their corresponding components in raw form, without interpreting
jaroslav@1258	345	* any escaped octets. The strings returned by these methods may contain
jaroslav@1258	346	* both escaped octets and <i>other</i> characters, and will not contain any
jaroslav@1258	347	* illegal characters. </p></li>
jaroslav@1258	348	*
jaroslav@1258	349	* <li><p> The {@link #getUserInfo() getUserInfo}, {@link #getPath()
jaroslav@1258	350	* getPath}, {@link #getQuery() getQuery}, {@link #getFragment()
jaroslav@1258	351	* getFragment}, {@link #getAuthority() getAuthority}, and {@link
jaroslav@1258	352	* #getSchemeSpecificPart() getSchemeSpecificPart} methods decode any escaped
jaroslav@1258	353	* octets in their corresponding components. The strings returned by these
jaroslav@1258	354	* methods may contain both <i>other</i> characters and illegal characters,
jaroslav@1258	355	* and will not contain any escaped octets. </p></li>
jaroslav@1258	356	*
jaroslav@1258	357	* <li><p> The {@link #toString() toString} method returns a URI string with
jaroslav@1258	358	* all necessary quotation but which may contain <i>other</i> characters.
jaroslav@1258	359	* </p></li>
jaroslav@1258	360	*
jaroslav@1258	361	* <li><p> The {@link #toASCIIString() toASCIIString} method returns a fully
jaroslav@1258	362	* quoted and encoded URI string that does not contain any <i>other</i>
jaroslav@1258	363	* characters. </p></li>
jaroslav@1258	364	*
jaroslav@1258	365	* </ul>
jaroslav@1258	366	*
jaroslav@1258	367	*
jaroslav@1258	368	* <h4> Identities </h4>
jaroslav@1258	369	*
jaroslav@1258	370	* For any URI <i>u</i>, it is always the case that
jaroslav@1258	371	*
jaroslav@1258	372	* <blockquote>
jaroslav@1258	373	* <tt>new URI(</tt><i>u</i><tt>.toString()).equals(</tt><i>u</i><tt>)</tt> .
jaroslav@1258	374	* </blockquote>
jaroslav@1258	375	*
jaroslav@1258	376	* For any URI <i>u</i> that does not contain redundant syntax such as two
jaroslav@1258	377	* slashes before an empty authority (as in <tt>file:///tmp/</tt> ) or a
jaroslav@1258	378	* colon following a host name but no port (as in
jaroslav@1258	379	* <tt>http://java.sun.com:</tt> ), and that does not encode characters
jaroslav@1258	380	* except those that must be quoted, the following identities also hold:
jaroslav@1258	381	*
jaroslav@1258	382	* <blockquote>
jaroslav@1258	383	* <tt>new URI(</tt><i>u</i><tt>.getScheme(),<br>
jaroslav@1258	384	*         </tt><i>u</i><tt>.getSchemeSpecificPart(),<br>
jaroslav@1258	385	*         </tt><i>u</i><tt>.getFragment())<br>
jaroslav@1258	386	* .equals(</tt><i>u</i><tt>)</tt>
jaroslav@1258	387	* </blockquote>
jaroslav@1258	388	*
jaroslav@1258	389	* in all cases,
jaroslav@1258	390	*
jaroslav@1258	391	* <blockquote>
jaroslav@1258	392	* <tt>new URI(</tt><i>u</i><tt>.getScheme(),<br>
jaroslav@1258	393	*         </tt><i>u</i><tt>.getAuthority(),<br>
jaroslav@1258	394	*         </tt><i>u</i><tt>.getPath(), </tt><i>u</i><tt>.getQuery(),<br>
jaroslav@1258	395	*         </tt><i>u</i><tt>.getFragment())<br>
jaroslav@1258	396	* .equals(</tt><i>u</i><tt>)</tt>
jaroslav@1258	397	* </blockquote>
jaroslav@1258	398	*
jaroslav@1258	399	* if <i>u</i> is hierarchical, and
jaroslav@1258	400	*
jaroslav@1258	401	* <blockquote>
jaroslav@1258	402	* <tt>new URI(</tt><i>u</i><tt>.getScheme(),<br>
jaroslav@1258	403	*         </tt><i>u</i><tt>.getUserInfo(), </tt><i>u</i><tt>.getHost(), </tt><i>u</i><tt>.getPort(),<br>
jaroslav@1258	404	*         </tt><i>u</i><tt>.getPath(), </tt><i>u</i><tt>.getQuery(),<br>
jaroslav@1258	405	*         </tt><i>u</i><tt>.getFragment())<br>
jaroslav@1258	406	* .equals(</tt><i>u</i><tt>)</tt>
jaroslav@1258	407	* </blockquote>
jaroslav@1258	408	*
jaroslav@1258	409	* if <i>u</i> is hierarchical and has either no authority or a server-based
jaroslav@1258	410	* authority.
jaroslav@1258	411	*
jaroslav@1258	412	*
jaroslav@1258	413	* <h4> URIs, URLs, and URNs </h4>
jaroslav@1258	414	*
jaroslav@1258	415	* A URI is a uniform resource <i>identifier</i> while a URL is a uniform
jaroslav@1258	416	* resource <i>locator</i>. Hence every URL is a URI, abstractly speaking, but
jaroslav@1258	417	* not every URI is a URL. This is because there is another subcategory of
jaroslav@1258	418	* URIs, uniform resource <i>names</i> (URNs), which name resources but do not
jaroslav@1258	419	* specify how to locate them. The <tt>mailto</tt>, <tt>news</tt>, and
jaroslav@1258	420	* <tt>isbn</tt> URIs shown above are examples of URNs.
jaroslav@1258	421	*
jaroslav@1258	422	* <p> The conceptual distinction between URIs and URLs is reflected in the
jaroslav@1258	423	* differences between this class and the {@link URL} class.
jaroslav@1258	424	*
jaroslav@1258	425	* <p> An instance of this class represents a URI reference in the syntactic
jaroslav@1258	426	* sense defined by RFC 2396. A URI may be either absolute or relative.
jaroslav@1258	427	* A URI string is parsed according to the generic syntax without regard to the
jaroslav@1258	428	* scheme, if any, that it specifies. No lookup of the host, if any, is
jaroslav@1258	429	* performed, and no scheme-dependent stream handler is constructed. Equality,
jaroslav@1258	430	* hashing, and comparison are defined strictly in terms of the character
jaroslav@1258	431	* content of the instance. In other words, a URI instance is little more than
jaroslav@1258	432	* a structured string that supports the syntactic, scheme-independent
jaroslav@1258	433	* operations of comparison, normalization, resolution, and relativization.
jaroslav@1258	434	*
jaroslav@1258	435	* <p> An instance of the {@link URL} class, by contrast, represents the
jaroslav@1258	436	* syntactic components of a URL together with some of the information required
jaroslav@1258	437	* to access the resource that it describes. A URL must be absolute, that is,
jaroslav@1258	438	* it must always specify a scheme. A URL string is parsed according to its
jaroslav@1258	439	* scheme. A stream handler is always established for a URL, and in fact it is
jaroslav@1258	440	* impossible to create a URL instance for a scheme for which no handler is
jaroslav@1258	441	* available. Equality and hashing depend upon both the scheme and the
jaroslav@1258	442	* Internet address of the host, if any; comparison is not defined. In other
jaroslav@1258	443	* words, a URL is a structured string that supports the syntactic operation of
jaroslav@1258	444	* resolution as well as the network I/O operations of looking up the host and
jaroslav@1258	445	* opening a connection to the specified resource.
jaroslav@1258	446	*
jaroslav@1258	447	*
jaroslav@1258	448	* @author Mark Reinhold
jaroslav@1258	449	* @since 1.4
jaroslav@1258	450	*
jaroslav@1258	451	* @see <a href="http://www.ietf.org/rfc/rfc2279.txt"><i>RFC 2279: UTF-8, a
jaroslav@1258	452	* transformation format of ISO 10646</i></a>, <br><a
jaroslav@1258	453	* href="http://www.ietf.org/rfc/rfc2373.txt"><i>RFC 2373: IPv6 Addressing
jaroslav@1258	454	* Architecture</i></a>, <br><a
jaroslav@1258	455	* href="http://www.ietf.org/rfc/rfc2396.txt"><i>RFC 2396: Uniform
jaroslav@1258	456	* Resource Identifiers (URI): Generic Syntax</i></a>, <br><a
jaroslav@1258	457	* href="http://www.ietf.org/rfc/rfc2732.txt"><i>RFC 2732: Format for
jaroslav@1258	458	* Literal IPv6 Addresses in URLs</i></a>, <br><a
jaroslav@1258	459	* href="URISyntaxException.html">URISyntaxException</a>
jaroslav@1258	460	*/
jaroslav@1258	461
jaroslav@1258	462	public final class URI
jaroslav@1258	463	implements Comparable<URI>, Serializable
jaroslav@1258	464	{
jaroslav@1258	465
jaroslav@1258	466	// Note: Comments containing the word "ASSERT" indicate places where a
jaroslav@1258	467	// throw of an InternalError should be replaced by an appropriate assertion
jaroslav@1258	468	// statement once asserts are enabled in the build.
jaroslav@1258	469
jaroslav@1258	470	static final long serialVersionUID = -6052424284110960213L;
jaroslav@1258	471
jaroslav@1258	472
jaroslav@1258	473	// -- Properties and components of this instance --
jaroslav@1258	474
jaroslav@1258	475	// Components of all URIs: [<scheme>:]<scheme-specific-part>[#<fragment>]
jaroslav@1258	476	private transient String scheme; // null ==> relative URI
jaroslav@1258	477	private transient String fragment; // null ==> undefined
jaroslav@1258	478
jaroslav@1258	479	// Hierarchical URI components: [//<authority>]<path>[?<query>]
jaroslav@1258	480	private transient String authority; // Registry or server; null ==> undefined
jaroslav@1258	481
jaroslav@1258	482	// Server-based authority: [<userInfo>@]<host>[:<port>]
jaroslav@1258	483	private transient String userInfo; // null ==> undefined
jaroslav@1258	484	private transient String host; // null ==> registry-based
jaroslav@1258	485	private transient int port = -1; // -1 ==> undefined
jaroslav@1258	486
jaroslav@1258	487	// Remaining components of hierarchical URIs
jaroslav@1258	488	private transient String path; // null ==> opaque
jaroslav@1258	489	private transient String query; // null ==> undefined
jaroslav@1258	490
jaroslav@1258	491	// The remaining fields may be computed on demand; like the components above they are transient
jaroslav@1258	492
jaroslav@1258	493	private volatile transient String schemeSpecificPart;
jaroslav@1258	494	private volatile transient int hash; // Zero ==> undefined
jaroslav@1258	495
jaroslav@1258	496	private volatile transient String decodedUserInfo = null; // NOTE(review): the decoded* fields look like lazily filled caches of the decoded components (null ==> not yet computed) - confirm against the getters
jaroslav@1258	497	private volatile transient String decodedAuthority = null;
jaroslav@1258	498	private volatile transient String decodedPath = null;
jaroslav@1258	499	private volatile transient String decodedQuery = null;
jaroslav@1258	500	private volatile transient String decodedFragment = null;
jaroslav@1258	501	private volatile transient String decodedSchemeSpecificPart = null;
jaroslav@1258	502
jaroslav@1258	503	/**
jaroslav@1258	504	* The string form of this URI.
jaroslav@1258	505	*
jaroslav@1258	506	* @serial
jaroslav@1258	507	*/
jaroslav@1258	508	private volatile String string; // The only serializable field
jaroslav@1258	509
jaroslav@1258	510
jaroslav@1258	511
jaroslav@1258	512	// -- Constructors and factories --
jaroslav@1258	513
jaroslav@1258	514	private URI() { } // Used internally; every component starts undefined (String fields null, port -1, hash 0)
jaroslav@1258	515
jaroslav@1258	516	/**
jaroslav@1258	517	* Constructs a URI by parsing the given string.
jaroslav@1258	518	*
jaroslav@1258	519	* <p> This constructor parses the given string exactly as specified by the
jaroslav@1258	520	* grammar in <a
jaroslav@1258	521	* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
jaroslav@1258	522	* Appendix A, <b><i>except for the following deviations:</i></b> </p>
jaroslav@1258	523	*
jaroslav@1258	524	* <ul type=disc>
jaroslav@1258	525	*
jaroslav@1258	526	* <li><p> An empty authority component is permitted as long as it is
jaroslav@1258	527	* followed by a non-empty path, a query component, or a fragment
jaroslav@1258	528	* component. This allows the parsing of URIs such as
jaroslav@1258	529	* <tt>"file:///foo/bar"</tt>, which seems to be the intent of
jaroslav@1258	530	* RFC 2396 although the grammar does not permit it. If the
jaroslav@1258	531	* authority component is empty then the user-information, host, and port
jaroslav@1258	532	* components are undefined. </p></li>
jaroslav@1258	533	*
jaroslav@1258	534	* <li><p> Empty relative paths are permitted; this seems to be the
jaroslav@1258	535	* intent of RFC 2396 although the grammar does not permit it. The
jaroslav@1258	536	* primary consequence of this deviation is that a standalone fragment
jaroslav@1258	537	* such as <tt>"#foo"</tt> parses as a relative URI with an empty path
jaroslav@1258	538	* and the given fragment, and can be usefully <a
jaroslav@1258	539	* href="#resolve-frag">resolved</a> against a base URI. </p></li>
jaroslav@1258	540	*
jaroslav@1258	541	* <li><p> IPv4 addresses in host components are parsed rigorously, as
jaroslav@1258	542	* specified by <a
jaroslav@1258	543	* href="http://www.ietf.org/rfc/rfc2732.txt">RFC 2732</a>: Each
jaroslav@1258	544	* element of a dotted-quad address must contain no more than three
jaroslav@1258	545	* decimal digits. Each element is further constrained to have a value
jaroslav@1258	546	* no greater than 255. </p></li>
jaroslav@1258	547	*
jaroslav@1258	548	* <li> <p> Hostnames in host components that comprise only a single
jaroslav@1258	549	* domain label are permitted to start with an <i>alphanum</i>
jaroslav@1258	550	* character. This seems to be the intent of <a
jaroslav@1258	551	* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>
jaroslav@1258	552	* section 3.2.2 although the grammar does not permit it. The
jaroslav@1258	553	* consequence of this deviation is that the authority component of a
jaroslav@1258	554	* hierarchical URI such as <tt>s://123</tt>, will parse as a server-based
jaroslav@1258	555	* authority. </p></li>
jaroslav@1258	556	*
jaroslav@1258	557	* <li><p> IPv6 addresses are permitted for the host component. An IPv6
jaroslav@1258	558	* address must be enclosed in square brackets (<tt>'['</tt> and
jaroslav@1258	559	* <tt>']'</tt>) as specified by <a
jaroslav@1258	560	* href="http://www.ietf.org/rfc/rfc2732.txt">RFC 2732</a>. The
jaroslav@1258	561	* IPv6 address itself must parse according to <a
jaroslav@1258	562	* href="http://www.ietf.org/rfc/rfc2373.txt">RFC 2373</a>. IPv6
jaroslav@1258	563	* addresses are further constrained to describe no more than sixteen
jaroslav@1258	564	* bytes of address information, a constraint implicit in RFC 2373
jaroslav@1258	565	* but not expressible in the grammar. </p></li>
jaroslav@1258	566	*
jaroslav@1258	567	* <li><p> Characters in the <i>other</i> category are permitted wherever
jaroslav@1258	568	* RFC 2396 permits <i>escaped</i> octets, that is, in the
jaroslav@1258	569	* user-information, path, query, and fragment components, as well as in
jaroslav@1258	570	* the authority component if the authority is registry-based. This
jaroslav@1258	571	* allows URIs to contain Unicode characters beyond those in the US-ASCII
jaroslav@1258	572	* character set. </p></li>
jaroslav@1258	573	*
jaroslav@1258	574	* </ul>
jaroslav@1258	575	*
jaroslav@1258	576	* @param str The string to be parsed into a URI
jaroslav@1258	577	*
jaroslav@1258	578	* @throws NullPointerException
jaroslav@1258	579	* If <tt>str</tt> is <tt>null</tt>
jaroslav@1258	580	*
jaroslav@1258	581	* @throws URISyntaxException
jaroslav@1258	582	* If the given string violates RFC 2396, as augmented
jaroslav@1258	583	* by the above deviations
jaroslav@1258	584	*/
jaroslav@1258	585	public URI(String str) throws URISyntaxException {
jaroslav@1258	586	new Parser(str).parse(false); // Parser is defined outside this view; the result is discarded, so it presumably fills in this URI's fields. false is the flag that parseServerAuthority() passes as true - presumably "do not require a server-based authority"; confirm against Parser.parse
jaroslav@1258	587	}
jaroslav@1258	588
jaroslav@1258	589	/**
jaroslav@1258	590	* Constructs a hierarchical URI from the given components.
jaroslav@1258	591	*
jaroslav@1258	592	* <p> If a scheme is given then the path, if also given, must either be
jaroslav@1258	593	* empty or begin with a slash character (<tt>'/'</tt>). Otherwise a
jaroslav@1258	594	* component of the new URI may be left undefined by passing <tt>null</tt>
jaroslav@1258	595	* for the corresponding parameter or, in the case of the <tt>port</tt>
jaroslav@1258	596	* parameter, by passing <tt>-1</tt>.
jaroslav@1258	597	*
jaroslav@1258	598	* <p> This constructor first builds a URI string from the given components
jaroslav@1258	599	* according to the rules specified in <a
jaroslav@1258	600	* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
jaroslav@1258	601	* section 5.2, step 7: </p>
jaroslav@1258	602	*
jaroslav@1258	603	* <ol>
jaroslav@1258	604	*
jaroslav@1258	605	* <li><p> Initially, the result string is empty. </p></li>
jaroslav@1258	606	*
jaroslav@1258	607	* <li><p> If a scheme is given then it is appended to the result,
jaroslav@1258	608	* followed by a colon character (<tt>':'</tt>). </p></li>
jaroslav@1258	609	*
jaroslav@1258	610	* <li><p> If user information, a host, or a port are given then the
jaroslav@1258	611	* string <tt>"//"</tt> is appended. </p></li>
jaroslav@1258	612	*
jaroslav@1258	613	* <li><p> If user information is given then it is appended, followed by
jaroslav@1258	614	* a commercial-at character (<tt>'@'</tt>). Any character not in the
jaroslav@1258	615	* <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
jaroslav@1258	616	* categories is <a href="#quote">quoted</a>. </p></li>
jaroslav@1258	617	*
jaroslav@1258	618	* <li><p> If a host is given then it is appended. If the host is a
jaroslav@1258	619	* literal IPv6 address but is not enclosed in square brackets
jaroslav@1258	620	* (<tt>'['</tt> and <tt>']'</tt>) then the square brackets are added.
jaroslav@1258	621	* </p></li>
jaroslav@1258	622	*
jaroslav@1258	623	* <li><p> If a port number is given then a colon character
jaroslav@1258	624	* (<tt>':'</tt>) is appended, followed by the port number in decimal.
jaroslav@1258	625	* </p></li>
jaroslav@1258	626	*
jaroslav@1258	627	* <li><p> If a path is given then it is appended. Any character not in
jaroslav@1258	628	* the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
jaroslav@1258	629	* categories, and not equal to the slash character (<tt>'/'</tt>) or the
jaroslav@1258	630	* commercial-at character (<tt>'@'</tt>), is quoted. </p></li>
jaroslav@1258	631	*
jaroslav@1258	632	* <li><p> If a query is given then a question-mark character
jaroslav@1258	633	* (<tt>'?'</tt>) is appended, followed by the query. Any character that
jaroslav@1258	634	* is not a <a href="#legal-chars">legal URI character</a> is quoted.
jaroslav@1258	635	* </p></li>
jaroslav@1258	636	*
jaroslav@1258	637	* <li><p> Finally, if a fragment is given then a hash character
jaroslav@1258	638	* (<tt>'#'</tt>) is appended, followed by the fragment. Any character
jaroslav@1258	639	* that is not a legal URI character is quoted. </p></li>
jaroslav@1258	640	*
jaroslav@1258	641	* </ol>
jaroslav@1258	642	*
jaroslav@1258	643	* <p> The resulting URI string is then parsed as if by invoking the {@link
jaroslav@1258	644	* #URI(String)} constructor and then invoking the {@link
jaroslav@1258	645	* #parseServerAuthority()} method upon the result; this may cause a {@link
jaroslav@1258	646	* URISyntaxException} to be thrown. </p>
jaroslav@1258	647	*
jaroslav@1258	648	* @param scheme Scheme name
jaroslav@1258	649	* @param userInfo User name and authorization information
jaroslav@1258	650	* @param host Host name
jaroslav@1258	651	* @param port Port number
jaroslav@1258	652	* @param path Path
jaroslav@1258	653	* @param query Query
jaroslav@1258	654	* @param fragment Fragment
jaroslav@1258	655	*
jaroslav@1258	656	* @throws URISyntaxException
jaroslav@1258	657	* If both a scheme and a path are given but the path is relative,
jaroslav@1258	658	* if the URI string constructed from the given components violates
jaroslav@1258	659	* RFC 2396, or if the authority component of the string is
jaroslav@1258	660	* present but cannot be parsed as a server-based authority
jaroslav@1258	661	*/
jaroslav@1258	662	public URI(String scheme,
jaroslav@1258	663	String userInfo, String host, int port,
jaroslav@1258	664	String path, String query, String fragment)
jaroslav@1258	665	throws URISyntaxException
jaroslav@1258	666	{
jaroslav@1258	667	String s = toString(scheme, null, // second slot is where URI(scheme, ssp, fragment) passes its scheme-specific part; null here
jaroslav@1258	668	null, userInfo, host, port, // third slot is where URI(scheme, authority, ...) passes a ready-made authority; null here, userInfo/host/port are given instead
jaroslav@1258	669	path, query, fragment);
jaroslav@1258	670	checkPath(s, scheme, path); // defined outside this view; presumably enforces the documented rule that a path given together with a scheme must be empty or start with '/' - confirm
jaroslav@1258	671	new Parser(s).parse(true); // true is the same flag parseServerAuthority() passes; the Javadoc above says the string is parsed as if by that method
jaroslav@1258	672	}
jaroslav@1258	673
jaroslav@1258	674	/**
jaroslav@1258	675	* Constructs a hierarchical URI from the given components.
jaroslav@1258	676	*
jaroslav@1258	677	* <p> If a scheme is given then the path, if also given, must either be
jaroslav@1258	678	* empty or begin with a slash character (<tt>'/'</tt>). Otherwise a
jaroslav@1258	679	* component of the new URI may be left undefined by passing <tt>null</tt>
jaroslav@1258	680	* for the corresponding parameter.
jaroslav@1258	681	*
jaroslav@1258	682	* <p> This constructor first builds a URI string from the given components
jaroslav@1258	683	* according to the rules specified in <a
jaroslav@1258	684	* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
jaroslav@1258	685	* section 5.2, step 7: </p>
jaroslav@1258	686	*
jaroslav@1258	687	* <ol>
jaroslav@1258	688	*
jaroslav@1258	689	* <li><p> Initially, the result string is empty. </p></li>
jaroslav@1258	690	*
jaroslav@1258	691	* <li><p> If a scheme is given then it is appended to the result,
jaroslav@1258	692	* followed by a colon character (<tt>':'</tt>). </p></li>
jaroslav@1258	693	*
jaroslav@1258	694	* <li><p> If an authority is given then the string <tt>"//"</tt> is
jaroslav@1258	695	* appended, followed by the authority. If the authority contains a
jaroslav@1258	696	* literal IPv6 address then the address must be enclosed in square
jaroslav@1258	697	* brackets (<tt>'['</tt> and <tt>']'</tt>). Any character not in the
jaroslav@1258	698	* <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
jaroslav@1258	699	* categories, and not equal to the commercial-at character
jaroslav@1258	700	* (<tt>'@'</tt>), is <a href="#quote">quoted</a>. </p></li>
jaroslav@1258	701	*
jaroslav@1258	702	* <li><p> If a path is given then it is appended. Any character not in
jaroslav@1258	703	* the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
jaroslav@1258	704	* categories, and not equal to the slash character (<tt>'/'</tt>) or the
jaroslav@1258	705	* commercial-at character (<tt>'@'</tt>), is quoted. </p></li>
jaroslav@1258	706	*
jaroslav@1258	707	* <li><p> If a query is given then a question-mark character
jaroslav@1258	708	* (<tt>'?'</tt>) is appended, followed by the query. Any character that
jaroslav@1258	709	* is not a <a href="#legal-chars">legal URI character</a> is quoted.
jaroslav@1258	710	* </p></li>
jaroslav@1258	711	*
jaroslav@1258	712	* <li><p> Finally, if a fragment is given then a hash character
jaroslav@1258	713	* (<tt>'#'</tt>) is appended, followed by the fragment. Any character
jaroslav@1258	714	* that is not a legal URI character is quoted. </p></li>
jaroslav@1258	715	*
jaroslav@1258	716	* </ol>
jaroslav@1258	717	*
jaroslav@1258	718	* <p> The resulting URI string is then parsed as if by invoking the {@link
jaroslav@1258	719	* #URI(String)} constructor and then invoking the {@link
jaroslav@1258	720	* #parseServerAuthority()} method upon the result; this may cause a {@link
jaroslav@1258	721	* URISyntaxException} to be thrown. </p>
jaroslav@1258	722	*
jaroslav@1258	723	* @param scheme Scheme name
jaroslav@1258	724	* @param authority Authority
jaroslav@1258	725	* @param path Path
jaroslav@1258	726	* @param query Query
jaroslav@1258	727	* @param fragment Fragment
jaroslav@1258	728	*
jaroslav@1258	729	* @throws URISyntaxException
jaroslav@1258	730	* If both a scheme and a path are given but the path is relative,
jaroslav@1258	731	* if the URI string constructed from the given components violates
jaroslav@1258	732	* RFC 2396, or if the authority component of the string is
jaroslav@1258	733	* present but cannot be parsed as a server-based authority
jaroslav@1258	734	*/
jaroslav@1258	735	public URI(String scheme,
jaroslav@1258	736	String authority,
jaroslav@1258	737	String path, String query, String fragment)
jaroslav@1258	738	throws URISyntaxException
jaroslav@1258	739	{
jaroslav@1258	740	String s = toString(scheme, null, // second slot is where URI(scheme, ssp, fragment) passes its scheme-specific part; null here
jaroslav@1258	741	authority, null, null, -1, // authority is passed whole; userInfo and host are null and port is -1, i.e. undefined
jaroslav@1258	742	path, query, fragment);
jaroslav@1258	743	checkPath(s, scheme, path); // defined outside this view; presumably enforces the documented rule that a path given together with a scheme must be empty or start with '/' - confirm
jaroslav@1258	744	new Parser(s).parse(false); // NOTE(review): the Javadoc above says "as if by ... parseServerAuthority()", yet this passes false where the seven-argument constructor and parseServerAuthority() pass true - confirm which is intended
jaroslav@1258	745	}
jaroslav@1258	746
jaroslav@1258	747	/**
jaroslav@1258	748	* Constructs a hierarchical URI from the given components.
jaroslav@1258	749	*
jaroslav@1258	750	* <p> A component may be left undefined by passing <tt>null</tt>.
jaroslav@1258	751	*
jaroslav@1258	752	* <p> This convenience constructor works as if by invoking the
jaroslav@1258	753	* seven-argument constructor as follows:
jaroslav@1258	754	*
jaroslav@1258	755	* <blockquote><tt>
jaroslav@1258	756	* new {@link #URI(String, String, String, int, String, String, String)
jaroslav@1258	757	* URI}(scheme, null, host, -1, path, null, fragment);
jaroslav@1258	758	* </tt></blockquote>
jaroslav@1258	759	*
jaroslav@1258	760	* @param scheme Scheme name
jaroslav@1258	761	* @param host Host name
jaroslav@1258	762	* @param path Path
jaroslav@1258	763	* @param fragment Fragment
jaroslav@1258	764	*
jaroslav@1258	765	* @throws URISyntaxException
jaroslav@1258	766	* If the URI string constructed from the given components
jaroslav@1258	767	* violates RFC 2396
jaroslav@1258	768	*/
jaroslav@1258	769	public URI(String scheme, String host, String path, String fragment)
jaroslav@1258	770	throws URISyntaxException
jaroslav@1258	771	{
jaroslav@1258	772	this(scheme, null, host, -1, path, null, fragment); // seven-argument constructor with userInfo and query undefined (null) and port undefined (-1)
jaroslav@1258	773	}
jaroslav@1258	774
jaroslav@1258	775	/**
jaroslav@1258	776	* Constructs a URI from the given components.
jaroslav@1258	777	*
jaroslav@1258	778	* <p> A component may be left undefined by passing <tt>null</tt>.
jaroslav@1258	779	*
jaroslav@1258	780	* <p> This constructor first builds a URI in string form using the given
jaroslav@1258	781	* components as follows: </p>
jaroslav@1258	782	*
jaroslav@1258	783	* <ol>
jaroslav@1258	784	*
jaroslav@1258	785	* <li><p> Initially, the result string is empty. </p></li>
jaroslav@1258	786	*
jaroslav@1258	787	* <li><p> If a scheme is given then it is appended to the result,
jaroslav@1258	788	* followed by a colon character (<tt>':'</tt>). </p></li>
jaroslav@1258	789	*
jaroslav@1258	790	* <li><p> If a scheme-specific part is given then it is appended. Any
jaroslav@1258	791	* character that is not a <a href="#legal-chars">legal URI character</a>
jaroslav@1258	792	* is <a href="#quote">quoted</a>. </p></li>
jaroslav@1258	793	*
jaroslav@1258	794	* <li><p> Finally, if a fragment is given then a hash character
jaroslav@1258	795	* (<tt>'#'</tt>) is appended to the string, followed by the fragment.
jaroslav@1258	796	* Any character that is not a legal URI character is quoted. </p></li>
jaroslav@1258	797	*
jaroslav@1258	798	* </ol>
jaroslav@1258	799	*
jaroslav@1258	800	* <p> The resulting URI string is then parsed in order to create the new
jaroslav@1258	801	* URI instance as if by invoking the {@link #URI(String)} constructor;
jaroslav@1258	802	* this may cause a {@link URISyntaxException} to be thrown. </p>
jaroslav@1258	803	*
jaroslav@1258	804	* @param scheme Scheme name
jaroslav@1258	805	* @param ssp Scheme-specific part
jaroslav@1258	806	* @param fragment Fragment
jaroslav@1258	807	*
jaroslav@1258	808	* @throws URISyntaxException
jaroslav@1258	809	* If the URI string constructed from the given components
jaroslav@1258	810	* violates RFC 2396
jaroslav@1258	811	*/
jaroslav@1258	812	public URI(String scheme, String ssp, String fragment)
jaroslav@1258	813	throws URISyntaxException
jaroslav@1258	814	{
jaroslav@1258	815	new Parser(toString(scheme, ssp, // only scheme, scheme-specific part and fragment are supplied
jaroslav@1258	816	null, null, null, -1, // authority, userInfo and host are null and port is -1, i.e. undefined
jaroslav@1258	817	null, null, fragment)) // path and query are null as well
jaroslav@1258	818	.parse(false); // same flag as URI(String), matching the "as if by invoking URI(String)" wording in the Javadoc above
jaroslav@1258	819	}
jaroslav@1258	820
jaroslav@1258	821	/**
jaroslav@1258	822	* Creates a URI by parsing the given string.
jaroslav@1258	823	*
jaroslav@1258	824	* <p> This convenience factory method works as if by invoking the {@link
jaroslav@1258	825	* #URI(String)} constructor; any {@link URISyntaxException} thrown by the
jaroslav@1258	826	* constructor is caught and wrapped in a new {@link
jaroslav@1258	827	* IllegalArgumentException} object, which is then thrown.
jaroslav@1258	828	*
jaroslav@1258	829	* <p> This method is provided for use in situations where it is known that
jaroslav@1258	830	* the given string is a legal URI, for example for URI constants declared
jaroslav@1258	831	* within a program, and so it would be considered a programming error
jaroslav@1258	832	* for the string not to parse as such. The constructors, which throw
jaroslav@1258	833	* {@link URISyntaxException} directly, should be used in situations where a
jaroslav@1258	834	* URI is being constructed from user input or from some other source that
jaroslav@1258	835	* may be prone to errors. </p>
jaroslav@1258	836	*
jaroslav@1258	837	* @param str The string to be parsed into a URI
jaroslav@1258	838	* @return The new URI
jaroslav@1258	839	*
jaroslav@1258	840	* @throws NullPointerException
jaroslav@1258	841	* If <tt>str</tt> is <tt>null</tt>
jaroslav@1258	842	*
jaroslav@1258	843	* @throws IllegalArgumentException
jaroslav@1258	844	* If the given string violates RFC 2396
jaroslav@1258	845	*/
jaroslav@1258	846	public static URI create(String str) {
jaroslav@1258	847	try {
jaroslav@1258	848	return new URI(str); // same parse as the URI(String) constructor
jaroslav@1258	849	} catch (URISyntaxException x) {
jaroslav@1258	850	throw new IllegalArgumentException(x.getMessage(), x); // checked exception rethrown unchecked; message copied and x kept as the cause
jaroslav@1258	851	}
jaroslav@1258	852	}
jaroslav@1258	853
jaroslav@1258	854
jaroslav@1258	855	// -- Operations --
jaroslav@1258	856
jaroslav@1258	857	/**
jaroslav@1258	858	* Attempts to parse this URI's authority component, if defined, into
jaroslav@1258	859	* user-information, host, and port components.
jaroslav@1258	860	*
jaroslav@1258	861	* <p> If this URI's authority component has already been recognized as
jaroslav@1258	862	* being server-based then it will already have been parsed into
jaroslav@1258	863	* user-information, host, and port components. In this case, or if this
jaroslav@1258	864	* URI has no authority component, this method simply returns this URI.
jaroslav@1258	865	*
jaroslav@1258	866	* <p> Otherwise this method attempts once more to parse the authority
jaroslav@1258	867	* component into user-information, host, and port components, and throws
jaroslav@1258	868	* an exception describing why the authority component could not be parsed
jaroslav@1258	869	* in that way.
jaroslav@1258	870	*
jaroslav@1258	871	* <p> This method is provided because the generic URI syntax specified in
jaroslav@1258	872	* <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>
jaroslav@1258	873	* cannot always distinguish a malformed server-based authority from a
jaroslav@1258	874	* legitimate registry-based authority. It must therefore treat some
jaroslav@1258	875	* instances of the former as instances of the latter. The authority
jaroslav@1258	876	* component in the URI string <tt>"//foo:bar"</tt>, for example, is not a
jaroslav@1258	877	* legal server-based authority but it is legal as a registry-based
jaroslav@1258	878	* authority.
jaroslav@1258	879	*
jaroslav@1258	880	* <p> In many common situations, for example when working with URIs that are
jaroslav@1258	881	* known to be either URNs or URLs, the hierarchical URIs being used will
jaroslav@1258	882	* always be server-based. They therefore must either be parsed as such or
jaroslav@1258	883	* treated as an error. In these cases a statement such as
jaroslav@1258	884	*
jaroslav@1258	885	* <blockquote>
jaroslav@1258	886	* <tt>URI </tt><i>u</i><tt> = new URI(str).parseServerAuthority();</tt>
jaroslav@1258	887	* </blockquote>
jaroslav@1258	888	*
jaroslav@1258	889	* <p> can be used to ensure that <i>u</i> always refers to a URI that, if
jaroslav@1258	890	* it has an authority component, has a server-based authority with proper
jaroslav@1258	891	* user-information, host, and port components. Invoking this method also
jaroslav@1258	892	* ensures that if the authority could not be parsed in that way then an
jaroslav@1258	893	* appropriate diagnostic message can be issued based upon the exception
jaroslav@1258	894	* that is thrown. </p>
jaroslav@1258	895	*
jaroslav@1258	896	* @return A URI whose authority field has been parsed
jaroslav@1258	897	* as a server-based authority
jaroslav@1258	898	*
jaroslav@1258	899	* @throws URISyntaxException
jaroslav@1258	900	* If the authority component of this URI is defined
jaroslav@1258	901	* but cannot be parsed as a server-based authority
jaroslav@1258	902	* according to RFC 2396
jaroslav@1258	903	*/
jaroslav@1258	904	public URI parseServerAuthority()
jaroslav@1258	905	throws URISyntaxException
jaroslav@1258	906	{
jaroslav@1258	907	// We could be clever and cache the error message and index from the
jaroslav@1258	908	// exception thrown during the original parse, but that would require
jaroslav@1258	909	// either more fields or a more-obscure representation.
jaroslav@1258	910	if ((host != null) || (authority == null)) // host already parsed out, or no authority component at all: nothing to do
jaroslav@1258	911	return this;
jaroslav@1258	912	defineString(); // defined outside this view; presumably makes sure the 'string' field is populated before it is re-parsed - confirm
jaroslav@1258	913	new Parser(string).parse(true); // re-parse with the flag set to true; per the Javadoc above this throws URISyntaxException when the authority is not server-based
jaroslav@1258	914	return this;
jaroslav@1258	915	}
jaroslav@1258	916
jaroslav@1258	917	/**
jaroslav@1258	918	* Normalizes this URI's path.
jaroslav@1258	919	*
jaroslav@1258	920	* <p> If this URI is opaque, or if its path is already in normal form,
jaroslav@1258	921	* then this URI is returned. Otherwise a new URI is constructed that is
jaroslav@1258	922	* identical to this URI except that its path is computed by normalizing
jaroslav@1258	923	* this URI's path in a manner consistent with <a
jaroslav@1258	924	* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
jaroslav@1258	925	* section 5.2, step 6, sub-steps c through f; that is:
jaroslav@1258	926	* </p>
jaroslav@1258	927	*
jaroslav@1258	928	* <ol>
jaroslav@1258	929	*
jaroslav@1258	930	* <li><p> All <tt>"."</tt> segments are removed. </p></li>
jaroslav@1258	931	*
jaroslav@1258	932	* <li><p> If a <tt>".."</tt> segment is preceded by a non-<tt>".."</tt>
jaroslav@1258	933	* segment then both of these segments are removed. This step is
jaroslav@1258	934	* repeated until it is no longer applicable. </p></li>
jaroslav@1258	935	*
jaroslav@1258	936	* <li><p> If the path is relative, and if its first segment contains a
jaroslav@1258	937	* colon character (<tt>':'</tt>), then a <tt>"."</tt> segment is
jaroslav@1258	938	* prepended. This prevents a relative URI with a path such as
jaroslav@1258	939	* <tt>"a:b/c/d"</tt> from later being re-parsed as an opaque URI with a
jaroslav@1258	940	* scheme of <tt>"a"</tt> and a scheme-specific part of <tt>"b/c/d"</tt>.
jaroslav@1258	941	* <b><i>(Deviation from RFC 2396)</i></b> </p></li>
jaroslav@1258	942	*
jaroslav@1258	943	* </ol>
jaroslav@1258	944	*
jaroslav@1258	945	* <p> A normalized path will begin with one or more <tt>".."</tt> segments
jaroslav@1258	946	* if there were insufficient non-<tt>".."</tt> segments preceding them to
jaroslav@1258	947	* allow their removal. A normalized path will begin with a <tt>"."</tt>
jaroslav@1258	948	* segment if one was inserted by step 3 above. Otherwise, a normalized
jaroslav@1258	949	* path will not contain any <tt>"."</tt> or <tt>".."</tt> segments. </p>
jaroslav@1258	950	*
jaroslav@1258	951	* @return A URI equivalent to this URI,
jaroslav@1258	952	* but whose path is in normal form
jaroslav@1258	953	*/
jaroslav@1258	954	public URI normalize() {
jaroslav@1258	955	return normalize(this); // all the work is done by the one-argument normalize overload, defined outside this view
jaroslav@1258	956	}
jaroslav@1258	957
jaroslav@1258	958	/**
jaroslav@1258	959	* Resolves the given URI against this URI.
jaroslav@1258	960	*
jaroslav@1258	961	* <p> If the given URI is already absolute, or if this URI is opaque, then
jaroslav@1258	962	* the given URI is returned.
jaroslav@1258	963	*
jaroslav@1258	964	* <p><a name="resolve-frag"></a> If the given URI's fragment component is
jaroslav@1258	965	* defined, its path component is empty, and its scheme, authority, and
jaroslav@1258	966	* query components are undefined, then a URI with the given fragment but
jaroslav@1258	967	* with all other components equal to those of this URI is returned. This
jaroslav@1258	968	* allows a URI representing a standalone fragment reference, such as
jaroslav@1258	969	* <tt>"#foo"</tt>, to be usefully resolved against a base URI.
jaroslav@1258	970	*
jaroslav@1258	971	* <p> Otherwise this method constructs a new hierarchical URI in a manner
jaroslav@1258	972	* consistent with <a
jaroslav@1258	973	* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
jaroslav@1258	974	* section 5.2; that is: </p>
jaroslav@1258	975	*
jaroslav@1258	976	* <ol>
jaroslav@1258	977	*
jaroslav@1258	978	* <li><p> A new URI is constructed with this URI's scheme and the given
jaroslav@1258	979	* URI's query and fragment components. </p></li>
jaroslav@1258	980	*
jaroslav@1258	981	* <li><p> If the given URI has an authority component then the new URI's
jaroslav@1258	982	* authority and path are taken from the given URI. </p></li>
jaroslav@1258	983	*
jaroslav@1258	984	* <li><p> Otherwise the new URI's authority component is copied from
jaroslav@1258	985	* this URI, and its path is computed as follows: </p>
jaroslav@1258	986	*
jaroslav@1258	987	* <ol type=a>
jaroslav@1258	988	*
jaroslav@1258	989	* <li><p> If the given URI's path is absolute then the new URI's path
jaroslav@1258	990	* is taken from the given URI. </p></li>
jaroslav@1258	991	*
jaroslav@1258	992	* <li><p> Otherwise the given URI's path is relative, and so the new
jaroslav@1258	993	* URI's path is computed by resolving the path of the given URI
jaroslav@1258	994	* against the path of this URI. This is done by concatenating all but
jaroslav@1258	995	* the last segment of this URI's path, if any, with the given URI's
jaroslav@1258	996	* path and then normalizing the result as if by invoking the {@link
jaroslav@1258	997	* #normalize() normalize} method. </p></li>
jaroslav@1258	998	*
jaroslav@1258	999	* </ol></li>
jaroslav@1258	1000	*
jaroslav@1258	1001	* </ol>
jaroslav@1258	1002	*
jaroslav@1258	1003	* <p> The result of this method is absolute if, and only if, either this
jaroslav@1258	1004	* URI is absolute or the given URI is absolute. </p>
jaroslav@1258	1005	*
jaroslav@1258	1006	* @param uri The URI to be resolved against this URI
jaroslav@1258	1007	* @return The resulting URI
jaroslav@1258	1008	*
jaroslav@1258	1009	* @throws NullPointerException
jaroslav@1258	1010	* If <tt>uri</tt> is <tt>null</tt>
jaroslav@1258	1011	*/
jaroslav@1258	1012	public URI resolve(URI uri) {
jaroslav@1258	1013	return resolve(this, uri); // two-argument overload, defined outside this view; this URI is passed first as the base
jaroslav@1258	1014	}
jaroslav@1258	1015
jaroslav@1258	1016	/**
jaroslav@1258	1017	* Constructs a new URI by parsing the given string and then resolving it
jaroslav@1258	1018	* against this URI.
jaroslav@1258	1019	*
jaroslav@1258	1020	* <p> This convenience method works as if invoking it were equivalent to
jaroslav@1258	1021	* evaluating the expression <tt>{@link #resolve(java.net.URI)
jaroslav@1258	1022	* resolve}(URI.{@link #create(String) create}(str))</tt>. </p>
jaroslav@1258	1023	*
jaroslav@1258	1024	* @param str The string to be parsed into a URI
jaroslav@1258	1025	* @return The resulting URI
jaroslav@1258	1026	*
jaroslav@1258	1027	* @throws NullPointerException
jaroslav@1258	1028	* If <tt>str</tt> is <tt>null</tt>
jaroslav@1258	1029	*
jaroslav@1258	1030	* @throws IllegalArgumentException
jaroslav@1258	1031	* If the given string violates RFC 2396
jaroslav@1258	1032	*/
jaroslav@1258	1033	public URI resolve(String str) {
jaroslav@1258	1034	return resolve(URI.create(str)); // create() reports a syntax error as IllegalArgumentException, so no checked exception escapes here
jaroslav@1258	1035	}
jaroslav@1258	1036
jaroslav@1258	1037	/**
jaroslav@1258	1038	* Relativizes the given URI against this URI.
jaroslav@1258	1039	*
jaroslav@1258	1040	* <p> The relativization of the given URI against this URI is computed as
jaroslav@1258	1041	* follows: </p>
jaroslav@1258	1042	*
jaroslav@1258	1043	* <ol>
jaroslav@1258	1044	*
jaroslav@1258	1045	* <li><p> If either this URI or the given URI are opaque, or if the
jaroslav@1258	1046	* scheme and authority components of the two URIs are not identical, or
jaroslav@1258	1047	* if the path of this URI is not a prefix of the path of the given URI,
jaroslav@1258	1048	* then the given URI is returned. </p></li>
jaroslav@1258	1049	*
jaroslav@1258	1050	* <li><p> Otherwise a new relative hierarchical URI is constructed with
jaroslav@1258	1051	* query and fragment components taken from the given URI and with a path
jaroslav@1258	1052	* component computed by removing this URI's path from the beginning of
jaroslav@1258	1053	* the given URI's path. </p></li>
jaroslav@1258	1054	*
jaroslav@1258	1055	* </ol>
jaroslav@1258	1056	*
jaroslav@1258	1057	* @param uri The URI to be relativized against this URI
jaroslav@1258	1058	* @return The resulting URI
jaroslav@1258	1059	*
jaroslav@1258	1060	* @throws NullPointerException
jaroslav@1258	1061	* If <tt>uri</tt> is <tt>null</tt>
jaroslav@1258	1062	*/
jaroslav@1258	1063	public URI relativize(URI uri) {
jaroslav@1258	1064	return relativize(this, uri); // two-argument overload, defined outside this view; this URI is passed first as the base
jaroslav@1258	1065	}
jaroslav@1258	1066
jaroslav@1258	1067	/**
jaroslav@1258	1068	* Constructs a URL from this URI.
jaroslav@1258	1069	*
jaroslav@1258	1070	* <p> This convenience method works as if invoking it were equivalent to
jaroslav@1258	1071	* evaluating the expression <tt>new URL(this.toString())</tt> after
jaroslav@1258	1072	* first checking that this URI is absolute. </p>
jaroslav@1258	1073	*
jaroslav@1258	1074	* @return A URL constructed from this URI
jaroslav@1258	1075	*
jaroslav@1258	1076	* @throws IllegalArgumentException
jaroslav@1258	1077	* If this URI is not absolute
jaroslav@1258	1078	*
jaroslav@1258	1079	* @throws MalformedURLException
jaroslav@1258	1080	* If a protocol handler for the URL could not be found,
jaroslav@1258	1081	* or if some other error occurred while constructing the URL
jaroslav@1258	1082	*/
jaroslav@1258	1083	public URL toURL()
jaroslav@1258	1084	throws MalformedURLException {
jaroslav@1258	1085	if (!isAbsolute()) // isAbsolute() is false when this URI has no scheme
jaroslav@1258	1086	throw new IllegalArgumentException("URI is not absolute");
jaroslav@1258	1087	return new URL(toString()); // the URL is built from this URI's string form; MalformedURLException comes from the URL constructor
jaroslav@1258	1088	}
jaroslav@1258	1089
jaroslav@1258	1090	// -- Component access methods --
jaroslav@1258	1091
jaroslav@1258	1092	/**
jaroslav@1258	1093	* Returns the scheme component of this URI.
jaroslav@1258	1094	*
jaroslav@1258	1095	* <p> The scheme component of a URI, if defined, only contains characters
jaroslav@1258	1096	* in the <i>alphanum</i> category and in the string <tt>"-.+"</tt>. A
jaroslav@1258	1097	* scheme always starts with an <i>alpha</i> character. <p>
jaroslav@1258	1098	*
jaroslav@1258	1099	* The scheme component of a URI cannot contain escaped octets, hence this
jaroslav@1258	1100	* method does not perform any decoding.
jaroslav@1258	1101	*
jaroslav@1258	1102	* @return The scheme component of this URI,
jaroslav@1258	1103	* or <tt>null</tt> if the scheme is undefined
jaroslav@1258	1104	*/
jaroslav@1258	1105	public String getScheme() {
jaroslav@1258	1106	return scheme; // returned as stored (null when undefined); a scheme cannot contain escaped octets, so there is nothing to decode
jaroslav@1258	1107	}
jaroslav@1258	1108
jaroslav@1258	1109	/**
jaroslav@1258	1110	* Tells whether or not this URI is absolute.
jaroslav@1258	1111	*
jaroslav@1258	1112	* <p> A URI is absolute if, and only if, it has a scheme component. </p>
jaroslav@1258	1113	*
jaroslav@1258	1114	* @return <tt>true</tt> if, and only if, this URI is absolute
jaroslav@1258	1115	*/
jaroslav@1258	1116	public boolean isAbsolute() {
jaroslav@1258	1117	return scheme != null; // absolute means exactly "has a scheme component"
jaroslav@1258	1118	}
jaroslav@1258	1119
jaroslav@1258	1120	/**
jaroslav@1258	1121	* Tells whether or not this URI is opaque.
jaroslav@1258	1122	*
jaroslav@1258	1123	* <p> A URI is opaque if, and only if, it is absolute and its
jaroslav@1258	1124	* scheme-specific part does not begin with a slash character ('/').
jaroslav@1258	1125	* An opaque URI has a scheme, a scheme-specific part, and possibly
jaroslav@1258	1126	* a fragment; all other components are undefined. </p>
jaroslav@1258	1127	*
jaroslav@1258	1128	* @return <tt>true</tt> if, and only if, this URI is opaque
jaroslav@1258	1129	*/
jaroslav@1258	1130	public boolean isOpaque() {
jaroslav@1258	1131	return path == null;
jaroslav@1258	1132	}
jaroslav@1258	1133
jaroslav@1258	1134	/**
jaroslav@1258	1135	* Returns the raw scheme-specific part of this URI. The scheme-specific
jaroslav@1258	1136	* part is never undefined, though it may be empty.
jaroslav@1258	1137	*
jaroslav@1258	1138	* <p> The scheme-specific part of a URI only contains legal URI
jaroslav@1258	1139	* characters. </p>
jaroslav@1258	1140	*
jaroslav@1258	1141	* @return The raw scheme-specific part of this URI
jaroslav@1258	1142	* (never <tt>null</tt>)
jaroslav@1258	1143	*/
jaroslav@1258	1144	public String getRawSchemeSpecificPart() {
jaroslav@1258	1145	defineSchemeSpecificPart();
jaroslav@1258	1146	return schemeSpecificPart;
jaroslav@1258	1147	}
jaroslav@1258	1148
jaroslav@1258	1149	/**
jaroslav@1258	1150	* Returns the decoded scheme-specific part of this URI.
jaroslav@1258	1151	*
jaroslav@1258	1152	* <p> The string returned by this method is equal to that returned by the
jaroslav@1258	1153	* {@link #getRawSchemeSpecificPart() getRawSchemeSpecificPart} method
jaroslav@1258	1154	* except that all sequences of escaped octets are <a
jaroslav@1258	1155	* href="#decode">decoded</a>. </p>
jaroslav@1258	1156	*
jaroslav@1258	1157	* @return The decoded scheme-specific part of this URI
jaroslav@1258	1158	* (never <tt>null</tt>)
jaroslav@1258	1159	*/
jaroslav@1258	1160	public String getSchemeSpecificPart() {
jaroslav@1258	1161	if (decodedSchemeSpecificPart == null)
jaroslav@1258	1162	decodedSchemeSpecificPart = decode(getRawSchemeSpecificPart());
jaroslav@1258	1163	return decodedSchemeSpecificPart;
jaroslav@1258	1164	}
jaroslav@1258	1165
jaroslav@1258	1166	/**
jaroslav@1258	1167	* Returns the raw authority component of this URI.
jaroslav@1258	1168	*
jaroslav@1258	1169	* <p> The authority component of a URI, if defined, only contains the
jaroslav@1258	1170	* commercial-at character (<tt>'@'</tt>) and characters in the
jaroslav@1258	1171	* <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, and <i>other</i>
jaroslav@1258	1172	* categories. If the authority is server-based then it is further
jaroslav@1258	1173	* constrained to have valid user-information, host, and port
jaroslav@1258	1174	* components. </p>
jaroslav@1258	1175	*
jaroslav@1258	1176	* @return The raw authority component of this URI,
jaroslav@1258	1177	* or <tt>null</tt> if the authority is undefined
jaroslav@1258	1178	*/
jaroslav@1258	1179	public String getRawAuthority() {
jaroslav@1258	1180	return authority;
jaroslav@1258	1181	}
jaroslav@1258	1182
jaroslav@1258	1183	/**
jaroslav@1258	1184	* Returns the decoded authority component of this URI.
jaroslav@1258	1185	*
jaroslav@1258	1186	* <p> The string returned by this method is equal to that returned by the
jaroslav@1258	1187	* {@link #getRawAuthority() getRawAuthority} method except that all
jaroslav@1258	1188	* sequences of escaped octets are <a href="#decode">decoded</a>. </p>
jaroslav@1258	1189	*
jaroslav@1258	1190	* @return The decoded authority component of this URI,
jaroslav@1258	1191	* or <tt>null</tt> if the authority is undefined
jaroslav@1258	1192	*/
jaroslav@1258	1193	public String getAuthority() {
jaroslav@1258	1194	if (decodedAuthority == null)
jaroslav@1258	1195	decodedAuthority = decode(authority);
jaroslav@1258	1196	return decodedAuthority;
jaroslav@1258	1197	}
jaroslav@1258	1198
jaroslav@1258	1199	/**
jaroslav@1258	1200	* Returns the raw user-information component of this URI.
jaroslav@1258	1201	*
jaroslav@1258	1202	* <p> The user-information component of a URI, if defined, only contains
jaroslav@1258	1203	* characters in the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, and
jaroslav@1258	1204	* <i>other</i> categories. </p>
jaroslav@1258	1205	*
jaroslav@1258	1206	* @return The raw user-information component of this URI,
jaroslav@1258	1207	* or <tt>null</tt> if the user information is undefined
jaroslav@1258	1208	*/
jaroslav@1258	1209	public String getRawUserInfo() {
jaroslav@1258	1210	return userInfo;
jaroslav@1258	1211	}
jaroslav@1258	1212
jaroslav@1258	1213	/**
jaroslav@1258	1214	* Returns the decoded user-information component of this URI.
jaroslav@1258	1215	*
jaroslav@1258	1216	* <p> The string returned by this method is equal to that returned by the
jaroslav@1258	1217	* {@link #getRawUserInfo() getRawUserInfo} method except that all
jaroslav@1258	1218	* sequences of escaped octets are <a href="#decode">decoded</a>. </p>
jaroslav@1258	1219	*
jaroslav@1258	1220	* @return The decoded user-information component of this URI,
jaroslav@1258	1221	* or <tt>null</tt> if the user information is undefined
jaroslav@1258	1222	*/
jaroslav@1258	1223	public String getUserInfo() {
jaroslav@1258	1224	if ((decodedUserInfo == null) && (userInfo != null))
jaroslav@1258	1225	decodedUserInfo = decode(userInfo);
jaroslav@1258	1226	return decodedUserInfo;
jaroslav@1258	1227	}
jaroslav@1258	1228
jaroslav@1258	1229	/**
jaroslav@1258	1230	* Returns the host component of this URI.
jaroslav@1258	1231	*
jaroslav@1258	1232	* <p> The host component of a URI, if defined, will have one of the
jaroslav@1258	1233	* following forms: </p>
jaroslav@1258	1234	*
jaroslav@1258	1235	* <ul type=disc>
jaroslav@1258	1236	*
jaroslav@1258	1237	* <li><p> A domain name consisting of one or more <i>labels</i>
jaroslav@1258	1238	* separated by period characters (<tt>'.'</tt>), optionally followed by
jaroslav@1258	1239	* a period character. Each label consists of <i>alphanum</i> characters
jaroslav@1258	1240	* as well as hyphen characters (<tt>'-'</tt>), though hyphens never
jaroslav@1258	1241	* occur as the first or last characters in a label. The rightmost
jaroslav@1258	1242	* label of a domain name consisting of two or more labels, begins
jaroslav@1258	1243	* with an <i>alpha</i> character. </li>
jaroslav@1258	1244	*
jaroslav@1258	1245	* <li><p> A dotted-quad IPv4 address of the form
jaroslav@1258	1246	* <i>digit</i><tt>+.</tt><i>digit</i><tt>+.</tt><i>digit</i><tt>+.</tt><i>digit</i><tt>+</tt>,
jaroslav@1258	1247	* where no <i>digit</i> sequence is longer than three characters and no
jaroslav@1258	1248	* sequence has a value larger than 255. </p></li>
jaroslav@1258	1249	*
jaroslav@1258	1250	* <li><p> An IPv6 address enclosed in square brackets (<tt>'['</tt> and
jaroslav@1258	1251	* <tt>']'</tt>) and consisting of hexadecimal digits, colon characters
jaroslav@1258	1252	* (<tt>':'</tt>), and possibly an embedded IPv4 address. The full
jaroslav@1258	1253	* syntax of IPv6 addresses is specified in <a
jaroslav@1258	1254	* href="http://www.ietf.org/rfc/rfc2373.txt"><i>RFC 2373: IPv6
jaroslav@1258	1255	* Addressing Architecture</i></a>. </p></li>
jaroslav@1258	1256	*
jaroslav@1258	1257	* </ul>
jaroslav@1258	1258	*
jaroslav@1258	1259	* The host component of a URI cannot contain escaped octets, hence this
jaroslav@1258	1260	* method does not perform any decoding.
jaroslav@1258	1261	*
jaroslav@1258	1262	* @return The host component of this URI,
jaroslav@1258	1263	* or <tt>null</tt> if the host is undefined
jaroslav@1258	1264	*/
jaroslav@1258	1265	public String getHost() {
jaroslav@1258	1266	return host;
jaroslav@1258	1267	}
jaroslav@1258	1268
jaroslav@1258	1269	/**
jaroslav@1258	1270	* Returns the port number of this URI.
jaroslav@1258	1271	*
jaroslav@1258	1272	* <p> The port component of a URI, if defined, is a non-negative
jaroslav@1258	1273	* integer. </p>
jaroslav@1258	1274	*
jaroslav@1258	1275	* @return The port component of this URI,
jaroslav@1258	1276	* or <tt>-1</tt> if the port is undefined
jaroslav@1258	1277	*/
jaroslav@1258	1278	public int getPort() {
jaroslav@1258	1279	return port;
jaroslav@1258	1280	}
jaroslav@1258	1281
jaroslav@1258	1282	/**
jaroslav@1258	1283	* Returns the raw path component of this URI.
jaroslav@1258	1284	*
jaroslav@1258	1285	* <p> The path component of a URI, if defined, only contains the slash
jaroslav@1258	1286	* character (<tt>'/'</tt>), the commercial-at character (<tt>'@'</tt>),
jaroslav@1258	1287	* and characters in the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>,
jaroslav@1258	1288	* and <i>other</i> categories. </p>
jaroslav@1258	1289	*
jaroslav@1258	1290	* @return The path component of this URI,
jaroslav@1258	1291	* or <tt>null</tt> if the path is undefined
jaroslav@1258	1292	*/
jaroslav@1258	1293	public String getRawPath() {
jaroslav@1258	1294	return path;
jaroslav@1258	1295	}
jaroslav@1258	1296
jaroslav@1258	1297	/**
jaroslav@1258	1298	* Returns the decoded path component of this URI.
jaroslav@1258	1299	*
jaroslav@1258	1300	* <p> The string returned by this method is equal to that returned by the
jaroslav@1258	1301	* {@link #getRawPath() getRawPath} method except that all sequences of
jaroslav@1258	1302	* escaped octets are <a href="#decode">decoded</a>. </p>
jaroslav@1258	1303	*
jaroslav@1258	1304	* @return The decoded path component of this URI,
jaroslav@1258	1305	* or <tt>null</tt> if the path is undefined
jaroslav@1258	1306	*/
jaroslav@1258	1307	public String getPath() {
jaroslav@1258	1308	if ((decodedPath == null) && (path != null))
jaroslav@1258	1309	decodedPath = decode(path);
jaroslav@1258	1310	return decodedPath;
jaroslav@1258	1311	}
jaroslav@1258	1312
jaroslav@1258	1313	/**
jaroslav@1258	1314	* Returns the raw query component of this URI.
jaroslav@1258	1315	*
jaroslav@1258	1316	* <p> The query component of a URI, if defined, only contains legal URI
jaroslav@1258	1317	* characters. </p>
jaroslav@1258	1318	*
jaroslav@1258	1319	* @return The raw query component of this URI,
jaroslav@1258	1320	* or <tt>null</tt> if the query is undefined
jaroslav@1258	1321	*/
jaroslav@1258	1322	public String getRawQuery() {
jaroslav@1258	1323	return query;
jaroslav@1258	1324	}
jaroslav@1258	1325
jaroslav@1258	1326	/**
jaroslav@1258	1327	* Returns the decoded query component of this URI.
jaroslav@1258	1328	*
jaroslav@1258	1329	* <p> The string returned by this method is equal to that returned by the
jaroslav@1258	1330	* {@link #getRawQuery() getRawQuery} method except that all sequences of
jaroslav@1258	1331	* escaped octets are <a href="#decode">decoded</a>. </p>
jaroslav@1258	1332	*
jaroslav@1258	1333	* @return The decoded query component of this URI,
jaroslav@1258	1334	* or <tt>null</tt> if the query is undefined
jaroslav@1258	1335	*/
jaroslav@1258	1336	public String getQuery() {
jaroslav@1258	1337	if ((decodedQuery == null) && (query != null))
jaroslav@1258	1338	decodedQuery = decode(query);
jaroslav@1258	1339	return decodedQuery;
jaroslav@1258	1340	}
jaroslav@1258	1341
jaroslav@1258	1342	/**
jaroslav@1258	1343	* Returns the raw fragment component of this URI.
jaroslav@1258	1344	*
jaroslav@1258	1345	* <p> The fragment component of a URI, if defined, only contains legal URI
jaroslav@1258	1346	* characters. </p>
jaroslav@1258	1347	*
jaroslav@1258	1348	* @return The raw fragment component of this URI,
jaroslav@1258	1349	* or <tt>null</tt> if the fragment is undefined
jaroslav@1258	1350	*/
jaroslav@1258	1351	public String getRawFragment() {
jaroslav@1258	1352	return fragment;
jaroslav@1258	1353	}
jaroslav@1258	1354
jaroslav@1258	1355	/**
jaroslav@1258	1356	* Returns the decoded fragment component of this URI.
jaroslav@1258	1357	*
jaroslav@1258	1358	* <p> The string returned by this method is equal to that returned by the
jaroslav@1258	1359	* {@link #getRawFragment() getRawFragment} method except that all
jaroslav@1258	1360	* sequences of escaped octets are <a href="#decode">decoded</a>. </p>
jaroslav@1258	1361	*
jaroslav@1258	1362	* @return The decoded fragment component of this URI,
jaroslav@1258	1363	* or <tt>null</tt> if the fragment is undefined
jaroslav@1258	1364	*/
jaroslav@1258	1365	public String getFragment() {
jaroslav@1258	1366	if ((decodedFragment == null) && (fragment != null))
jaroslav@1258	1367	decodedFragment = decode(fragment);
jaroslav@1258	1368	return decodedFragment;
jaroslav@1258	1369	}
jaroslav@1258	1370
jaroslav@1258	1371
jaroslav@1258	1372	// -- Equality, comparison, hash code, toString, and serialization --
jaroslav@1258	1373
jaroslav@1258	1374	/**
jaroslav@1258	1375	* Tests this URI for equality with another object.
jaroslav@1258	1376	*
jaroslav@1258	1377	* <p> If the given object is not a URI then this method immediately
jaroslav@1258	1378	* returns <tt>false</tt>.
jaroslav@1258	1379	*
jaroslav@1258	1380	* <p> For two URIs to be considered equal requires that either both are
jaroslav@1258	1381	* opaque or both are hierarchical. Their schemes must either both be
jaroslav@1258	1382	* undefined or else be equal without regard to case. Their fragments
jaroslav@1258	1383	* must either both be undefined or else be equal.
jaroslav@1258	1384	*
jaroslav@1258	1385	* <p> For two opaque URIs to be considered equal, their scheme-specific
jaroslav@1258	1386	* parts must be equal.
jaroslav@1258	1387	*
jaroslav@1258	1388	* <p> For two hierarchical URIs to be considered equal, their paths must
jaroslav@1258	1389	* be equal and their queries must either both be undefined or else be
jaroslav@1258	1390	* equal. Their authorities must either both be undefined, or both be
jaroslav@1258	1391	* registry-based, or both be server-based. If their authorities are
jaroslav@1258	1392	* defined and are registry-based, then they must be equal. If their
jaroslav@1258	1393	* authorities are defined and are server-based, then their hosts must be
jaroslav@1258	1394	* equal without regard to case, their port numbers must be equal, and
jaroslav@1258	1395	* their user-information components must be equal.
jaroslav@1258	1396	*
jaroslav@1258	1397	* <p> When testing the user-information, path, query, fragment, authority,
jaroslav@1258	1398	* or scheme-specific parts of two URIs for equality, the raw forms rather
jaroslav@1258	1399	* than the encoded forms of these components are compared and the
jaroslav@1258	1400	* hexadecimal digits of escaped octets are compared without regard to
jaroslav@1258	1401	* case.
jaroslav@1258	1402	*
jaroslav@1258	1403	* <p> This method satisfies the general contract of the {@link
jaroslav@1258	1404	* java.lang.Object#equals(Object) Object.equals} method. </p>
jaroslav@1258	1405	*
jaroslav@1258	1406	* @param ob The object to which this object is to be compared
jaroslav@1258	1407	*
jaroslav@1258	1408	* @return <tt>true</tt> if, and only if, the given object is a URI that
jaroslav@1258	1409	* is identical to this URI
jaroslav@1258	1410	*/
jaroslav@1258	1411	public boolean equals(Object ob) {
jaroslav@1258	1412	if (ob == this)
jaroslav@1258	1413	return true;
jaroslav@1258	1414	if (!(ob instanceof URI))
jaroslav@1258	1415	return false;
jaroslav@1258	1416	URI that = (URI)ob;
jaroslav@1258	1417	if (this.isOpaque() != that.isOpaque()) return false;
jaroslav@1258	1418	if (!equalIgnoringCase(this.scheme, that.scheme)) return false;
jaroslav@1258	1419	if (!equal(this.fragment, that.fragment)) return false;
jaroslav@1258	1420
jaroslav@1258	1421	// Opaque
jaroslav@1258	1422	if (this.isOpaque())
jaroslav@1258	1423	return equal(this.schemeSpecificPart, that.schemeSpecificPart);
jaroslav@1258	1424
jaroslav@1258	1425	// Hierarchical
jaroslav@1258	1426	if (!equal(this.path, that.path)) return false;
jaroslav@1258	1427	if (!equal(this.query, that.query)) return false;
jaroslav@1258	1428
jaroslav@1258	1429	// Authorities
jaroslav@1258	1430	if (this.authority == that.authority) return true;
jaroslav@1258	1431	if (this.host != null) {
jaroslav@1258	1432	// Server-based
jaroslav@1258	1433	if (!equal(this.userInfo, that.userInfo)) return false;
jaroslav@1258	1434	if (!equalIgnoringCase(this.host, that.host)) return false;
jaroslav@1258	1435	if (this.port != that.port) return false;
jaroslav@1258	1436	} else if (this.authority != null) {
jaroslav@1258	1437	// Registry-based
jaroslav@1258	1438	if (!equal(this.authority, that.authority)) return false;
jaroslav@1258	1439	} else if (this.authority != that.authority) {
jaroslav@1258	1440	return false;
jaroslav@1258	1441	}
jaroslav@1258	1442
jaroslav@1258	1443	return true;
jaroslav@1258	1444	}
jaroslav@1258	1445
jaroslav@1258	1446	/**
jaroslav@1258	1447	* Returns a hash-code value for this URI. The hash code is based upon all
jaroslav@1258	1448	* of the URI's components, and satisfies the general contract of the
jaroslav@1258	1449	* {@link java.lang.Object#hashCode() Object.hashCode} method.
jaroslav@1258	1450	*
jaroslav@1258	1451	* @return A hash-code value for this URI
jaroslav@1258	1452	*/
jaroslav@1258	1453	public int hashCode() {
jaroslav@1258	1454	if (hash != 0)
jaroslav@1258	1455	return hash;
jaroslav@1258	1456	int h = hashIgnoringCase(0, scheme);
jaroslav@1258	1457	h = hash(h, fragment);
jaroslav@1258	1458	if (isOpaque()) {
jaroslav@1258	1459	h = hash(h, schemeSpecificPart);
jaroslav@1258	1460	} else {
jaroslav@1258	1461	h = hash(h, path);
jaroslav@1258	1462	h = hash(h, query);
jaroslav@1258	1463	if (host != null) {
jaroslav@1258	1464	h = hash(h, userInfo);
jaroslav@1258	1465	h = hashIgnoringCase(h, host);
jaroslav@1258	1466	h += 1949 * port;
jaroslav@1258	1467	} else {
jaroslav@1258	1468	h = hash(h, authority);
jaroslav@1258	1469	}
jaroslav@1258	1470	}
jaroslav@1258	1471	hash = h;
jaroslav@1258	1472	return h;
jaroslav@1258	1473	}
jaroslav@1258	1474
jaroslav@1258	1475	/**
jaroslav@1258	1476	* Compares this URI to another object, which must be a URI.
jaroslav@1258	1477	*
jaroslav@1258	1478	* <p> When comparing corresponding components of two URIs, if one
jaroslav@1258	1479	* component is undefined but the other is defined then the first is
jaroslav@1258	1480	* considered to be less than the second. Unless otherwise noted, string
jaroslav@1258	1481	* components are ordered according to their natural, case-sensitive
jaroslav@1258	1482	* ordering as defined by the {@link java.lang.String#compareTo(Object)
jaroslav@1258	1483	* String.compareTo} method. String components that are subject to
jaroslav@1258	1484	* encoding are compared by comparing their raw forms rather than their
jaroslav@1258	1485	* encoded forms.
jaroslav@1258	1486	*
jaroslav@1258	1487	* <p> The ordering of URIs is defined as follows: </p>
jaroslav@1258	1488	*
jaroslav@1258	1489	* <ul type=disc>
jaroslav@1258	1490	*
jaroslav@1258	1491	* <li><p> Two URIs with different schemes are ordered according the
jaroslav@1258	1492	* ordering of their schemes, without regard to case. </p></li>
jaroslav@1258	1493	*
jaroslav@1258	1494	* <li><p> A hierarchical URI is considered to be less than an opaque URI
jaroslav@1258	1495	* with an identical scheme. </p></li>
jaroslav@1258	1496	*
jaroslav@1258	1497	* <li><p> Two opaque URIs with identical schemes are ordered according
jaroslav@1258	1498	* to the ordering of their scheme-specific parts. </p></li>
jaroslav@1258	1499	*
jaroslav@1258	1500	* <li><p> Two opaque URIs with identical schemes and scheme-specific
jaroslav@1258	1501	* parts are ordered according to the ordering of their
jaroslav@1258	1502	* fragments. </p></li>
jaroslav@1258	1503	*
jaroslav@1258	1504	* <li><p> Two hierarchical URIs with identical schemes are ordered
jaroslav@1258	1505	* according to the ordering of their authority components: </p>
jaroslav@1258	1506	*
jaroslav@1258	1507	* <ul type=disc>
jaroslav@1258	1508	*
jaroslav@1258	1509	* <li><p> If both authority components are server-based then the URIs
jaroslav@1258	1510	* are ordered according to their user-information components; if these
jaroslav@1258	1511	* components are identical then the URIs are ordered according to the
jaroslav@1258	1512	* ordering of their hosts, without regard to case; if the hosts are
jaroslav@1258	1513	* identical then the URIs are ordered according to the ordering of
jaroslav@1258	1514	* their ports. </p></li>
jaroslav@1258	1515	*
jaroslav@1258	1516	* <li><p> If one or both authority components are registry-based then
jaroslav@1258	1517	* the URIs are ordered according to the ordering of their authority
jaroslav@1258	1518	* components. </p></li>
jaroslav@1258	1519	*
jaroslav@1258	1520	* </ul></li>
jaroslav@1258	1521	*
jaroslav@1258	1522	* <li><p> Finally, two hierarchical URIs with identical schemes and
jaroslav@1258	1523	* authority components are ordered according to the ordering of their
jaroslav@1258	1524	* paths; if their paths are identical then they are ordered according to
jaroslav@1258	1525	* the ordering of their queries; if the queries are identical then they
jaroslav@1258	1526	* are ordered according to the order of their fragments. </p></li>
jaroslav@1258	1527	*
jaroslav@1258	1528	* </ul>
jaroslav@1258	1529	*
jaroslav@1258	1530	* <p> This method satisfies the general contract of the {@link
jaroslav@1258	1531	* java.lang.Comparable#compareTo(Object) Comparable.compareTo}
jaroslav@1258	1532	* method. </p>
jaroslav@1258	1533	*
jaroslav@1258	1534	* @param that
jaroslav@1258	1535	* The object to which this URI is to be compared
jaroslav@1258	1536	*
jaroslav@1258	1537	* @return A negative integer, zero, or a positive integer as this URI is
jaroslav@1258	1538	* less than, equal to, or greater than the given URI
jaroslav@1258	1539	*
jaroslav@1258	1540	* @throws ClassCastException
jaroslav@1258	1541	* If the given object is not a URI
jaroslav@1258	1542	*/
jaroslav@1258	1543	public int compareTo(URI that) {
jaroslav@1258	1544	int c;
jaroslav@1258	1545
jaroslav@1258	1546	if ((c = compareIgnoringCase(this.scheme, that.scheme)) != 0)
jaroslav@1258	1547	return c;
jaroslav@1258	1548
jaroslav@1258	1549	if (this.isOpaque()) {
jaroslav@1258	1550	if (that.isOpaque()) {
jaroslav@1258	1551	// Both opaque
jaroslav@1258	1552	if ((c = compare(this.schemeSpecificPart,
jaroslav@1258	1553	that.schemeSpecificPart)) != 0)
jaroslav@1258	1554	return c;
jaroslav@1258	1555	return compare(this.fragment, that.fragment);
jaroslav@1258	1556	}
jaroslav@1258	1557	return +1; // Opaque > hierarchical
jaroslav@1258	1558	} else if (that.isOpaque()) {
jaroslav@1258	1559	return -1; // Hierarchical < opaque
jaroslav@1258	1560	}
jaroslav@1258	1561
jaroslav@1258	1562	// Hierarchical
jaroslav@1258	1563	if ((this.host != null) && (that.host != null)) {
jaroslav@1258	1564	// Both server-based
jaroslav@1258	1565	if ((c = compare(this.userInfo, that.userInfo)) != 0)
jaroslav@1258	1566	return c;
jaroslav@1258	1567	if ((c = compareIgnoringCase(this.host, that.host)) != 0)
jaroslav@1258	1568	return c;
jaroslav@1258	1569	if ((c = this.port - that.port) != 0)
jaroslav@1258	1570	return c;
jaroslav@1258	1571	} else {
jaroslav@1258	1572	// If one or both authorities are registry-based then we simply
jaroslav@1258	1573	// compare them in the usual, case-sensitive way. If one is
jaroslav@1258	1574	// registry-based and one is server-based then the strings are
jaroslav@1258	1575	// guaranteed to be unequal, hence the comparison will never return
jaroslav@1258	1576	// zero and the compareTo and equals methods will remain
jaroslav@1258	1577	// consistent.
jaroslav@1258	1578	if ((c = compare(this.authority, that.authority)) != 0) return c;
jaroslav@1258	1579	}
jaroslav@1258	1580
jaroslav@1258	1581	if ((c = compare(this.path, that.path)) != 0) return c;
jaroslav@1258	1582	if ((c = compare(this.query, that.query)) != 0) return c;
jaroslav@1258	1583	return compare(this.fragment, that.fragment);
jaroslav@1258	1584	}
jaroslav@1258	1585
jaroslav@1258	1586	/**
jaroslav@1258	1587	* Returns the content of this URI as a string.
jaroslav@1258	1588	*
jaroslav@1258	1589	* <p> If this URI was created by invoking one of the constructors in this
jaroslav@1258	1590	* class then a string equivalent to the original input string, or to the
jaroslav@1258	1591	* string computed from the originally-given components, as appropriate, is
jaroslav@1258	1592	* returned. Otherwise this URI was created by normalization, resolution,
jaroslav@1258	1593	* or relativization, and so a string is constructed from this URI's
jaroslav@1258	1594	* components according to the rules specified in <a
jaroslav@1258	1595	* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
jaroslav@1258	1596	* section 5.2, step 7. </p>
jaroslav@1258	1597	*
jaroslav@1258	1598	* @return The string form of this URI
jaroslav@1258	1599	*/
jaroslav@1258	1600	public String toString() {
jaroslav@1258	1601	defineString();
jaroslav@1258	1602	return string;
jaroslav@1258	1603	}
jaroslav@1258	1604
jaroslav@1258	1605	/**
jaroslav@1258	1606	* Returns the content of this URI as a US-ASCII string.
jaroslav@1258	1607	*
jaroslav@1258	1608	* <p> If this URI does not contain any characters in the <i>other</i>
jaroslav@1258	1609	* category then an invocation of this method will return the same value as
jaroslav@1258	1610	* an invocation of the {@link #toString() toString} method. Otherwise
jaroslav@1258	1611	* this method works as if by invoking that method and then <a
jaroslav@1258	1612	* href="#encode">encoding</a> the result. </p>
jaroslav@1258	1613	*
jaroslav@1258	1614	* @return The string form of this URI, encoded as needed
jaroslav@1258	1615	* so that it only contains characters in the US-ASCII
jaroslav@1258	1616	* charset
jaroslav@1258	1617	*/
jaroslav@1258	1618	public String toASCIIString() {
jaroslav@1258	1619	defineString();
jaroslav@1258	1620	return encode(string);
jaroslav@1258	1621	}
jaroslav@1258	1622
jaroslav@1258	1623
jaroslav@1258	1624	// -- Serialization support --
jaroslav@1258	1625
jaroslav@1258	1626	/**
jaroslav@1258	1627	* Saves the content of this URI to the given serial stream.
jaroslav@1258	1628	*
jaroslav@1258	1629	* <p> The only serializable field of a URI instance is its <tt>string</tt>
jaroslav@1258	1630	* field. That field is given a value, if it does not have one already,
jaroslav@1258	1631	* and then the {@link java.io.ObjectOutputStream#defaultWriteObject()}
jaroslav@1258	1632	* method of the given object-output stream is invoked. </p>
jaroslav@1258	1633	*
jaroslav@1258	1634	* @param os The object-output stream to which this object
jaroslav@1258	1635	* is to be written
jaroslav@1258	1636	*/
jaroslav@1258	1637	private void writeObject(ObjectOutputStream os)
jaroslav@1258	1638	throws IOException
jaroslav@1258	1639	{
jaroslav@1258	1640	defineString();
jaroslav@1258	1641	os.defaultWriteObject(); // Writes the string field only
jaroslav@1258	1642	}
jaroslav@1258	1643
jaroslav@1258	1644	/**
jaroslav@1258	1645	* Reconstitutes a URI from the given serial stream.
jaroslav@1258	1646	*
jaroslav@1258	1647	* <p> The {@link java.io.ObjectInputStream#defaultReadObject()} method is
jaroslav@1258	1648	* invoked to read the value of the <tt>string</tt> field. The result is
jaroslav@1258	1649	* then parsed in the usual way.
jaroslav@1258	1650	*
jaroslav@1258	1651	* @param is The object-input stream from which this object
jaroslav@1258	1652	* is being read
jaroslav@1258	1653	*/
jaroslav@1258	1654	private void readObject(ObjectInputStream is)
jaroslav@1258	1655	throws ClassNotFoundException, IOException
jaroslav@1258	1656	{
jaroslav@1258	1657	port = -1; // Argh
jaroslav@1258	1658	is.defaultReadObject();
jaroslav@1258	1659	try {
jaroslav@1258	1660	new Parser(string).parse(false);
jaroslav@1258	1661	} catch (URISyntaxException x) {
jaroslav@1258	1662	IOException y = new InvalidObjectException("Invalid URI");
jaroslav@1258	1663	y.initCause(x);
jaroslav@1258	1664	throw y;
jaroslav@1258	1665	}
jaroslav@1258	1666	}
jaroslav@1258	1667
jaroslav@1258	1668
jaroslav@1258	1669	// -- End of public methods --
jaroslav@1258	1670
jaroslav@1258	1671
jaroslav@1258	1672	// -- Utility methods for string-field comparison and hashing --
jaroslav@1258	1673
jaroslav@1258	1674	// These methods return appropriate values for null string arguments,
jaroslav@1258	1675	// thereby simplifying the equals, hashCode, and compareTo methods.
jaroslav@1258	1676	//
jaroslav@1258	1677	// The case-ignoring methods should only be applied to strings whose
jaroslav@1258	1678	// characters are all known to be US-ASCII. Because of this restriction,
jaroslav@1258	1679	// these methods are faster than the similar methods in the String class.
jaroslav@1258	1680
jaroslav@1258	1681	// US-ASCII only
jaroslav@1258	1682	private static int toLower(char c) {
jaroslav@1258	1683	if ((c >= 'A') && (c <= 'Z'))
jaroslav@1258	1684	return c + ('a' - 'A');
jaroslav@1258	1685	return c;
jaroslav@1258	1686	}
jaroslav@1258	1687
jaroslav@1258	1688	private static boolean equal(String s, String t) {
jaroslav@1258	1689	if (s == t) return true;
jaroslav@1258	1690	if ((s != null) && (t != null)) {
jaroslav@1258	1691	if (s.length() != t.length())
jaroslav@1258	1692	return false;
jaroslav@1258	1693	if (s.indexOf('%') < 0)
jaroslav@1258	1694	return s.equals(t);
jaroslav@1258	1695	int n = s.length();
jaroslav@1258	1696	for (int i = 0; i < n;) {
jaroslav@1258	1697	char c = s.charAt(i);
jaroslav@1258	1698	char d = t.charAt(i);
jaroslav@1258	1699	if (c != '%') {
jaroslav@1258	1700	if (c != d)
jaroslav@1258	1701	return false;
jaroslav@1258	1702	i++;
jaroslav@1258	1703	continue;
jaroslav@1258	1704	}
jaroslav@1258	1705	i++;
jaroslav@1258	1706	if (toLower(s.charAt(i)) != toLower(t.charAt(i)))
jaroslav@1258	1707	return false;
jaroslav@1258	1708	i++;
jaroslav@1258	1709	if (toLower(s.charAt(i)) != toLower(t.charAt(i)))
jaroslav@1258	1710	return false;
jaroslav@1258	1711	i++;
jaroslav@1258	1712	}
jaroslav@1258	1713	return true;
jaroslav@1258	1714	}
jaroslav@1258	1715	return false;
jaroslav@1258	1716	}
jaroslav@1258	1717
jaroslav@1258	1718	// US-ASCII only
jaroslav@1258	1719	private static boolean equalIgnoringCase(String s, String t) {
jaroslav@1258	1720	if (s == t) return true;
jaroslav@1258	1721	if ((s != null) && (t != null)) {
jaroslav@1258	1722	int n = s.length();
jaroslav@1258	1723	if (t.length() != n)
jaroslav@1258	1724	return false;
jaroslav@1258	1725	for (int i = 0; i < n; i++) {
jaroslav@1258	1726	if (toLower(s.charAt(i)) != toLower(t.charAt(i)))
jaroslav@1258	1727	return false;
jaroslav@1258	1728	}
jaroslav@1258	1729	return true;
jaroslav@1258	1730	}
jaroslav@1258	1731	return false;
jaroslav@1258	1732	}
jaroslav@1258	1733
jaroslav@1258	1734	private static int hash(int hash, String s) {
jaroslav@1258	1735	if (s == null) return hash;
jaroslav@1258	1736	return hash * 127 + s.hashCode();
jaroslav@1258	1737	}
jaroslav@1258	1738
jaroslav@1258	1739	// US-ASCII only
jaroslav@1258	1740	private static int hashIgnoringCase(int hash, String s) {
jaroslav@1258	1741	if (s == null) return hash;
jaroslav@1258	1742	int h = hash;
jaroslav@1258	1743	int n = s.length();
jaroslav@1258	1744	for (int i = 0; i < n; i++)
jaroslav@1258	1745	h = 31 * h + toLower(s.charAt(i));
jaroslav@1258	1746	return h;
jaroslav@1258	1747	}
jaroslav@1258	1748
jaroslav@1258	1749	private static int compare(String s, String t) {
jaroslav@1258	1750	if (s == t) return 0;
jaroslav@1258	1751	if (s != null) {
jaroslav@1258	1752	if (t != null)
jaroslav@1258	1753	return s.compareTo(t);
jaroslav@1258	1754	else
jaroslav@1258	1755	return +1;
jaroslav@1258	1756	} else {
jaroslav@1258	1757	return -1;
jaroslav@1258	1758	}
jaroslav@1258	1759	}
jaroslav@1258	1760
jaroslav@1258	1761	// US-ASCII only
jaroslav@1258	1762	private static int compareIgnoringCase(String s, String t) {
jaroslav@1258	1763	if (s == t) return 0;
jaroslav@1258	1764	if (s != null) {
jaroslav@1258	1765	if (t != null) {
jaroslav@1258	1766	int sn = s.length();
jaroslav@1258	1767	int tn = t.length();
jaroslav@1258	1768	int n = sn < tn ? sn : tn;
jaroslav@1258	1769	for (int i = 0; i < n; i++) {
jaroslav@1258	1770	int c = toLower(s.charAt(i)) - toLower(t.charAt(i));
jaroslav@1258	1771	if (c != 0)
jaroslav@1258	1772	return c;
jaroslav@1258	1773	}
jaroslav@1258	1774	return sn - tn;
jaroslav@1258	1775	}
jaroslav@1258	1776	return +1;
jaroslav@1258	1777	} else {
jaroslav@1258	1778	return -1;
jaroslav@1258	1779	}
jaroslav@1258	1780	}
jaroslav@1258	1781
jaroslav@1258	1782
jaroslav@1258	1783	// -- String construction --
jaroslav@1258	1784
jaroslav@1258	1785	// If a scheme is given then the path, if given, must be absolute
jaroslav@1258	1786	//
jaroslav@1258	1787	private static void checkPath(String s, String scheme, String path)
jaroslav@1258	1788	throws URISyntaxException
jaroslav@1258	1789	{
jaroslav@1258	1790	if (scheme != null) {
jaroslav@1258	1791	if ((path != null)
jaroslav@1258	1792	&& ((path.length() > 0) && (path.charAt(0) != '/')))
jaroslav@1258	1793	throw new URISyntaxException(s,
jaroslav@1258	1794	"Relative path in absolute URI");
jaroslav@1258	1795	}
jaroslav@1258	1796	}
jaroslav@1258	1797
jaroslav@1258	1798	private void appendAuthority(StringBuffer sb,
jaroslav@1258	1799	String authority,
jaroslav@1258	1800	String userInfo,
jaroslav@1258	1801	String host,
jaroslav@1258	1802	int port)
jaroslav@1258	1803	{
jaroslav@1258	1804	if (host != null) {
jaroslav@1258	1805	sb.append("//");
jaroslav@1258	1806	if (userInfo != null) {
jaroslav@1258	1807	sb.append(quote(userInfo, L_USERINFO, H_USERINFO));
jaroslav@1258	1808	sb.append('@');
jaroslav@1258	1809	}
jaroslav@1258	1810	boolean needBrackets = ((host.indexOf(':') >= 0)
jaroslav@1258	1811	&& !host.startsWith("[")
jaroslav@1258	1812	&& !host.endsWith("]"));
jaroslav@1258	1813	if (needBrackets) sb.append('[');
jaroslav@1258	1814	sb.append(host);
jaroslav@1258	1815	if (needBrackets) sb.append(']');
jaroslav@1258	1816	if (port != -1) {
jaroslav@1258	1817	sb.append(':');
jaroslav@1258	1818	sb.append(port);
jaroslav@1258	1819	}
jaroslav@1258	1820	} else if (authority != null) {
jaroslav@1258	1821	sb.append("//");
jaroslav@1258	1822	if (authority.startsWith("[")) {
jaroslav@1258	1823	// authority should (but may not) contain an embedded IPv6 address
jaroslav@1258	1824	int end = authority.indexOf("]");
jaroslav@1258	1825	String doquote = authority, dontquote = "";
jaroslav@1258	1826	if (end != -1 && authority.indexOf(":") != -1) {
jaroslav@1258	1827	// the authority contains an IPv6 address
jaroslav@1258	1828	if (end == authority.length()) {
jaroslav@1258	1829	dontquote = authority;
jaroslav@1258	1830	doquote = "";
jaroslav@1258	1831	} else {
jaroslav@1258	1832	dontquote = authority.substring(0 , end + 1);
jaroslav@1258	1833	doquote = authority.substring(end + 1);
jaroslav@1258	1834	}
jaroslav@1258	1835	}
jaroslav@1258	1836	sb.append(dontquote);
jaroslav@1258	1837	sb.append(quote(doquote,
jaroslav@1258	1838	L_REG_NAME \| L_SERVER,
jaroslav@1258	1839	H_REG_NAME \| H_SERVER));
jaroslav@1258	1840	} else {
jaroslav@1258	1841	sb.append(quote(authority,
jaroslav@1258	1842	L_REG_NAME \| L_SERVER,
jaroslav@1258	1843	H_REG_NAME \| H_SERVER));
jaroslav@1258	1844	}
jaroslav@1258	1845	}
jaroslav@1258	1846	}
jaroslav@1258	1847
jaroslav@1258	1848	private void appendSchemeSpecificPart(StringBuffer sb,
jaroslav@1258	1849	String opaquePart,
jaroslav@1258	1850	String authority,
jaroslav@1258	1851	String userInfo,
jaroslav@1258	1852	String host,
jaroslav@1258	1853	int port,
jaroslav@1258	1854	String path,
jaroslav@1258	1855	String query)
jaroslav@1258	1856	{
jaroslav@1258	1857	if (opaquePart != null) {
jaroslav@1258	1858	/* check if SSP begins with an IPv6 address
jaroslav@1258	1859	* because we must not quote a literal IPv6 address
jaroslav@1258	1860	*/
jaroslav@1258	1861	if (opaquePart.startsWith("//[")) {
jaroslav@1258	1862	int end = opaquePart.indexOf("]");
jaroslav@1258	1863	if (end != -1 && opaquePart.indexOf(":")!=-1) {
jaroslav@1258	1864	String doquote, dontquote;
jaroslav@1258	1865	if (end == opaquePart.length()) {
jaroslav@1258	1866	dontquote = opaquePart;
jaroslav@1258	1867	doquote = "";
jaroslav@1258	1868	} else {
jaroslav@1258	1869	dontquote = opaquePart.substring(0,end+1);
jaroslav@1258	1870	doquote = opaquePart.substring(end+1);
jaroslav@1258	1871	}
jaroslav@1258	1872	sb.append (dontquote);
jaroslav@1258	1873	sb.append(quote(doquote, L_URIC, H_URIC));
jaroslav@1258	1874	}
jaroslav@1258	1875	} else {
jaroslav@1258	1876	sb.append(quote(opaquePart, L_URIC, H_URIC));
jaroslav@1258	1877	}
jaroslav@1258	1878	} else {
jaroslav@1258	1879	appendAuthority(sb, authority, userInfo, host, port);
jaroslav@1258	1880	if (path != null)
jaroslav@1258	1881	sb.append(quote(path, L_PATH, H_PATH));
jaroslav@1258	1882	if (query != null) {
jaroslav@1258	1883	sb.append('?');
jaroslav@1258	1884	sb.append(quote(query, L_URIC, H_URIC));
jaroslav@1258	1885	}
jaroslav@1258	1886	}
jaroslav@1258	1887	}
jaroslav@1258	1888
jaroslav@1258	1889	private void appendFragment(StringBuffer sb, String fragment) {
jaroslav@1258	1890	if (fragment != null) {
jaroslav@1258	1891	sb.append('#');
jaroslav@1258	1892	sb.append(quote(fragment, L_URIC, H_URIC));
jaroslav@1258	1893	}
jaroslav@1258	1894	}
jaroslav@1258	1895
jaroslav@1258	1896	private String toString(String scheme,
jaroslav@1258	1897	String opaquePart,
jaroslav@1258	1898	String authority,
jaroslav@1258	1899	String userInfo,
jaroslav@1258	1900	String host,
jaroslav@1258	1901	int port,
jaroslav@1258	1902	String path,
jaroslav@1258	1903	String query,
jaroslav@1258	1904	String fragment)
jaroslav@1258	1905	{
jaroslav@1258	1906	StringBuffer sb = new StringBuffer();
jaroslav@1258	1907	if (scheme != null) {
jaroslav@1258	1908	sb.append(scheme);
jaroslav@1258	1909	sb.append(':');
jaroslav@1258	1910	}
jaroslav@1258	1911	appendSchemeSpecificPart(sb, opaquePart,
jaroslav@1258	1912	authority, userInfo, host, port,
jaroslav@1258	1913	path, query);
jaroslav@1258	1914	appendFragment(sb, fragment);
jaroslav@1258	1915	return sb.toString();
jaroslav@1258	1916	}
jaroslav@1258	1917
jaroslav@1258	1918	private void defineSchemeSpecificPart() {
jaroslav@1258	1919	if (schemeSpecificPart != null) return;
jaroslav@1258	1920	StringBuffer sb = new StringBuffer();
jaroslav@1258	1921	appendSchemeSpecificPart(sb, null, getAuthority(), getUserInfo(),
jaroslav@1258	1922	host, port, getPath(), getQuery());
jaroslav@1258	1923	if (sb.length() == 0) return;
jaroslav@1258	1924	schemeSpecificPart = sb.toString();
jaroslav@1258	1925	}
jaroslav@1258	1926
jaroslav@1258	1927	private void defineString() {
jaroslav@1258	1928	if (string != null) return;
jaroslav@1258	1929
jaroslav@1258	1930	StringBuffer sb = new StringBuffer();
jaroslav@1258	1931	if (scheme != null) {
jaroslav@1258	1932	sb.append(scheme);
jaroslav@1258	1933	sb.append(':');
jaroslav@1258	1934	}
jaroslav@1258	1935	if (isOpaque()) {
jaroslav@1258	1936	sb.append(schemeSpecificPart);
jaroslav@1258	1937	} else {
jaroslav@1258	1938	if (host != null) {
jaroslav@1258	1939	sb.append("//");
jaroslav@1258	1940	if (userInfo != null) {
jaroslav@1258	1941	sb.append(userInfo);
jaroslav@1258	1942	sb.append('@');
jaroslav@1258	1943	}
jaroslav@1258	1944	boolean needBrackets = ((host.indexOf(':') >= 0)
jaroslav@1258	1945	&& !host.startsWith("[")
jaroslav@1258	1946	&& !host.endsWith("]"));
jaroslav@1258	1947	if (needBrackets) sb.append('[');
jaroslav@1258	1948	sb.append(host);
jaroslav@1258	1949	if (needBrackets) sb.append(']');
jaroslav@1258	1950	if (port != -1) {
jaroslav@1258	1951	sb.append(':');
jaroslav@1258	1952	sb.append(port);
jaroslav@1258	1953	}
jaroslav@1258	1954	} else if (authority != null) {
jaroslav@1258	1955	sb.append("//");
jaroslav@1258	1956	sb.append(authority);
jaroslav@1258	1957	}
jaroslav@1258	1958	if (path != null)
jaroslav@1258	1959	sb.append(path);
jaroslav@1258	1960	if (query != null) {
jaroslav@1258	1961	sb.append('?');
jaroslav@1258	1962	sb.append(query);
jaroslav@1258	1963	}
jaroslav@1258	1964	}
jaroslav@1258	1965	if (fragment != null) {
jaroslav@1258	1966	sb.append('#');
jaroslav@1258	1967	sb.append(fragment);
jaroslav@1258	1968	}
jaroslav@1258	1969	string = sb.toString();
jaroslav@1258	1970	}
jaroslav@1258	1971
jaroslav@1258	1972
jaroslav@1258	1973	// -- Normalization, resolution, and relativization --
jaroslav@1258	1974
jaroslav@1258	1975	// RFC2396 5.2 (6)
jaroslav@1258	1976	private static String resolvePath(String base, String child,
jaroslav@1258	1977	boolean absolute)
jaroslav@1258	1978	{
jaroslav@1258	1979	int i = base.lastIndexOf('/');
jaroslav@1258	1980	int cn = child.length();
jaroslav@1258	1981	String path = "";
jaroslav@1258	1982
jaroslav@1258	1983	if (cn == 0) {
jaroslav@1258	1984	// 5.2 (6a)
jaroslav@1258	1985	if (i >= 0)
jaroslav@1258	1986	path = base.substring(0, i + 1);
jaroslav@1258	1987	} else {
jaroslav@1258	1988	StringBuffer sb = new StringBuffer(base.length() + cn);
jaroslav@1258	1989	// 5.2 (6a)
jaroslav@1258	1990	if (i >= 0)
jaroslav@1258	1991	sb.append(base.substring(0, i + 1));
jaroslav@1258	1992	// 5.2 (6b)
jaroslav@1258	1993	sb.append(child);
jaroslav@1258	1994	path = sb.toString();
jaroslav@1258	1995	}
jaroslav@1258	1996
jaroslav@1258	1997	// 5.2 (6c-f)
jaroslav@1258	1998	String np = normalize(path);
jaroslav@1258	1999
jaroslav@1258	2000	// 5.2 (6g): If the result is absolute but the path begins with "../",
jaroslav@1258	2001	// then we simply leave the path as-is
jaroslav@1258	2002
jaroslav@1258	2003	return np;
jaroslav@1258	2004	}
jaroslav@1258	2005
jaroslav@1258	2006	// RFC2396 5.2
jaroslav@1258	2007	private static URI resolve(URI base, URI child) {
jaroslav@1258	2008	// check if child if opaque first so that NPE is thrown
jaroslav@1258	2009	// if child is null.
jaroslav@1258	2010	if (child.isOpaque() \|\| base.isOpaque())
jaroslav@1258	2011	return child;
jaroslav@1258	2012
jaroslav@1258	2013	// 5.2 (2): Reference to current document (lone fragment)
jaroslav@1258	2014	if ((child.scheme == null) && (child.authority == null)
jaroslav@1258	2015	&& child.path.equals("") && (child.fragment != null)
jaroslav@1258	2016	&& (child.query == null)) {
jaroslav@1258	2017	if ((base.fragment != null)
jaroslav@1258	2018	&& child.fragment.equals(base.fragment)) {
jaroslav@1258	2019	return base;
jaroslav@1258	2020	}
jaroslav@1258	2021	URI ru = new URI();
jaroslav@1258	2022	ru.scheme = base.scheme;
jaroslav@1258	2023	ru.authority = base.authority;
jaroslav@1258	2024	ru.userInfo = base.userInfo;
jaroslav@1258	2025	ru.host = base.host;
jaroslav@1258	2026	ru.port = base.port;
jaroslav@1258	2027	ru.path = base.path;
jaroslav@1258	2028	ru.fragment = child.fragment;
jaroslav@1258	2029	ru.query = base.query;
jaroslav@1258	2030	return ru;
jaroslav@1258	2031	}
jaroslav@1258	2032
jaroslav@1258	2033	// 5.2 (3): Child is absolute
jaroslav@1258	2034	if (child.scheme != null)
jaroslav@1258	2035	return child;
jaroslav@1258	2036
jaroslav@1258	2037	URI ru = new URI(); // Resolved URI
jaroslav@1258	2038	ru.scheme = base.scheme;
jaroslav@1258	2039	ru.query = child.query;
jaroslav@1258	2040	ru.fragment = child.fragment;
jaroslav@1258	2041
jaroslav@1258	2042	// 5.2 (4): Authority
jaroslav@1258	2043	if (child.authority == null) {
jaroslav@1258	2044	ru.authority = base.authority;
jaroslav@1258	2045	ru.host = base.host;
jaroslav@1258	2046	ru.userInfo = base.userInfo;
jaroslav@1258	2047	ru.port = base.port;
jaroslav@1258	2048
jaroslav@1258	2049	String cp = (child.path == null) ? "" : child.path;
jaroslav@1258	2050	if ((cp.length() > 0) && (cp.charAt(0) == '/')) {
jaroslav@1258	2051	// 5.2 (5): Child path is absolute
jaroslav@1258	2052	ru.path = child.path;
jaroslav@1258	2053	} else {
jaroslav@1258	2054	// 5.2 (6): Resolve relative path
jaroslav@1258	2055	ru.path = resolvePath(base.path, cp, base.isAbsolute());
jaroslav@1258	2056	}
jaroslav@1258	2057	} else {
jaroslav@1258	2058	ru.authority = child.authority;
jaroslav@1258	2059	ru.host = child.host;
jaroslav@1258	2060	ru.userInfo = child.userInfo;
jaroslav@1258	2061	ru.host = child.host;
jaroslav@1258	2062	ru.port = child.port;
jaroslav@1258	2063	ru.path = child.path;
jaroslav@1258	2064	}
jaroslav@1258	2065
jaroslav@1258	2066	// 5.2 (7): Recombine (nothing to do here)
jaroslav@1258	2067	return ru;
jaroslav@1258	2068	}
jaroslav@1258	2069
jaroslav@1258	2070	// If the given URI's path is normal then return the URI;
jaroslav@1258	2071	// o.w., return a new URI containing the normalized path.
jaroslav@1258	2072	//
jaroslav@1258	2073	private static URI normalize(URI u) {
jaroslav@1258	2074	if (u.isOpaque() \|\| (u.path == null) \|\| (u.path.length() == 0))
jaroslav@1258	2075	return u;
jaroslav@1258	2076
jaroslav@1258	2077	String np = normalize(u.path);
jaroslav@1258	2078	if (np == u.path)
jaroslav@1258	2079	return u;
jaroslav@1258	2080
jaroslav@1258	2081	URI v = new URI();
jaroslav@1258	2082	v.scheme = u.scheme;
jaroslav@1258	2083	v.fragment = u.fragment;
jaroslav@1258	2084	v.authority = u.authority;
jaroslav@1258	2085	v.userInfo = u.userInfo;
jaroslav@1258	2086	v.host = u.host;
jaroslav@1258	2087	v.port = u.port;
jaroslav@1258	2088	v.path = np;
jaroslav@1258	2089	v.query = u.query;
jaroslav@1258	2090	return v;
jaroslav@1258	2091	}
jaroslav@1258	2092
jaroslav@1258	2093	// If both URIs are hierarchical, their scheme and authority components are
jaroslav@1258	2094	// identical, and the base path is a prefix of the child's path, then
jaroslav@1258	2095	// return a relative URI that, when resolved against the base, yields the
jaroslav@1258	2096	// child; otherwise, return the child.
jaroslav@1258	2097	//
jaroslav@1258	2098	private static URI relativize(URI base, URI child) {
jaroslav@1258	2099	// check if child if opaque first so that NPE is thrown
jaroslav@1258	2100	// if child is null.
jaroslav@1258	2101	if (child.isOpaque() \|\| base.isOpaque())
jaroslav@1258	2102	return child;
jaroslav@1258	2103	if (!equalIgnoringCase(base.scheme, child.scheme)
jaroslav@1258	2104	\|\| !equal(base.authority, child.authority))
jaroslav@1258	2105	return child;
jaroslav@1258	2106
jaroslav@1258	2107	String bp = normalize(base.path);
jaroslav@1258	2108	String cp = normalize(child.path);
jaroslav@1258	2109	if (!bp.equals(cp)) {
jaroslav@1258	2110	if (!bp.endsWith("/"))
jaroslav@1258	2111	bp = bp + "/";
jaroslav@1258	2112	if (!cp.startsWith(bp))
jaroslav@1258	2113	return child;
jaroslav@1258	2114	}
jaroslav@1258	2115
jaroslav@1258	2116	URI v = new URI();
jaroslav@1258	2117	v.path = cp.substring(bp.length());
jaroslav@1258	2118	v.query = child.query;
jaroslav@1258	2119	v.fragment = child.fragment;
jaroslav@1258	2120	return v;
jaroslav@1258	2121	}
jaroslav@1258	2122
jaroslav@1258	2123
jaroslav@1258	2124
jaroslav@1258	2125	// -- Path normalization --
jaroslav@1258	2126
jaroslav@1258	2127	// The following algorithm for path normalization avoids the creation of a
jaroslav@1258	2128	// string object for each segment, as well as the use of a string buffer to
jaroslav@1258	2129	// compute the final result, by using a single char array and editing it in
jaroslav@1258	2130	// place. The array is first split into segments, replacing each slash
jaroslav@1258	2131	// with '\0' and creating a segment-index array, each element of which is
jaroslav@1258	2132	// the index of the first char in the corresponding segment. We then walk
jaroslav@1258	2133	// through both arrays, removing ".", "..", and other segments as necessary
jaroslav@1258	2134	// by setting their entries in the index array to -1. Finally, the two
jaroslav@1258	2135	// arrays are used to rejoin the segments and compute the final result.
jaroslav@1258	2136	//
jaroslav@1258	2137	// This code is based upon src/solaris/native/java/io/canonicalize_md.c
jaroslav@1258	2138
jaroslav@1258	2139
jaroslav@1258	2140	// Check the given path to see if it might need normalization. A path
jaroslav@1258	2141	// might need normalization if it contains duplicate slashes, a "."
jaroslav@1258	2142	// segment, or a ".." segment. Return -1 if no further normalization is
jaroslav@1258	2143	// possible, otherwise return the number of segments found.
jaroslav@1258	2144	//
jaroslav@1258	2145	// This method takes a string argument rather than a char array so that
jaroslav@1258	2146	// this test can be performed without invoking path.toCharArray().
jaroslav@1258	2147	//
jaroslav@1258	2148	static private int needsNormalization(String path) {
jaroslav@1258	2149	boolean normal = true;
jaroslav@1258	2150	int ns = 0; // Number of segments
jaroslav@1258	2151	int end = path.length() - 1; // Index of last char in path
jaroslav@1258	2152	int p = 0; // Index of next char in path
jaroslav@1258	2153
jaroslav@1258	2154	// Skip initial slashes
jaroslav@1258	2155	while (p <= end) {
jaroslav@1258	2156	if (path.charAt(p) != '/') break;
jaroslav@1258	2157	p++;
jaroslav@1258	2158	}
jaroslav@1258	2159	if (p > 1) normal = false;
jaroslav@1258	2160
jaroslav@1258	2161	// Scan segments
jaroslav@1258	2162	while (p <= end) {
jaroslav@1258	2163
jaroslav@1258	2164	// Looking at "." or ".." ?
jaroslav@1258	2165	if ((path.charAt(p) == '.')
jaroslav@1258	2166	&& ((p == end)
jaroslav@1258	2167	\|\| ((path.charAt(p + 1) == '/')
jaroslav@1258	2168	\|\| ((path.charAt(p + 1) == '.')
jaroslav@1258	2169	&& ((p + 1 == end)
jaroslav@1258	2170	\|\| (path.charAt(p + 2) == '/')))))) {
jaroslav@1258	2171	normal = false;
jaroslav@1258	2172	}
jaroslav@1258	2173	ns++;
jaroslav@1258	2174
jaroslav@1258	2175	// Find beginning of next segment
jaroslav@1258	2176	while (p <= end) {
jaroslav@1258	2177	if (path.charAt(p++) != '/')
jaroslav@1258	2178	continue;
jaroslav@1258	2179
jaroslav@1258	2180	// Skip redundant slashes
jaroslav@1258	2181	while (p <= end) {
jaroslav@1258	2182	if (path.charAt(p) != '/') break;
jaroslav@1258	2183	normal = false;
jaroslav@1258	2184	p++;
jaroslav@1258	2185	}
jaroslav@1258	2186
jaroslav@1258	2187	break;
jaroslav@1258	2188	}
jaroslav@1258	2189	}
jaroslav@1258	2190
jaroslav@1258	2191	return normal ? -1 : ns;
jaroslav@1258	2192	}
jaroslav@1258	2193
jaroslav@1258	2194
jaroslav@1258	2195	// Split the given path into segments, replacing slashes with nulls and
jaroslav@1258	2196	// filling in the given segment-index array.
jaroslav@1258	2197	//
jaroslav@1258	2198	// Preconditions:
jaroslav@1258	2199	// segs.length == Number of segments in path
jaroslav@1258	2200	//
jaroslav@1258	2201	// Postconditions:
jaroslav@1258	2202	// All slashes in path replaced by '\0'
jaroslav@1258	2203	// segs[i] == Index of first char in segment i (0 <= i < segs.length)
jaroslav@1258	2204	//
jaroslav@1258	2205	static private void split(char[] path, int[] segs) {
jaroslav@1258	2206	int end = path.length - 1; // Index of last char in path
jaroslav@1258	2207	int p = 0; // Index of next char in path
jaroslav@1258	2208	int i = 0; // Index of current segment
jaroslav@1258	2209
jaroslav@1258	2210	// Skip initial slashes
jaroslav@1258	2211	while (p <= end) {
jaroslav@1258	2212	if (path[p] != '/') break;
jaroslav@1258	2213	path[p] = '\0';
jaroslav@1258	2214	p++;
jaroslav@1258	2215	}
jaroslav@1258	2216
jaroslav@1258	2217	while (p <= end) {
jaroslav@1258	2218
jaroslav@1258	2219	// Note start of segment
jaroslav@1258	2220	segs[i++] = p++;
jaroslav@1258	2221
jaroslav@1258	2222	// Find beginning of next segment
jaroslav@1258	2223	while (p <= end) {
jaroslav@1258	2224	if (path[p++] != '/')
jaroslav@1258	2225	continue;
jaroslav@1258	2226	path[p - 1] = '\0';
jaroslav@1258	2227
jaroslav@1258	2228	// Skip redundant slashes
jaroslav@1258	2229	while (p <= end) {
jaroslav@1258	2230	if (path[p] != '/') break;
jaroslav@1258	2231	path[p++] = '\0';
jaroslav@1258	2232	}
jaroslav@1258	2233	break;
jaroslav@1258	2234	}
jaroslav@1258	2235	}
jaroslav@1258	2236
jaroslav@1258	2237	if (i != segs.length)
jaroslav@1258	2238	throw new InternalError(); // ASSERT
jaroslav@1258	2239	}
jaroslav@1258	2240
jaroslav@1258	2241
jaroslav@1258	2242	// Join the segments in the given path according to the given segment-index
jaroslav@1258	2243	// array, ignoring those segments whose index entries have been set to -1,
jaroslav@1258	2244	// and inserting slashes as needed. Return the length of the resulting
jaroslav@1258	2245	// path.
jaroslav@1258	2246	//
jaroslav@1258	2247	// Preconditions:
jaroslav@1258	2248	// segs[i] == -1 implies segment i is to be ignored
jaroslav@1258	2249	// path computed by split, as above, with '\0' having replaced '/'
jaroslav@1258	2250	//
jaroslav@1258	2251	// Postconditions:
jaroslav@1258	2252	// path[0] .. path[return value] == Resulting path
jaroslav@1258	2253	//
jaroslav@1258	2254	static private int join(char[] path, int[] segs) {
jaroslav@1258	2255	int ns = segs.length; // Number of segments
jaroslav@1258	2256	int end = path.length - 1; // Index of last char in path
jaroslav@1258	2257	int p = 0; // Index of next path char to write
jaroslav@1258	2258
jaroslav@1258	2259	if (path[p] == '\0') {
jaroslav@1258	2260	// Restore initial slash for absolute paths
jaroslav@1258	2261	path[p++] = '/';
jaroslav@1258	2262	}
jaroslav@1258	2263
jaroslav@1258	2264	for (int i = 0; i < ns; i++) {
jaroslav@1258	2265	int q = segs[i]; // Current segment
jaroslav@1258	2266	if (q == -1)
jaroslav@1258	2267	// Ignore this segment
jaroslav@1258	2268	continue;
jaroslav@1258	2269
jaroslav@1258	2270	if (p == q) {
jaroslav@1258	2271	// We're already at this segment, so just skip to its end
jaroslav@1258	2272	while ((p <= end) && (path[p] != '\0'))
jaroslav@1258	2273	p++;
jaroslav@1258	2274	if (p <= end) {
jaroslav@1258	2275	// Preserve trailing slash
jaroslav@1258	2276	path[p++] = '/';
jaroslav@1258	2277	}
jaroslav@1258	2278	} else if (p < q) {
jaroslav@1258	2279	// Copy q down to p
jaroslav@1258	2280	while ((q <= end) && (path[q] != '\0'))
jaroslav@1258	2281	path[p++] = path[q++];
jaroslav@1258	2282	if (q <= end) {
jaroslav@1258	2283	// Preserve trailing slash
jaroslav@1258	2284	path[p++] = '/';
jaroslav@1258	2285	}
jaroslav@1258	2286	} else
jaroslav@1258	2287	throw new InternalError(); // ASSERT false
jaroslav@1258	2288	}
jaroslav@1258	2289
jaroslav@1258	2290	return p;
jaroslav@1258	2291	}
jaroslav@1258	2292
jaroslav@1258	2293
jaroslav@1258	2294	// Remove "." segments from the given path, and remove segment pairs
jaroslav@1258	2295	// consisting of a non-".." segment followed by a ".." segment.
jaroslav@1258	2296	//
jaroslav@1258	2297	private static void removeDots(char[] path, int[] segs) {
jaroslav@1258	2298	int ns = segs.length;
jaroslav@1258	2299	int end = path.length - 1;
jaroslav@1258	2300
jaroslav@1258	2301	for (int i = 0; i < ns; i++) {
jaroslav@1258	2302	int dots = 0; // Number of dots found (0, 1, or 2)
jaroslav@1258	2303
jaroslav@1258	2304	// Find next occurrence of "." or ".."
jaroslav@1258	2305	do {
jaroslav@1258	2306	int p = segs[i];
jaroslav@1258	2307	if (path[p] == '.') {
jaroslav@1258	2308	if (p == end) {
jaroslav@1258	2309	dots = 1;
jaroslav@1258	2310	break;
jaroslav@1258	2311	} else if (path[p + 1] == '\0') {
jaroslav@1258	2312	dots = 1;
jaroslav@1258	2313	break;
jaroslav@1258	2314	} else if ((path[p + 1] == '.')
jaroslav@1258	2315	&& ((p + 1 == end)
jaroslav@1258	2316	\|\| (path[p + 2] == '\0'))) {
jaroslav@1258	2317	dots = 2;
jaroslav@1258	2318	break;
jaroslav@1258	2319	}
jaroslav@1258	2320	}
jaroslav@1258	2321	i++;
jaroslav@1258	2322	} while (i < ns);
jaroslav@1258	2323	if ((i > ns) \|\| (dots == 0))
jaroslav@1258	2324	break;
jaroslav@1258	2325
jaroslav@1258	2326	if (dots == 1) {
jaroslav@1258	2327	// Remove this occurrence of "."
jaroslav@1258	2328	segs[i] = -1;
jaroslav@1258	2329	} else {
jaroslav@1258	2330	// If there is a preceding non-".." segment, remove both that
jaroslav@1258	2331	// segment and this occurrence of ".."; otherwise, leave this
jaroslav@1258	2332	// ".." segment as-is.
jaroslav@1258	2333	int j;
jaroslav@1258	2334	for (j = i - 1; j >= 0; j--) {
jaroslav@1258	2335	if (segs[j] != -1) break;
jaroslav@1258	2336	}
jaroslav@1258	2337	if (j >= 0) {
jaroslav@1258	2338	int q = segs[j];
jaroslav@1258	2339	if (!((path[q] == '.')
jaroslav@1258	2340	&& (path[q + 1] == '.')
jaroslav@1258	2341	&& (path[q + 2] == '\0'))) {
jaroslav@1258	2342	segs[i] = -1;
jaroslav@1258	2343	segs[j] = -1;
jaroslav@1258	2344	}
jaroslav@1258	2345	}
jaroslav@1258	2346	}
jaroslav@1258	2347	}
jaroslav@1258	2348	}
jaroslav@1258	2349
jaroslav@1258	2350
jaroslav@1258	2351	// DEVIATION: If the normalized path is relative, and if the first
jaroslav@1258	2352	// segment could be parsed as a scheme name, then prepend a "." segment
jaroslav@1258	2353	//
jaroslav@1258	2354	private static void maybeAddLeadingDot(char[] path, int[] segs) {
jaroslav@1258	2355
jaroslav@1258	2356	if (path[0] == '\0')
jaroslav@1258	2357	// The path is absolute
jaroslav@1258	2358	return;
jaroslav@1258	2359
jaroslav@1258	2360	int ns = segs.length;
jaroslav@1258	2361	int f = 0; // Index of first segment
jaroslav@1258	2362	while (f < ns) {
jaroslav@1258	2363	if (segs[f] >= 0)
jaroslav@1258	2364	break;
jaroslav@1258	2365	f++;
jaroslav@1258	2366	}
jaroslav@1258	2367	if ((f >= ns) \|\| (f == 0))
jaroslav@1258	2368	// The path is empty, or else the original first segment survived,
jaroslav@1258	2369	// in which case we already know that no leading "." is needed
jaroslav@1258	2370	return;
jaroslav@1258	2371
jaroslav@1258	2372	int p = segs[f];
jaroslav@1258	2373	while ((p < path.length) && (path[p] != ':') && (path[p] != '\0')) p++;
jaroslav@1258	2374	if (p >= path.length \|\| path[p] == '\0')
jaroslav@1258	2375	// No colon in first segment, so no "." needed
jaroslav@1258	2376	return;
jaroslav@1258	2377
jaroslav@1258	2378	// At this point we know that the first segment is unused,
jaroslav@1258	2379	// hence we can insert a "." segment at that position
jaroslav@1258	2380	path[0] = '.';
jaroslav@1258	2381	path[1] = '\0';
jaroslav@1258	2382	segs[0] = 0;
jaroslav@1258	2383	}
jaroslav@1258	2384
jaroslav@1258	2385
jaroslav@1258	2386	// Normalize the given path string. A normal path string has no empty
jaroslav@1258	2387	// segments (i.e., occurrences of "//"), no segments equal to ".", and no
jaroslav@1258	2388	// segments equal to ".." that are preceded by a segment not equal to "..".
jaroslav@1258	2389	// In contrast to Unix-style pathname normalization, for URI paths we
jaroslav@1258	2390	// always retain trailing slashes.
jaroslav@1258	2391	//
jaroslav@1258	2392	private static String normalize(String ps) {
jaroslav@1258	2393
jaroslav@1258	2394	// Does this path need normalization?
jaroslav@1258	2395	int ns = needsNormalization(ps); // Number of segments
jaroslav@1258	2396	if (ns < 0)
jaroslav@1258	2397	// Nope -- just return it
jaroslav@1258	2398	return ps;
jaroslav@1258	2399
jaroslav@1258	2400	char[] path = ps.toCharArray(); // Path in char-array form
jaroslav@1258	2401
jaroslav@1258	2402	// Split path into segments
jaroslav@1258	2403	int[] segs = new int[ns]; // Segment-index array
jaroslav@1258	2404	split(path, segs);
jaroslav@1258	2405
jaroslav@1258	2406	// Remove dots
jaroslav@1258	2407	removeDots(path, segs);
jaroslav@1258	2408
jaroslav@1258	2409	// Prevent scheme-name confusion
jaroslav@1258	2410	maybeAddLeadingDot(path, segs);
jaroslav@1258	2411
jaroslav@1258	2412	// Join the remaining segments and return the result
jaroslav@1258	2413	String s = new String(path, 0, join(path, segs));
jaroslav@1258	2414	if (s.equals(ps)) {
jaroslav@1258	2415	// string was already normalized
jaroslav@1258	2416	return ps;
jaroslav@1258	2417	}
jaroslav@1258	2418	return s;
jaroslav@1258	2419	}
jaroslav@1258	2420
jaroslav@1258	2421
jaroslav@1258	2422
jaroslav@1258	2423	// -- Character classes for parsing --
jaroslav@1258	2424
jaroslav@1258	2425	// RFC2396 precisely specifies which characters in the US-ASCII charset are
jaroslav@1258	2426	// permissible in the various components of a URI reference. We here
jaroslav@1258	2427	// define a set of mask pairs to aid in enforcing these restrictions. Each
jaroslav@1258	2428	// mask pair consists of two longs, a low mask and a high mask. Taken
jaroslav@1258	2429	// together they represent a 128-bit mask, where bit i is set iff the
jaroslav@1258	2430	// character with value i is permitted.
jaroslav@1258	2431	//
jaroslav@1258	2432	// This approach is more efficient than sequentially searching arrays of
jaroslav@1258	2433	// permitted characters. It could be made still more efficient by
jaroslav@1258	2434	// precompiling the mask information so that a character's presence in a
jaroslav@1258	2435	// given mask could be determined by a single table lookup.
jaroslav@1258	2436
jaroslav@1258	2437	// Compute the low-order mask for the characters in the given string
jaroslav@1258	2438	private static long lowMask(String chars) {
jaroslav@1258	2439	int n = chars.length();
jaroslav@1258	2440	long m = 0;
jaroslav@1258	2441	for (int i = 0; i < n; i++) {
jaroslav@1258	2442	char c = chars.charAt(i);
jaroslav@1258	2443	if (c < 64)
jaroslav@1258	2444	m \|= (1L << c);
jaroslav@1258	2445	}
jaroslav@1258	2446	return m;
jaroslav@1258	2447	}
jaroslav@1258	2448
jaroslav@1258	2449	// Compute the high-order mask for the characters in the given string
jaroslav@1258	2450	private static long highMask(String chars) {
jaroslav@1258	2451	int n = chars.length();
jaroslav@1258	2452	long m = 0;
jaroslav@1258	2453	for (int i = 0; i < n; i++) {
jaroslav@1258	2454	char c = chars.charAt(i);
jaroslav@1258	2455	if ((c >= 64) && (c < 128))
jaroslav@1258	2456	m \|= (1L << (c - 64));
jaroslav@1258	2457	}
jaroslav@1258	2458	return m;
jaroslav@1258	2459	}
jaroslav@1258	2460
jaroslav@1258	2461	// Compute a low-order mask for the characters
jaroslav@1258	2462	// between first and last, inclusive
jaroslav@1258	2463	private static long lowMask(char first, char last) {
jaroslav@1258	2464	long m = 0;
jaroslav@1258	2465	int f = Math.max(Math.min(first, 63), 0);
jaroslav@1258	2466	int l = Math.max(Math.min(last, 63), 0);
jaroslav@1258	2467	for (int i = f; i <= l; i++)
jaroslav@1258	2468	m \|= 1L << i;
jaroslav@1258	2469	return m;
jaroslav@1258	2470	}
jaroslav@1258	2471
jaroslav@1258	2472	// Compute a high-order mask for the characters
jaroslav@1258	2473	// between first and last, inclusive
jaroslav@1258	2474	private static long highMask(char first, char last) {
jaroslav@1258	2475	long m = 0;
jaroslav@1258	2476	int f = Math.max(Math.min(first, 127), 64) - 64;
jaroslav@1258	2477	int l = Math.max(Math.min(last, 127), 64) - 64;
jaroslav@1258	2478	for (int i = f; i <= l; i++)
jaroslav@1258	2479	m \|= 1L << i;
jaroslav@1258	2480	return m;
jaroslav@1258	2481	}
jaroslav@1258	2482
jaroslav@1258	2483	// Tell whether the given character is permitted by the given mask pair
jaroslav@1258	2484	private static boolean match(char c, long lowMask, long highMask) {
jaroslav@1258	2485	if (c == 0) // 0 doesn't have a slot in the mask. So, it never matches.
jaroslav@1258	2486	return false;
jaroslav@1258	2487	if (c < 64)
jaroslav@1258	2488	return ((1L << c) & lowMask) != 0;
jaroslav@1258	2489	if (c < 128)
jaroslav@1258	2490	return ((1L << (c - 64)) & highMask) != 0;
jaroslav@1258	2491	return false;
jaroslav@1258	2492	}
jaroslav@1258	2493
jaroslav@1258	2494	// Character-class masks, in reverse order from RFC2396 because
jaroslav@1258	2495	// initializers for static fields cannot make forward references.
jaroslav@1258	2496
jaroslav@1258	2497	// digit = "0" \| "1" \| "2" \| "3" \| "4" \| "5" \| "6" \| "7" \|
jaroslav@1258	2498	// "8" \| "9"
jaroslav@1258	2499	private static final long L_DIGIT = lowMask('0', '9');
jaroslav@1258	2500	private static final long H_DIGIT = 0L;
jaroslav@1258	2501
jaroslav@1258	2502	// upalpha = "A" \| "B" \| "C" \| "D" \| "E" \| "F" \| "G" \| "H" \| "I" \|
jaroslav@1258	2503	// "J" \| "K" \| "L" \| "M" \| "N" \| "O" \| "P" \| "Q" \| "R" \|
jaroslav@1258	2504	// "S" \| "T" \| "U" \| "V" \| "W" \| "X" \| "Y" \| "Z"
jaroslav@1258	2505	private static final long L_UPALPHA = 0L;
jaroslav@1258	2506	private static final long H_UPALPHA = highMask('A', 'Z');
jaroslav@1258	2507
jaroslav@1258	2508	// lowalpha = "a" \| "b" \| "c" \| "d" \| "e" \| "f" \| "g" \| "h" \| "i" \|
jaroslav@1258	2509	// "j" \| "k" \| "l" \| "m" \| "n" \| "o" \| "p" \| "q" \| "r" \|
jaroslav@1258	2510	// "s" \| "t" \| "u" \| "v" \| "w" \| "x" \| "y" \| "z"
jaroslav@1258	2511	private static final long L_LOWALPHA = 0L;
jaroslav@1258	2512	private static final long H_LOWALPHA = highMask('a', 'z');
jaroslav@1258	2513
jaroslav@1258	2514	// alpha = lowalpha \| upalpha
jaroslav@1258	2515	private static final long L_ALPHA = L_LOWALPHA \| L_UPALPHA;
jaroslav@1258	2516	private static final long H_ALPHA = H_LOWALPHA \| H_UPALPHA;
jaroslav@1258	2517
jaroslav@1258	2518	// alphanum = alpha \| digit
jaroslav@1258	2519	private static final long L_ALPHANUM = L_DIGIT \| L_ALPHA;
jaroslav@1258	2520	private static final long H_ALPHANUM = H_DIGIT \| H_ALPHA;
jaroslav@1258	2521
jaroslav@1258	2522	// hex = digit \| "A" \| "B" \| "C" \| "D" \| "E" \| "F" \|
jaroslav@1258	2523	// "a" \| "b" \| "c" \| "d" \| "e" \| "f"
jaroslav@1258	2524	private static final long L_HEX = L_DIGIT;
jaroslav@1258	2525	private static final long H_HEX = highMask('A', 'F') \| highMask('a', 'f');
jaroslav@1258	2526
jaroslav@1258	2527	// mark = "-" \| "_" \| "." \| "!" \| "~" \| "*" \| "'" \|
jaroslav@1258	2528	// "(" \| ")"
jaroslav@1258	2529	private static final long L_MARK = lowMask("-_.!~*'()");
jaroslav@1258	2530	private static final long H_MARK = highMask("-_.!~*'()");
jaroslav@1258	2531
jaroslav@1258	2532	// unreserved = alphanum \| mark
jaroslav@1258	2533	private static final long L_UNRESERVED = L_ALPHANUM \| L_MARK;
jaroslav@1258	2534	private static final long H_UNRESERVED = H_ALPHANUM \| H_MARK;
jaroslav@1258	2535
jaroslav@1258	2536	// reserved = ";" \| "/" \| "?" \| ":" \| "@" \| "&" \| "=" \| "+" \|
jaroslav@1258	2537	// "$" \| "," \| "[" \| "]"
jaroslav@1258	2538	// Added per RFC2732: "[", "]"
jaroslav@1258	2539	private static final long L_RESERVED = lowMask(";/?:@&=+$,[]");
jaroslav@1258	2540	private static final long H_RESERVED = highMask(";/?:@&=+$,[]");
jaroslav@1258	2541
jaroslav@1258	2542	// The zero'th bit is used to indicate that escape pairs and non-US-ASCII
jaroslav@1258	2543	// characters are allowed; this is handled by the scanEscape method below.
jaroslav@1258	2544	private static final long L_ESCAPED = 1L;
jaroslav@1258	2545	private static final long H_ESCAPED = 0L;
jaroslav@1258	2546
jaroslav@1258	2547	// uric = reserved \| unreserved \| escaped
jaroslav@1258	2548	private static final long L_URIC = L_RESERVED \| L_UNRESERVED \| L_ESCAPED;
jaroslav@1258	2549	private static final long H_URIC = H_RESERVED \| H_UNRESERVED \| H_ESCAPED;
jaroslav@1258	2550
jaroslav@1258	2551	// pchar = unreserved \| escaped \|
jaroslav@1258	2552	// ":" \| "@" \| "&" \| "=" \| "+" \| "$" \| ","
jaroslav@1258	2553	private static final long L_PCHAR
jaroslav@1258	2554	= L_UNRESERVED \| L_ESCAPED \| lowMask(":@&=+$,");
jaroslav@1258	2555	private static final long H_PCHAR
jaroslav@1258	2556	= H_UNRESERVED \| H_ESCAPED \| highMask(":@&=+$,");
jaroslav@1258	2557
jaroslav@1258	2558	// All valid path characters
jaroslav@1258	2559	private static final long L_PATH = L_PCHAR \| lowMask(";/");
jaroslav@1258	2560	private static final long H_PATH = H_PCHAR \| highMask(";/");
jaroslav@1258	2561
jaroslav@1258	2562	// Dash, for use in domainlabel and toplabel
jaroslav@1258	2563	private static final long L_DASH = lowMask("-");
jaroslav@1258	2564	private static final long H_DASH = highMask("-");
jaroslav@1258	2565
jaroslav@1258	2566	// Dot, for use in hostnames
jaroslav@1258	2567	private static final long L_DOT = lowMask(".");
jaroslav@1258	2568	private static final long H_DOT = highMask(".");
jaroslav@1258	2569
jaroslav@1258	2570	// userinfo = *( unreserved \| escaped \|
jaroslav@1258	2571	// ";" \| ":" \| "&" \| "=" \| "+" \| "$" \| "," )
jaroslav@1258	2572	private static final long L_USERINFO
jaroslav@1258	2573	= L_UNRESERVED \| L_ESCAPED \| lowMask(";:&=+$,");
jaroslav@1258	2574	private static final long H_USERINFO
jaroslav@1258	2575	= H_UNRESERVED \| H_ESCAPED \| highMask(";:&=+$,");
jaroslav@1258	2576
jaroslav@1258	2577	// reg_name = 1*( unreserved \| escaped \| "$" \| "," \|
jaroslav@1258	2578	// ";" \| ":" \| "@" \| "&" \| "=" \| "+" )
jaroslav@1258	2579	private static final long L_REG_NAME
jaroslav@1258	2580	= L_UNRESERVED \| L_ESCAPED \| lowMask("$,;:@&=+");
jaroslav@1258	2581	private static final long H_REG_NAME
jaroslav@1258	2582	= H_UNRESERVED \| H_ESCAPED \| highMask("$,;:@&=+");
jaroslav@1258	2583
jaroslav@1258	2584	// All valid characters for server-based authorities
jaroslav@1258	2585	private static final long L_SERVER
jaroslav@1258	2586	= L_USERINFO \| L_ALPHANUM \| L_DASH \| lowMask(".:@[]");
jaroslav@1258	2587	private static final long H_SERVER
jaroslav@1258	2588	= H_USERINFO \| H_ALPHANUM \| H_DASH \| highMask(".:@[]");
jaroslav@1258	2589
jaroslav@1258	2590	// Special case of server authority that represents an IPv6 address
jaroslav@1258	2591	// In this case, a % does not signify an escape sequence
jaroslav@1258	2592	private static final long L_SERVER_PERCENT
jaroslav@1258	2593	= L_SERVER \| lowMask("%");
jaroslav@1258	2594	private static final long H_SERVER_PERCENT
jaroslav@1258	2595	= H_SERVER \| highMask("%");
jaroslav@1258	2596	private static final long L_LEFT_BRACKET = lowMask("[");
jaroslav@1258	2597	private static final long H_LEFT_BRACKET = highMask("[");
jaroslav@1258	2598
jaroslav@1258	2599	// scheme = alpha *( alpha \| digit \| "+" \| "-" \| "." )
jaroslav@1258	2600	private static final long L_SCHEME = L_ALPHA \| L_DIGIT \| lowMask("+-.");
jaroslav@1258	2601	private static final long H_SCHEME = H_ALPHA \| H_DIGIT \| highMask("+-.");
jaroslav@1258	2602
jaroslav@1258	2603	// uric_no_slash = unreserved \| escaped \| ";" \| "?" \| ":" \| "@" \|
jaroslav@1258	2604	// "&" \| "=" \| "+" \| "$" \| ","
jaroslav@1258	2605	private static final long L_URIC_NO_SLASH
jaroslav@1258	2606	= L_UNRESERVED \| L_ESCAPED \| lowMask(";?:@&=+$,");
jaroslav@1258	2607	private static final long H_URIC_NO_SLASH
jaroslav@1258	2608	= H_UNRESERVED \| H_ESCAPED \| highMask(";?:@&=+$,");
jaroslav@1258	2609
jaroslav@1258	2610
jaroslav@1258	2611	// -- Escaping and encoding --
jaroslav@1258	2612
jaroslav@1258	2613	private final static char[] hexDigits = {
jaroslav@1258	2614	'0', '1', '2', '3', '4', '5', '6', '7',
jaroslav@1258	2615	'8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
jaroslav@1258	2616	};
jaroslav@1258	2617
jaroslav@1258	2618	private static void appendEscape(StringBuffer sb, byte b) {
jaroslav@1258	2619	sb.append('%');
jaroslav@1258	2620	sb.append(hexDigits[(b >> 4) & 0x0f]);
jaroslav@1258	2621	sb.append(hexDigits[(b >> 0) & 0x0f]);
jaroslav@1258	2622	}
jaroslav@1258	2623
jaroslav@1258	2624	private static void appendEncoded(StringBuffer sb, char c) {
jaroslav@1260	2625	/*
jaroslav@1258	2626	ByteBuffer bb = null;
jaroslav@1258	2627	try {
jaroslav@1258	2628	bb = ThreadLocalCoders.encoderFor("UTF-8")
jaroslav@1258	2629	.encode(CharBuffer.wrap("" + c));
jaroslav@1258	2630	} catch (CharacterCodingException x) {
jaroslav@1258	2631	assert false;
jaroslav@1258	2632	}
jaroslav@1258	2633	while (bb.hasRemaining()) {
jaroslav@1258	2634	int b = bb.get() & 0xff;
jaroslav@1258	2635	if (b >= 0x80)
jaroslav@1258	2636	appendEscape(sb, (byte)b);
jaroslav@1258	2637	else
jaroslav@1258	2638	sb.append((char)b);
jaroslav@1258	2639	}
jaroslav@1260	2640	*/
jaroslav@1258	2641	}
jaroslav@1258	2642
jaroslav@1258	2643	// Quote any characters in s that are not permitted
jaroslav@1258	2644	// by the given mask pair
jaroslav@1258	2645	//
jaroslav@1258	2646	private static String quote(String s, long lowMask, long highMask) {
jaroslav@1258	2647	int n = s.length();
jaroslav@1258	2648	StringBuffer sb = null;
jaroslav@1258	2649	boolean allowNonASCII = ((lowMask & L_ESCAPED) != 0);
jaroslav@1258	2650	for (int i = 0; i < s.length(); i++) {
jaroslav@1258	2651	char c = s.charAt(i);
jaroslav@1258	2652	if (c < '\u0080') {
jaroslav@1258	2653	if (!match(c, lowMask, highMask)) {
jaroslav@1258	2654	if (sb == null) {
jaroslav@1258	2655	sb = new StringBuffer();
jaroslav@1258	2656	sb.append(s.substring(0, i));
jaroslav@1258	2657	}
jaroslav@1258	2658	appendEscape(sb, (byte)c);
jaroslav@1258	2659	} else {
jaroslav@1258	2660	if (sb != null)
jaroslav@1258	2661	sb.append(c);
jaroslav@1258	2662	}
jaroslav@1258	2663	} else if (allowNonASCII
jaroslav@1258	2664	&& (Character.isSpaceChar(c)
jaroslav@1258	2665	\|\| Character.isISOControl(c))) {
jaroslav@1258	2666	if (sb == null) {
jaroslav@1258	2667	sb = new StringBuffer();
jaroslav@1258	2668	sb.append(s.substring(0, i));
jaroslav@1258	2669	}
jaroslav@1258	2670	appendEncoded(sb, c);
jaroslav@1258	2671	} else {
jaroslav@1258	2672	if (sb != null)
jaroslav@1258	2673	sb.append(c);
jaroslav@1258	2674	}
jaroslav@1258	2675	}
jaroslav@1258	2676	return (sb == null) ? s : sb.toString();
jaroslav@1258	2677	}
jaroslav@1258	2678
jaroslav@1258	2679	// Encodes all characters >= \u0080 into escaped, normalized UTF-8 octets,
jaroslav@1258	2680	// assuming that s is otherwise legal
jaroslav@1258	2681	//
jaroslav@1258	2682	private static String encode(String s) {
jaroslav@1258	2683	int n = s.length();
jaroslav@1258	2684	if (n == 0)
jaroslav@1258	2685	return s;
jaroslav@1258	2686
jaroslav@1258	2687	// First check whether we actually need to encode
jaroslav@1258	2688	for (int i = 0;;) {
jaroslav@1258	2689	if (s.charAt(i) >= '\u0080')
jaroslav@1258	2690	break;
jaroslav@1258	2691	if (++i >= n)
jaroslav@1258	2692	return s;
jaroslav@1258	2693	}
jaroslav@1260	2694	/*
jaroslav@1258	2695	String ns = Normalizer.normalize(s, Normalizer.Form.NFC);
jaroslav@1258	2696	ByteBuffer bb = null;
jaroslav@1258	2697	try {
jaroslav@1258	2698	bb = ThreadLocalCoders.encoderFor("UTF-8")
jaroslav@1258	2699	.encode(CharBuffer.wrap(ns));
jaroslav@1258	2700	} catch (CharacterCodingException x) {
jaroslav@1258	2701	assert false;
jaroslav@1258	2702	}
jaroslav@1260	2703	*/
jaroslav@1258	2704	StringBuffer sb = new StringBuffer();
jaroslav@1260	2705	/*
jaroslav@1258	2706	while (bb.hasRemaining()) {
jaroslav@1258	2707	int b = bb.get() & 0xff;
jaroslav@1258	2708	if (b >= 0x80)
jaroslav@1258	2709	appendEscape(sb, (byte)b);
jaroslav@1258	2710	else
jaroslav@1258	2711	sb.append((char)b);
jaroslav@1258	2712	}
jaroslav@1260	2713	*/
jaroslav@1258	2714	return sb.toString();
jaroslav@1258	2715	}
jaroslav@1258	2716
jaroslav@1258	2717	private static int decode(char c) {
jaroslav@1258	2718	if ((c >= '0') && (c <= '9'))
jaroslav@1258	2719	return c - '0';
jaroslav@1258	2720	if ((c >= 'a') && (c <= 'f'))
jaroslav@1258	2721	return c - 'a' + 10;
jaroslav@1258	2722	if ((c >= 'A') && (c <= 'F'))
jaroslav@1258	2723	return c - 'A' + 10;
jaroslav@1258	2724	assert false;
jaroslav@1258	2725	return -1;
jaroslav@1258	2726	}
jaroslav@1258	2727
jaroslav@1258	2728	private static byte decode(char c1, char c2) {
jaroslav@1258	2729	return (byte)( ((decode(c1) & 0xf) << 4)
jaroslav@1258	2730	\| ((decode(c2) & 0xf) << 0));
jaroslav@1258	2731	}
jaroslav@1258	2732
jaroslav@1258	2733	// Evaluates all escapes in s, applying UTF-8 decoding if needed. Assumes
jaroslav@1258	2734	// that escapes are well-formed syntactically, i.e., of the form %XX. If a
jaroslav@1258	2735	// sequence of escaped octets is not valid UTF-8 then the erroneous octets
jaroslav@1258	2736	// are replaced with '\uFFFD'.
jaroslav@1258	2737	// Exception: any "%" found between "[]" is left alone. It is an IPv6 literal
jaroslav@1258	2738	// with a scope_id
jaroslav@1258	2739	//
jaroslav@1258	2740	private static String decode(String s) {
jaroslav@1258	2741	if (s == null)
jaroslav@1258	2742	return s;
jaroslav@1258	2743	int n = s.length();
jaroslav@1258	2744	if (n == 0)
jaroslav@1258	2745	return s;
jaroslav@1258	2746	if (s.indexOf('%') < 0)
jaroslav@1258	2747	return s;
jaroslav@1258	2748
jaroslav@1258	2749	StringBuffer sb = new StringBuffer(n);
jaroslav@1260	2750	/*
jaroslav@1258	2751	ByteBuffer bb = ByteBuffer.allocate(n);
jaroslav@1258	2752	CharBuffer cb = CharBuffer.allocate(n);
jaroslav@1258	2753	CharsetDecoder dec = ThreadLocalCoders.decoderFor("UTF-8")
jaroslav@1258	2754	.onMalformedInput(CodingErrorAction.REPLACE)
jaroslav@1258	2755	.onUnmappableCharacter(CodingErrorAction.REPLACE);
jaroslav@1258	2756
jaroslav@1258	2757	// This is not horribly efficient, but it will do for now
jaroslav@1258	2758	char c = s.charAt(0);
jaroslav@1258	2759	boolean betweenBrackets = false;
jaroslav@1258	2760
jaroslav@1258	2761	for (int i = 0; i < n;) {
jaroslav@1258	2762	assert c == s.charAt(i); // Loop invariant
jaroslav@1258	2763	if (c == '[') {
jaroslav@1258	2764	betweenBrackets = true;
jaroslav@1258	2765	} else if (betweenBrackets && c == ']') {
jaroslav@1258	2766	betweenBrackets = false;
jaroslav@1258	2767	}
jaroslav@1258	2768	if (c != '%' \|\| betweenBrackets) {
jaroslav@1258	2769	sb.append(c);
jaroslav@1258	2770	if (++i >= n)
jaroslav@1258	2771	break;
jaroslav@1258	2772	c = s.charAt(i);
jaroslav@1258	2773	continue;
jaroslav@1258	2774	}
jaroslav@1258	2775	bb.clear();
jaroslav@1258	2776	int ui = i;
jaroslav@1258	2777	for (;;) {
jaroslav@1258	2778	assert (n - i >= 2);
jaroslav@1258	2779	bb.put(decode(s.charAt(++i), s.charAt(++i)));
jaroslav@1258	2780	if (++i >= n)
jaroslav@1258	2781	break;
jaroslav@1258	2782	c = s.charAt(i);
jaroslav@1258	2783	if (c != '%')
jaroslav@1258	2784	break;
jaroslav@1258	2785	}
jaroslav@1258	2786	bb.flip();
jaroslav@1258	2787	cb.clear();
jaroslav@1258	2788	dec.reset();
jaroslav@1258	2789	CoderResult cr = dec.decode(bb, cb, true);
jaroslav@1258	2790	assert cr.isUnderflow();
jaroslav@1258	2791	cr = dec.flush(cb);
jaroslav@1258	2792	assert cr.isUnderflow();
jaroslav@1258	2793	sb.append(cb.flip().toString());
jaroslav@1258	2794	}
jaroslav@1260	2795	*/
jaroslav@1258	2796	return sb.toString();
jaroslav@1258	2797	}
jaroslav@1258	2798
jaroslav@1258	2799
jaroslav@1258	2800	// -- Parsing --
jaroslav@1258	2801
jaroslav@1258	2802	// For convenience we wrap the input URI string in a new instance of the
jaroslav@1258	2803	// following internal class. This saves always having to pass the input
jaroslav@1258	2804	// string as an argument to each internal scan/parse method.
jaroslav@1258	2805
jaroslav@1258	2806	private class Parser {
jaroslav@1258	2807
jaroslav@1258	2808	private String input; // URI input string
jaroslav@1258	2809	private boolean requireServerAuthority = false;
jaroslav@1258	2810
jaroslav@1258	2811	Parser(String s) {
jaroslav@1258	2812	input = s;
jaroslav@1258	2813	string = s;
jaroslav@1258	2814	}
jaroslav@1258	2815
jaroslav@1258	2816	// -- Methods for throwing URISyntaxException in various ways --
jaroslav@1258	2817
jaroslav@1258	2818	private void fail(String reason) throws URISyntaxException {
jaroslav@1258	2819	throw new URISyntaxException(input, reason);
jaroslav@1258	2820	}
jaroslav@1258	2821
jaroslav@1258	2822	private void fail(String reason, int p) throws URISyntaxException {
jaroslav@1258	2823	throw new URISyntaxException(input, reason, p);
jaroslav@1258	2824	}
jaroslav@1258	2825
jaroslav@1258	2826	private void failExpecting(String expected, int p)
jaroslav@1258	2827	throws URISyntaxException
jaroslav@1258	2828	{
jaroslav@1258	2829	fail("Expected " + expected, p);
jaroslav@1258	2830	}
jaroslav@1258	2831
jaroslav@1258	2832	private void failExpecting(String expected, String prior, int p)
jaroslav@1258	2833	throws URISyntaxException
jaroslav@1258	2834	{
jaroslav@1258	2835	fail("Expected " + expected + " following " + prior, p);
jaroslav@1258	2836	}
jaroslav@1258	2837
jaroslav@1258	2838
jaroslav@1258	2839	// -- Simple access to the input string --
jaroslav@1258	2840
jaroslav@1258	2841	// Return a substring of the input string
jaroslav@1258	2842	//
jaroslav@1258	2843	private String substring(int start, int end) {
jaroslav@1258	2844	return input.substring(start, end);
jaroslav@1258	2845	}
jaroslav@1258	2846
jaroslav@1258	2847	// Return the char at position p,
jaroslav@1258	2848	// assuming that p < input.length()
jaroslav@1258	2849	//
jaroslav@1258	2850	private char charAt(int p) {
jaroslav@1258	2851	return input.charAt(p);
jaroslav@1258	2852	}
jaroslav@1258	2853
jaroslav@1258	2854	// Tells whether start < end and, if so, whether charAt(start) == c
jaroslav@1258	2855	//
jaroslav@1258	2856	private boolean at(int start, int end, char c) {
jaroslav@1258	2857	return (start < end) && (charAt(start) == c);
jaroslav@1258	2858	}
jaroslav@1258	2859
jaroslav@1258	2860	// Tells whether start + s.length() < end and, if so,
jaroslav@1258	2861	// whether the chars at the start position match s exactly
jaroslav@1258	2862	//
jaroslav@1258	2863	private boolean at(int start, int end, String s) {
jaroslav@1258	2864	int p = start;
jaroslav@1258	2865	int sn = s.length();
jaroslav@1258	2866	if (sn > end - p)
jaroslav@1258	2867	return false;
jaroslav@1258	2868	int i = 0;
jaroslav@1258	2869	while (i < sn) {
jaroslav@1258	2870	if (charAt(p++) != s.charAt(i)) {
jaroslav@1258	2871	break;
jaroslav@1258	2872	}
jaroslav@1258	2873	i++;
jaroslav@1258	2874	}
jaroslav@1258	2875	return (i == sn);
jaroslav@1258	2876	}
jaroslav@1258	2877
jaroslav@1258	2878
jaroslav@1258	2879	// -- Scanning --
jaroslav@1258	2880
jaroslav@1258	2881	// The various scan and parse methods that follow use a uniform
jaroslav@1258	2882	// convention of taking the current start position and end index as
jaroslav@1258	2883	// their first two arguments. The start is inclusive while the end is
jaroslav@1258	2884	// exclusive, just as in the String class, i.e., a start/end pair
jaroslav@1258	2885	// denotes the half-open interval [start, end) of the input string.
jaroslav@1258	2886	//
jaroslav@1258	2887	// These methods never proceed past the end position. They may return
jaroslav@1258	2888	// -1 to indicate outright failure, but more often they simply return
jaroslav@1258	2889	// the position of the first char after the last char scanned. Thus
jaroslav@1258	2890	// a typical idiom is
jaroslav@1258	2891	//
jaroslav@1258	2892	// int p = start;
jaroslav@1258	2893	// int q = scan(p, end, ...);
jaroslav@1258	2894	// if (q > p)
jaroslav@1258	2895	// // We scanned something
jaroslav@1258	2896	// ...;
jaroslav@1258	2897	// else if (q == p)
jaroslav@1258	2898	// // We scanned nothing
jaroslav@1258	2899	// ...;
jaroslav@1258	2900	// else if (q == -1)
jaroslav@1258	2901	// // Something went wrong
jaroslav@1258	2902	// ...;
jaroslav@1258	2903
jaroslav@1258	2904
jaroslav@1258	2905	// Scan a specific char: If the char at the given start position is
jaroslav@1258	2906	// equal to c, return the index of the next char; otherwise, return the
jaroslav@1258	2907	// start position.
jaroslav@1258	2908	//
jaroslav@1258	2909	private int scan(int start, int end, char c) {
jaroslav@1258	2910	if ((start < end) && (charAt(start) == c))
jaroslav@1258	2911	return start + 1;
jaroslav@1258	2912	return start;
jaroslav@1258	2913	}
jaroslav@1258	2914
jaroslav@1258	2915	// Scan forward from the given start position. Stop at the first char
jaroslav@1258	2916	// in the err string (in which case -1 is returned), or the first char
jaroslav@1258	2917	// in the stop string (in which case the index of the preceding char is
jaroslav@1258	2918	// returned), or the end of the input string (in which case the length
jaroslav@1258	2919	// of the input string is returned). May return the start position if
jaroslav@1258	2920	// nothing matches.
jaroslav@1258	2921	//
jaroslav@1258	2922	private int scan(int start, int end, String err, String stop) {
jaroslav@1258	2923	int p = start;
jaroslav@1258	2924	while (p < end) {
jaroslav@1258	2925	char c = charAt(p);
jaroslav@1258	2926	if (err.indexOf(c) >= 0)
jaroslav@1258	2927	return -1;
jaroslav@1258	2928	if (stop.indexOf(c) >= 0)
jaroslav@1258	2929	break;
jaroslav@1258	2930	p++;
jaroslav@1258	2931	}
jaroslav@1258	2932	return p;
jaroslav@1258	2933	}
jaroslav@1258	2934
jaroslav@1258	2935	// Scan a potential escape sequence, starting at the given position,
jaroslav@1258	2936	// with the given first char (i.e., charAt(start) == c).
jaroslav@1258	2937	//
jaroslav@1258	2938	// This method assumes that if escapes are allowed then visible
jaroslav@1258	2939	// non-US-ASCII chars are also allowed.
jaroslav@1258	2940	//
jaroslav@1258	2941	private int scanEscape(int start, int n, char first)
jaroslav@1258	2942	throws URISyntaxException
jaroslav@1258	2943	{
jaroslav@1258	2944	int p = start;
jaroslav@1258	2945	char c = first;
jaroslav@1258	2946	if (c == '%') {
jaroslav@1258	2947	// Process escape pair
jaroslav@1258	2948	if ((p + 3 <= n)
jaroslav@1258	2949	&& match(charAt(p + 1), L_HEX, H_HEX)
jaroslav@1258	2950	&& match(charAt(p + 2), L_HEX, H_HEX)) {
jaroslav@1258	2951	return p + 3;
jaroslav@1258	2952	}
jaroslav@1258	2953	fail("Malformed escape pair", p);
jaroslav@1258	2954	} else if ((c > 128)
jaroslav@1258	2955	&& !Character.isSpaceChar(c)
jaroslav@1258	2956	&& !Character.isISOControl(c)) {
jaroslav@1258	2957	// Allow unescaped but visible non-US-ASCII chars
jaroslav@1258	2958	return p + 1;
jaroslav@1258	2959	}
jaroslav@1258	2960	return p;
jaroslav@1258	2961	}
jaroslav@1258	2962
jaroslav@1258	2963	// Scan chars that match the given mask pair
jaroslav@1258	2964	//
jaroslav@1258	2965	private int scan(int start, int n, long lowMask, long highMask)
jaroslav@1258	2966	throws URISyntaxException
jaroslav@1258	2967	{
jaroslav@1258	2968	int p = start;
jaroslav@1258	2969	while (p < n) {
jaroslav@1258	2970	char c = charAt(p);
jaroslav@1258	2971	if (match(c, lowMask, highMask)) {
jaroslav@1258	2972	p++;
jaroslav@1258	2973	continue;
jaroslav@1258	2974	}
jaroslav@1258	2975	if ((lowMask & L_ESCAPED) != 0) {
jaroslav@1258	2976	int q = scanEscape(p, n, c);
jaroslav@1258	2977	if (q > p) {
jaroslav@1258	2978	p = q;
jaroslav@1258	2979	continue;
jaroslav@1258	2980	}
jaroslav@1258	2981	}
jaroslav@1258	2982	break;
jaroslav@1258	2983	}
jaroslav@1258	2984	return p;
jaroslav@1258	2985	}
jaroslav@1258	2986
jaroslav@1258	2987	// Check that each of the chars in [start, end) matches the given mask
jaroslav@1258	2988	//
jaroslav@1258	2989	private void checkChars(int start, int end,
jaroslav@1258	2990	long lowMask, long highMask,
jaroslav@1258	2991	String what)
jaroslav@1258	2992	throws URISyntaxException
jaroslav@1258	2993	{
jaroslav@1258	2994	int p = scan(start, end, lowMask, highMask);
jaroslav@1258	2995	if (p < end)
jaroslav@1258	2996	fail("Illegal character in " + what, p);
jaroslav@1258	2997	}
jaroslav@1258	2998
jaroslav@1258	2999	// Check that the char at position p matches the given mask
jaroslav@1258	3000	//
jaroslav@1258	3001	private void checkChar(int p,
jaroslav@1258	3002	long lowMask, long highMask,
jaroslav@1258	3003	String what)
jaroslav@1258	3004	throws URISyntaxException
jaroslav@1258	3005	{
jaroslav@1258	3006	checkChars(p, p + 1, lowMask, highMask, what);
jaroslav@1258	3007	}
jaroslav@1258	3008
jaroslav@1258	3009
jaroslav@1258	3010	// -- Parsing --
jaroslav@1258	3011
jaroslav@1258	3012	// [<scheme>:]<scheme-specific-part>[#<fragment>]
jaroslav@1258	3013	//
jaroslav@1258	3014	void parse(boolean rsa) throws URISyntaxException {
jaroslav@1258	3015	requireServerAuthority = rsa;
jaroslav@1258	3016	int ssp; // Start of scheme-specific part
jaroslav@1258	3017	int n = input.length();
jaroslav@1258	3018	int p = scan(0, n, "/?#", ":");
jaroslav@1258	3019	if ((p >= 0) && at(p, n, ':')) {
jaroslav@1258	3020	if (p == 0)
jaroslav@1258	3021	failExpecting("scheme name", 0);
jaroslav@1258	3022	checkChar(0, L_ALPHA, H_ALPHA, "scheme name");
jaroslav@1258	3023	checkChars(1, p, L_SCHEME, H_SCHEME, "scheme name");
jaroslav@1258	3024	scheme = substring(0, p);
jaroslav@1258	3025	p++; // Skip ':'
jaroslav@1258	3026	ssp = p;
jaroslav@1258	3027	if (at(p, n, '/')) {
jaroslav@1258	3028	p = parseHierarchical(p, n);
jaroslav@1258	3029	} else {
jaroslav@1258	3030	int q = scan(p, n, "", "#");
jaroslav@1258	3031	if (q <= p)
jaroslav@1258	3032	failExpecting("scheme-specific part", p);
jaroslav@1258	3033	checkChars(p, q, L_URIC, H_URIC, "opaque part");
jaroslav@1258	3034	p = q;
jaroslav@1258	3035	}
jaroslav@1258	3036	} else {
jaroslav@1258	3037	ssp = 0;
jaroslav@1258	3038	p = parseHierarchical(0, n);
jaroslav@1258	3039	}
jaroslav@1258	3040	schemeSpecificPart = substring(ssp, p);
jaroslav@1258	3041	if (at(p, n, '#')) {
jaroslav@1258	3042	checkChars(p + 1, n, L_URIC, H_URIC, "fragment");
jaroslav@1258	3043	fragment = substring(p + 1, n);
jaroslav@1258	3044	p = n;
jaroslav@1258	3045	}
jaroslav@1258	3046	if (p < n)
jaroslav@1258	3047	fail("end of URI", p);
jaroslav@1258	3048	}
jaroslav@1258	3049
jaroslav@1258	3050	// [//authority]<path>[?<query>]
jaroslav@1258	3051	//
jaroslav@1258	3052	// DEVIATION from RFC2396: We allow an empty authority component as
jaroslav@1258	3053	// long as it's followed by a non-empty path, query component, or
jaroslav@1258	3054	// fragment component. This is so that URIs such as "file:///foo/bar"
jaroslav@1258	3055	// will parse. This seems to be the intent of RFC2396, though the
jaroslav@1258	3056	// grammar does not permit it. If the authority is empty then the
jaroslav@1258	3057	// userInfo, host, and port components are undefined.
jaroslav@1258	3058	//
jaroslav@1258	3059	// DEVIATION from RFC2396: We allow empty relative paths. This seems
jaroslav@1258	3060	// to be the intent of RFC2396, but the grammar does not permit it.
jaroslav@1258	3061	// The primary consequence of this deviation is that "#f" parses as a
jaroslav@1258	3062	// relative URI with an empty path.
jaroslav@1258	3063	//
jaroslav@1258	3064	private int parseHierarchical(int start, int n)
jaroslav@1258	3065	throws URISyntaxException
jaroslav@1258	3066	{
jaroslav@1258	3067	int p = start;
jaroslav@1258	3068	if (at(p, n, '/') && at(p + 1, n, '/')) {
jaroslav@1258	3069	p += 2;
jaroslav@1258	3070	int q = scan(p, n, "", "/?#");
jaroslav@1258	3071	if (q > p) {
jaroslav@1258	3072	p = parseAuthority(p, q);
jaroslav@1258	3073	} else if (q < n) {
jaroslav@1258	3074	// DEVIATION: Allow empty authority prior to non-empty
jaroslav@1258	3075	// path, query component or fragment identifier
jaroslav@1258	3076	} else
jaroslav@1258	3077	failExpecting("authority", p);
jaroslav@1258	3078	}
jaroslav@1258	3079	int q = scan(p, n, "", "?#"); // DEVIATION: May be empty
jaroslav@1258	3080	checkChars(p, q, L_PATH, H_PATH, "path");
jaroslav@1258	3081	path = substring(p, q);
jaroslav@1258	3082	p = q;
jaroslav@1258	3083	if (at(p, n, '?')) {
jaroslav@1258	3084	p++;
jaroslav@1258	3085	q = scan(p, n, "", "#");
jaroslav@1258	3086	checkChars(p, q, L_URIC, H_URIC, "query");
jaroslav@1258	3087	query = substring(p, q);
jaroslav@1258	3088	p = q;
jaroslav@1258	3089	}
jaroslav@1258	3090	return p;
jaroslav@1258	3091	}
jaroslav@1258	3092
jaroslav@1258	3093	// authority = server \| reg_name
jaroslav@1258	3094	//
jaroslav@1258	3095	// Ambiguity: An authority that is a registry name rather than a server
jaroslav@1258	3096	// might have a prefix that parses as a server. We use the fact that
jaroslav@1258	3097	// the authority component is always followed by '/' or the end of the
jaroslav@1258	3098	// input string to resolve this: If the complete authority did not
jaroslav@1258	3099	// parse as a server then we try to parse it as a registry name.
jaroslav@1258	3100	//
jaroslav@1258	3101	private int parseAuthority(int start, int n)
jaroslav@1258	3102	throws URISyntaxException
jaroslav@1258	3103	{
jaroslav@1258	3104	int p = start;
jaroslav@1258	3105	int q = p;
jaroslav@1258	3106	URISyntaxException ex = null;
jaroslav@1258	3107
jaroslav@1258	3108	boolean serverChars;
jaroslav@1258	3109	boolean regChars;
jaroslav@1258	3110
jaroslav@1258	3111	if (scan(p, n, "", "]") > p) {
jaroslav@1258	3112	// contains a literal IPv6 address, therefore % is allowed
jaroslav@1258	3113	serverChars = (scan(p, n, L_SERVER_PERCENT, H_SERVER_PERCENT) == n);
jaroslav@1258	3114	} else {
jaroslav@1258	3115	serverChars = (scan(p, n, L_SERVER, H_SERVER) == n);
jaroslav@1258	3116	}
jaroslav@1258	3117	regChars = (scan(p, n, L_REG_NAME, H_REG_NAME) == n);
jaroslav@1258	3118
jaroslav@1258	3119	if (regChars && !serverChars) {
jaroslav@1258	3120	// Must be a registry-based authority
jaroslav@1258	3121	authority = substring(p, n);
jaroslav@1258	3122	return n;
jaroslav@1258	3123	}
jaroslav@1258	3124
jaroslav@1258	3125	if (serverChars) {
jaroslav@1258	3126	// Might be (probably is) a server-based authority, so attempt
jaroslav@1258	3127	// to parse it as such. If the attempt fails, try to treat it
jaroslav@1258	3128	// as a registry-based authority.
jaroslav@1258	3129	try {
jaroslav@1258	3130	q = parseServer(p, n);
jaroslav@1258	3131	if (q < n)
jaroslav@1258	3132	failExpecting("end of authority", q);
jaroslav@1258	3133	authority = substring(p, n);
jaroslav@1258	3134	} catch (URISyntaxException x) {
jaroslav@1258	3135	// Undo results of failed parse
jaroslav@1258	3136	userInfo = null;
jaroslav@1258	3137	host = null;
jaroslav@1258	3138	port = -1;
jaroslav@1258	3139	if (requireServerAuthority) {
jaroslav@1258	3140	// If we're insisting upon a server-based authority,
jaroslav@1258	3141	// then just re-throw the exception
jaroslav@1258	3142	throw x;
jaroslav@1258	3143	} else {
jaroslav@1258	3144	// Save the exception in case it doesn't parse as a
jaroslav@1258	3145	// registry either
jaroslav@1258	3146	ex = x;
jaroslav@1258	3147	q = p;
jaroslav@1258	3148	}
jaroslav@1258	3149	}
jaroslav@1258	3150	}
jaroslav@1258	3151
jaroslav@1258	3152	if (q < n) {
jaroslav@1258	3153	if (regChars) {
jaroslav@1258	3154	// Registry-based authority
jaroslav@1258	3155	authority = substring(p, n);
jaroslav@1258	3156	} else if (ex != null) {
jaroslav@1258	3157	// Re-throw exception; it was probably due to
jaroslav@1258	3158	// a malformed IPv6 address
jaroslav@1258	3159	throw ex;
jaroslav@1258	3160	} else {
jaroslav@1258	3161	fail("Illegal character in authority", q);
jaroslav@1258	3162	}
jaroslav@1258	3163	}
jaroslav@1258	3164
jaroslav@1258	3165	return n;
jaroslav@1258	3166	}
jaroslav@1258	3167
jaroslav@1258	3168
jaroslav@1258	3169	// [<userinfo>@]<host>[:<port>]
jaroslav@1258	3170	//
jaroslav@1258	3171	private int parseServer(int start, int n)
jaroslav@1258	3172	throws URISyntaxException
jaroslav@1258	3173	{
jaroslav@1258	3174	int p = start;
jaroslav@1258	3175	int q;
jaroslav@1258	3176
jaroslav@1258	3177	// userinfo
jaroslav@1258	3178	q = scan(p, n, "/?#", "@");
jaroslav@1258	3179	if ((q >= p) && at(q, n, '@')) {
jaroslav@1258	3180	checkChars(p, q, L_USERINFO, H_USERINFO, "user info");
jaroslav@1258	3181	userInfo = substring(p, q);
jaroslav@1258	3182	p = q + 1; // Skip '@'
jaroslav@1258	3183	}
jaroslav@1258	3184
jaroslav@1258	3185	// hostname, IPv4 address, or IPv6 address
jaroslav@1258	3186	if (at(p, n, '[')) {
jaroslav@1258	3187	// DEVIATION from RFC2396: Support IPv6 addresses, per RFC2732
jaroslav@1258	3188	p++;
jaroslav@1258	3189	q = scan(p, n, "/?#", "]");
jaroslav@1258	3190	if ((q > p) && at(q, n, ']')) {
jaroslav@1258	3191	// look for a "%" scope id
jaroslav@1258	3192	int r = scan (p, q, "", "%");
jaroslav@1258	3193	if (r > p) {
jaroslav@1258	3194	parseIPv6Reference(p, r);
jaroslav@1258	3195	if (r+1 == q) {
jaroslav@1258	3196	fail ("scope id expected");
jaroslav@1258	3197	}
jaroslav@1258	3198	checkChars (r+1, q, L_ALPHANUM, H_ALPHANUM,
jaroslav@1258	3199	"scope id");
jaroslav@1258	3200	} else {
jaroslav@1258	3201	parseIPv6Reference(p, q);
jaroslav@1258	3202	}
jaroslav@1258	3203	host = substring(p-1, q+1);
jaroslav@1258	3204	p = q + 1;
jaroslav@1258	3205	} else {
jaroslav@1258	3206	failExpecting("closing bracket for IPv6 address", q);
jaroslav@1258	3207	}
jaroslav@1258	3208	} else {
jaroslav@1258	3209	q = parseIPv4Address(p, n);
jaroslav@1258	3210	if (q <= p)
jaroslav@1258	3211	q = parseHostname(p, n);
jaroslav@1258	3212	p = q;
jaroslav@1258	3213	}
jaroslav@1258	3214
jaroslav@1258	3215	// port
jaroslav@1258	3216	if (at(p, n, ':')) {
jaroslav@1258	3217	p++;
jaroslav@1258	3218	q = scan(p, n, "", "/");
jaroslav@1258	3219	if (q > p) {
jaroslav@1258	3220	checkChars(p, q, L_DIGIT, H_DIGIT, "port number");
jaroslav@1258	3221	try {
jaroslav@1258	3222	port = Integer.parseInt(substring(p, q));
jaroslav@1258	3223	} catch (NumberFormatException x) {
jaroslav@1258	3224	fail("Malformed port number", p);
jaroslav@1258	3225	}
jaroslav@1258	3226	p = q;
jaroslav@1258	3227	}
jaroslav@1258	3228	}
jaroslav@1258	3229	if (p < n)
jaroslav@1258	3230	failExpecting("port number", p);
jaroslav@1258	3231
jaroslav@1258	3232	return p;
jaroslav@1258	3233	}
jaroslav@1258	3234
jaroslav@1258	3235	// Scan a string of decimal digits whose value fits in a byte
jaroslav@1258	3236	//
jaroslav@1258	3237	private int scanByte(int start, int n)
jaroslav@1258	3238	throws URISyntaxException
jaroslav@1258	3239	{
jaroslav@1258	3240	int p = start;
jaroslav@1258	3241	int q = scan(p, n, L_DIGIT, H_DIGIT);
jaroslav@1258	3242	if (q <= p) return q;
jaroslav@1258	3243	if (Integer.parseInt(substring(p, q)) > 255) return p;
jaroslav@1258	3244	return q;
jaroslav@1258	3245	}
jaroslav@1258	3246
jaroslav@1258	3247	// Scan an IPv4 address.
jaroslav@1258	3248	//
jaroslav@1258	3249	// If the strict argument is true then we require that the given
jaroslav@1258	3250	// interval contain nothing besides an IPv4 address; if it is false
jaroslav@1258	3251	// then we only require that it start with an IPv4 address.
jaroslav@1258	3252	//
// If the interval does not contain or start with (depending upon the
// strict argument) a run of legal IPv4 address characters then we
// return -1 immediately; otherwise we insist that these characters
// parse as a legal IPv4 address and throw an exception on failure.
jaroslav@1258	3257	//
jaroslav@1258	3258	// We assume that any string of decimal digits and dots must be an IPv4
jaroslav@1258	3259	// address. It won't parse as a hostname anyway, so making that
jaroslav@1258	3260	// assumption here allows more meaningful exceptions to be thrown.
jaroslav@1258	3261	//
jaroslav@1258	3262	private int scanIPv4Address(int start, int n, boolean strict)
jaroslav@1258	3263	throws URISyntaxException
jaroslav@1258	3264	{
jaroslav@1258	3265	int p = start;
jaroslav@1258	3266	int q;
jaroslav@1258	3267	int m = scan(p, n, L_DIGIT \| L_DOT, H_DIGIT \| H_DOT);
jaroslav@1258	3268	if ((m <= p) \|\| (strict && (m != n)))
jaroslav@1258	3269	return -1;
jaroslav@1258	3270	for (;;) {
jaroslav@1258	3271	// Per RFC2732: At most three digits per byte
jaroslav@1258	3272	// Further constraint: Each element fits in a byte
jaroslav@1258	3273	if ((q = scanByte(p, m)) <= p) break; p = q;
jaroslav@1258	3274	if ((q = scan(p, m, '.')) <= p) break; p = q;
jaroslav@1258	3275	if ((q = scanByte(p, m)) <= p) break; p = q;
jaroslav@1258	3276	if ((q = scan(p, m, '.')) <= p) break; p = q;
jaroslav@1258	3277	if ((q = scanByte(p, m)) <= p) break; p = q;
jaroslav@1258	3278	if ((q = scan(p, m, '.')) <= p) break; p = q;
jaroslav@1258	3279	if ((q = scanByte(p, m)) <= p) break; p = q;
jaroslav@1258	3280	if (q < m) break;
jaroslav@1258	3281	return q;
jaroslav@1258	3282	}
jaroslav@1258	3283	fail("Malformed IPv4 address", q);
jaroslav@1258	3284	return -1;
jaroslav@1258	3285	}
jaroslav@1258	3286
jaroslav@1258	3287	// Take an IPv4 address: Throw an exception if the given interval
jaroslav@1258	3288	// contains anything except an IPv4 address
jaroslav@1258	3289	//
jaroslav@1258	3290	private int takeIPv4Address(int start, int n, String expected)
jaroslav@1258	3291	throws URISyntaxException
jaroslav@1258	3292	{
jaroslav@1258	3293	int p = scanIPv4Address(start, n, true);
jaroslav@1258	3294	if (p <= start)
jaroslav@1258	3295	failExpecting(expected, start);
jaroslav@1258	3296	return p;
jaroslav@1258	3297	}
jaroslav@1258	3298
jaroslav@1258	3299	// Attempt to parse an IPv4 address, returning -1 on failure but
jaroslav@1258	3300	// allowing the given interval to contain [:<characters>] after
jaroslav@1258	3301	// the IPv4 address.
jaroslav@1258	3302	//
jaroslav@1258	3303	private int parseIPv4Address(int start, int n) {
jaroslav@1258	3304	int p;
jaroslav@1258	3305
jaroslav@1258	3306	try {
jaroslav@1258	3307	p = scanIPv4Address(start, n, false);
jaroslav@1258	3308	} catch (URISyntaxException x) {
jaroslav@1258	3309	return -1;
jaroslav@1258	3310	} catch (NumberFormatException nfe) {
jaroslav@1258	3311	return -1;
jaroslav@1258	3312	}
jaroslav@1258	3313
jaroslav@1258	3314	if (p > start && p < n) {
jaroslav@1258	3315	// IPv4 address is followed by something - check that
jaroslav@1258	3316	// it's a ":" as this is the only valid character to
jaroslav@1258	3317	// follow an address.
jaroslav@1258	3318	if (charAt(p) != ':') {
jaroslav@1258	3319	p = -1;
jaroslav@1258	3320	}
jaroslav@1258	3321	}
jaroslav@1258	3322
jaroslav@1258	3323	if (p > start)
jaroslav@1258	3324	host = substring(start, p);
jaroslav@1258	3325
jaroslav@1258	3326	return p;
jaroslav@1258	3327	}
jaroslav@1258	3328
// hostname      = domainlabel [ "." ] | 1*( domainlabel "." ) toplabel [ "." ]
// domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
// toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
jaroslav@1258	3332	//
jaroslav@1258	3333	private int parseHostname(int start, int n)
jaroslav@1258	3334	throws URISyntaxException
jaroslav@1258	3335	{
jaroslav@1258	3336	int p = start;
jaroslav@1258	3337	int q;
jaroslav@1258	3338	int l = -1; // Start of last parsed label
jaroslav@1258	3339
jaroslav@1258	3340	do {
jaroslav@1258	3341	// domainlabel = alphanum [ *( alphanum \| "-" ) alphanum ]
jaroslav@1258	3342	q = scan(p, n, L_ALPHANUM, H_ALPHANUM);
jaroslav@1258	3343	if (q <= p)
jaroslav@1258	3344	break;
jaroslav@1258	3345	l = p;
jaroslav@1258	3346	if (q > p) {
jaroslav@1258	3347	p = q;
jaroslav@1258	3348	q = scan(p, n, L_ALPHANUM \| L_DASH, H_ALPHANUM \| H_DASH);
jaroslav@1258	3349	if (q > p) {
jaroslav@1258	3350	if (charAt(q - 1) == '-')
jaroslav@1258	3351	fail("Illegal character in hostname", q - 1);
jaroslav@1258	3352	p = q;
jaroslav@1258	3353	}
jaroslav@1258	3354	}
jaroslav@1258	3355	q = scan(p, n, '.');
jaroslav@1258	3356	if (q <= p)
jaroslav@1258	3357	break;
jaroslav@1258	3358	p = q;
jaroslav@1258	3359	} while (p < n);
jaroslav@1258	3360
jaroslav@1258	3361	if ((p < n) && !at(p, n, ':'))
jaroslav@1258	3362	fail("Illegal character in hostname", p);
jaroslav@1258	3363
jaroslav@1258	3364	if (l < 0)
jaroslav@1258	3365	failExpecting("hostname", start);
jaroslav@1258	3366
jaroslav@1258	3367	// for a fully qualified hostname check that the rightmost
jaroslav@1258	3368	// label starts with an alpha character.
jaroslav@1258	3369	if (l > start && !match(charAt(l), L_ALPHA, H_ALPHA)) {
jaroslav@1258	3370	fail("Illegal character in hostname", l);
jaroslav@1258	3371	}
jaroslav@1258	3372
jaroslav@1258	3373	host = substring(start, p);
jaroslav@1258	3374	return p;
jaroslav@1258	3375	}
jaroslav@1258	3376
jaroslav@1258	3377
jaroslav@1258	3378	// IPv6 address parsing, from RFC2373: IPv6 Addressing Architecture
jaroslav@1258	3379	//
jaroslav@1258	3380	// Bug: The grammar in RFC2373 Appendix B does not allow addresses of
jaroslav@1258	3381	// the form ::12.34.56.78, which are clearly shown in the examples
jaroslav@1258	3382	// earlier in the document. Here is the original grammar:
jaroslav@1258	3383	//
//   IPv6address = hexpart [ ":" IPv4address ]
//   hexpart     = hexseq | hexseq "::" [ hexseq ] | "::" [ hexseq ]
//   hexseq      = hex4 *( ":" hex4)
//   hex4        = 1*4HEXDIG
jaroslav@1258	3388	//
jaroslav@1258	3389	// We therefore use the following revised grammar:
jaroslav@1258	3390	//
//   IPv6address = hexseq [ ":" IPv4address ]
//                 | hexseq [ "::" [ hexpost ] ]
//                 | "::" [ hexpost ]
//   hexpost     = hexseq | hexseq ":" IPv4address | IPv4address
//   hexseq      = hex4 *( ":" hex4)
//   hex4        = 1*4HEXDIG
jaroslav@1258	3397	//
jaroslav@1258	3398	// This covers all and only the following cases:
jaroslav@1258	3399	//
jaroslav@1258	3400	// hexseq
jaroslav@1258	3401	// hexseq : IPv4address
jaroslav@1258	3402	// hexseq ::
jaroslav@1258	3403	// hexseq :: hexseq
jaroslav@1258	3404	// hexseq :: hexseq : IPv4address
jaroslav@1258	3405	// hexseq :: IPv4address
jaroslav@1258	3406	// :: hexseq
jaroslav@1258	3407	// :: hexseq : IPv4address
jaroslav@1258	3408	// :: IPv4address
jaroslav@1258	3409	// ::
jaroslav@1258	3410	//
jaroslav@1258	3411	// Additionally we constrain the IPv6 address as follows :-
jaroslav@1258	3412	//
jaroslav@1258	3413	// i. IPv6 addresses without compressed zeros should contain
jaroslav@1258	3414	// exactly 16 bytes.
jaroslav@1258	3415	//
jaroslav@1258	3416	// ii. IPv6 addresses with compressed zeros should contain
jaroslav@1258	3417	// less than 16 bytes.
jaroslav@1258	3418
jaroslav@1258	3419	private int ipv6byteCount = 0;
jaroslav@1258	3420
jaroslav@1258	3421	private int parseIPv6Reference(int start, int n)
jaroslav@1258	3422	throws URISyntaxException
jaroslav@1258	3423	{
jaroslav@1258	3424	int p = start;
jaroslav@1258	3425	int q;
jaroslav@1258	3426	boolean compressedZeros = false;
jaroslav@1258	3427
jaroslav@1258	3428	q = scanHexSeq(p, n);
jaroslav@1258	3429
jaroslav@1258	3430	if (q > p) {
jaroslav@1258	3431	p = q;
jaroslav@1258	3432	if (at(p, n, "::")) {
jaroslav@1258	3433	compressedZeros = true;
jaroslav@1258	3434	p = scanHexPost(p + 2, n);
jaroslav@1258	3435	} else if (at(p, n, ':')) {
jaroslav@1258	3436	p = takeIPv4Address(p + 1, n, "IPv4 address");
jaroslav@1258	3437	ipv6byteCount += 4;
jaroslav@1258	3438	}
jaroslav@1258	3439	} else if (at(p, n, "::")) {
jaroslav@1258	3440	compressedZeros = true;
jaroslav@1258	3441	p = scanHexPost(p + 2, n);
jaroslav@1258	3442	}
jaroslav@1258	3443	if (p < n)
jaroslav@1258	3444	fail("Malformed IPv6 address", start);
jaroslav@1258	3445	if (ipv6byteCount > 16)
jaroslav@1258	3446	fail("IPv6 address too long", start);
jaroslav@1258	3447	if (!compressedZeros && ipv6byteCount < 16)
jaroslav@1258	3448	fail("IPv6 address too short", start);
jaroslav@1258	3449	if (compressedZeros && ipv6byteCount == 16)
jaroslav@1258	3450	fail("Malformed IPv6 address", start);
jaroslav@1258	3451
jaroslav@1258	3452	return p;
jaroslav@1258	3453	}
jaroslav@1258	3454
jaroslav@1258	3455	private int scanHexPost(int start, int n)
jaroslav@1258	3456	throws URISyntaxException
jaroslav@1258	3457	{
jaroslav@1258	3458	int p = start;
jaroslav@1258	3459	int q;
jaroslav@1258	3460
jaroslav@1258	3461	if (p == n)
jaroslav@1258	3462	return p;
jaroslav@1258	3463
jaroslav@1258	3464	q = scanHexSeq(p, n);
jaroslav@1258	3465	if (q > p) {
jaroslav@1258	3466	p = q;
jaroslav@1258	3467	if (at(p, n, ':')) {
jaroslav@1258	3468	p++;
jaroslav@1258	3469	p = takeIPv4Address(p, n, "hex digits or IPv4 address");
jaroslav@1258	3470	ipv6byteCount += 4;
jaroslav@1258	3471	}
jaroslav@1258	3472	} else {
jaroslav@1258	3473	p = takeIPv4Address(p, n, "hex digits or IPv4 address");
jaroslav@1258	3474	ipv6byteCount += 4;
jaroslav@1258	3475	}
jaroslav@1258	3476	return p;
jaroslav@1258	3477	}
jaroslav@1258	3478
jaroslav@1258	3479	// Scan a hex sequence; return -1 if one could not be scanned
jaroslav@1258	3480	//
jaroslav@1258	3481	private int scanHexSeq(int start, int n)
jaroslav@1258	3482	throws URISyntaxException
jaroslav@1258	3483	{
jaroslav@1258	3484	int p = start;
jaroslav@1258	3485	int q;
jaroslav@1258	3486
jaroslav@1258	3487	q = scan(p, n, L_HEX, H_HEX);
jaroslav@1258	3488	if (q <= p)
jaroslav@1258	3489	return -1;
jaroslav@1258	3490	if (at(q, n, '.')) // Beginning of IPv4 address
jaroslav@1258	3491	return -1;
jaroslav@1258	3492	if (q > p + 4)
jaroslav@1258	3493	fail("IPv6 hexadecimal digit sequence too long", p);
jaroslav@1258	3494	ipv6byteCount += 2;
jaroslav@1258	3495	p = q;
jaroslav@1258	3496	while (p < n) {
jaroslav@1258	3497	if (!at(p, n, ':'))
jaroslav@1258	3498	break;
jaroslav@1258	3499	if (at(p + 1, n, ':'))
jaroslav@1258	3500	break; // "::"
jaroslav@1258	3501	p++;
jaroslav@1258	3502	q = scan(p, n, L_HEX, H_HEX);
jaroslav@1258	3503	if (q <= p)
jaroslav@1258	3504	failExpecting("digits for an IPv6 address", p);
jaroslav@1258	3505	if (at(q, n, '.')) { // Beginning of IPv4 address
jaroslav@1258	3506	p--;
jaroslav@1258	3507	break;
jaroslav@1258	3508	}
jaroslav@1258	3509	if (q > p + 4)
jaroslav@1258	3510	fail("IPv6 hexadecimal digit sequence too long", p);
jaroslav@1258	3511	ipv6byteCount += 2;
jaroslav@1258	3512	p = q;
jaroslav@1258	3513	}
jaroslav@1258	3514
jaroslav@1258	3515	return p;
jaroslav@1258	3516	}
jaroslav@1258	3517
jaroslav@1258	3518	}
jaroslav@1258	3519
jaroslav@1258	3520	}

author	Jaroslav Tulach <jaroslav.tulach@apidesign.org>
	Thu, 31 Oct 2013 11:23:54 +0100
changeset 1398	9926996eca2d
parent 1259	d257b7a37635
permissions	-rw-r--r--