hg/bck2brwsr: emul/compact/src/main/java/java/net/URI.java@724f3e1ea53e (annotated)

jaroslav@1258	1	/*
jaroslav@1258	2	* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
jaroslav@1258	3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
jaroslav@1258	4	*
jaroslav@1258	5	* This code is free software; you can redistribute it and/or modify it
jaroslav@1258	6	* under the terms of the GNU General Public License version 2 only, as
jaroslav@1258	7	* published by the Free Software Foundation. Oracle designates this
jaroslav@1258	8	* particular file as subject to the "Classpath" exception as provided
jaroslav@1258	9	* by Oracle in the LICENSE file that accompanied this code.
jaroslav@1258	10	*
jaroslav@1258	11	* This code is distributed in the hope that it will be useful, but WITHOUT
jaroslav@1258	12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
jaroslav@1258	13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
jaroslav@1258	14	* version 2 for more details (a copy is included in the LICENSE file that
jaroslav@1258	15	* accompanied this code).
jaroslav@1258	16	*
jaroslav@1258	17	* You should have received a copy of the GNU General Public License version
jaroslav@1258	18	* 2 along with this work; if not, write to the Free Software Foundation,
jaroslav@1258	19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
jaroslav@1258	20	*
jaroslav@1258	21	* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
jaroslav@1258	22	* or visit www.oracle.com if you need additional information or have any
jaroslav@1258	23	* questions.
jaroslav@1258	24	*/
jaroslav@1258	25
jaroslav@1258	26	package java.net;
jaroslav@1258	27
jaroslav@1258	28	import java.io.IOException;
jaroslav@1258	29	import java.io.InvalidObjectException;
jaroslav@1258	30	import java.io.ObjectInputStream;
jaroslav@1258	31	import java.io.ObjectOutputStream;
jaroslav@1258	32	import java.io.Serializable;
jaroslav@1258	33	import java.nio.ByteBuffer;
jaroslav@1258	34	import java.nio.CharBuffer;
jaroslav@1258	35	import java.nio.charset.CharsetDecoder;
jaroslav@1258	36	import java.nio.charset.CharsetEncoder;
jaroslav@1258	37	import java.nio.charset.CoderResult;
jaroslav@1258	38	import java.nio.charset.CodingErrorAction;
jaroslav@1258	39	import java.nio.charset.CharacterCodingException;
jaroslav@1258	40	import java.text.Normalizer;
jaroslav@1258	41	import sun.nio.cs.ThreadLocalCoders;
jaroslav@1258	42
jaroslav@1258	43	import java.lang.Character; // for javadoc
jaroslav@1258	44	import java.lang.NullPointerException; // for javadoc
jaroslav@1258	45
jaroslav@1258	46
jaroslav@1258	47	/**
jaroslav@1258	48	* Represents a Uniform Resource Identifier (URI) reference.
jaroslav@1258	49	*
jaroslav@1258	50	* <p> Aside from some minor deviations noted below, an instance of this
jaroslav@1258	51	* class represents a URI reference as defined by
jaroslav@1258	52	* <a href="http://www.ietf.org/rfc/rfc2396.txt"><i>RFC 2396: Uniform
jaroslav@1258	53	* Resource Identifiers (URI): Generic Syntax</i></a>, amended by <a
jaroslav@1258	54	* href="http://www.ietf.org/rfc/rfc2732.txt"><i>RFC 2732: Format for
jaroslav@1258	55	* Literal IPv6 Addresses in URLs</i></a>. The Literal IPv6 address format
jaroslav@1258	56	* also supports scope_ids. The syntax and usage of scope_ids is described
jaroslav@1258	57	* <a href="Inet6Address.html#scoped">here</a>.
jaroslav@1258	58	* This class provides constructors for creating URI instances from
jaroslav@1258	59	* their components or by parsing their string forms, methods for accessing the
jaroslav@1258	60	* various components of an instance, and methods for normalizing, resolving,
jaroslav@1258	61	* and relativizing URI instances. Instances of this class are immutable.
jaroslav@1258	62	*
jaroslav@1258	63	*
jaroslav@1258	64	* <h4> URI syntax and components </h4>
jaroslav@1258	65	*
jaroslav@1258	66	* At the highest level a URI reference (hereinafter simply "URI") in string
jaroslav@1258	67	* form has the syntax
jaroslav@1258	68	*
jaroslav@1258	69	* <blockquote>
jaroslav@1258	70	* [<i>scheme</i><tt><b>:</b></tt>]<i>scheme-specific-part</i>[<tt><b>#</b></tt><i>fragment</i>]
jaroslav@1258	71	* </blockquote>
jaroslav@1258	72	*
jaroslav@1258	73	* where square brackets [...] delineate optional components and the characters
jaroslav@1258	74	* <tt><b>:</b></tt> and <tt><b>#</b></tt> stand for themselves.
jaroslav@1258	75	*
jaroslav@1258	76	* <p> An <i>absolute</i> URI specifies a scheme; a URI that is not absolute is
jaroslav@1258	77	* said to be <i>relative</i>. URIs are also classified according to whether
jaroslav@1258	78	* they are <i>opaque</i> or <i>hierarchical</i>.
jaroslav@1258	79	*
jaroslav@1258	80	* <p> An <i>opaque</i> URI is an absolute URI whose scheme-specific part does
jaroslav@1258	81	* not begin with a slash character (<tt>'/'</tt>). Opaque URIs are not
jaroslav@1258	82	* subject to further parsing. Some examples of opaque URIs are:
jaroslav@1258	83	*
jaroslav@1258	84	* <blockquote><table cellpadding=0 cellspacing=0 summary="layout">
jaroslav@1258	85	* <tr><td><tt>mailto:java-net@java.sun.com</tt></td></tr>
jaroslav@1258	86	* <tr><td><tt>news:comp.lang.java</tt></td></tr>
jaroslav@1258	87	* <tr><td><tt>urn:isbn:096139210x</tt></td></tr>
jaroslav@1258	88	* </table></blockquote>
jaroslav@1258	89	*
jaroslav@1258	90	* <p> A <i>hierarchical</i> URI is either an absolute URI whose
jaroslav@1258	91	* scheme-specific part begins with a slash character, or a relative URI, that
jaroslav@1258	92	* is, a URI that does not specify a scheme. Some examples of hierarchical
jaroslav@1258	93	* URIs are:
jaroslav@1258	94	*
jaroslav@1258	95	* <blockquote>
jaroslav@1258	96	* <tt>http://java.sun.com/j2se/1.3/</tt><br>
jaroslav@1258	97	* <tt>docs/guide/collections/designfaq.html#28</tt><br>
jaroslav@1258	98	* <tt>../../../demo/jfc/SwingSet2/src/SwingSet2.java</tt><br>
jaroslav@1258	99	* <tt>file:///~/calendar</tt>
jaroslav@1258	100	* </blockquote>
jaroslav@1258	101	*
jaroslav@1258	102	* <p> A hierarchical URI is subject to further parsing according to the syntax
jaroslav@1258	103	*
jaroslav@1258	104	* <blockquote>
jaroslav@1258	105	* [<i>scheme</i><tt><b>:</b></tt>][<tt><b>//</b></tt><i>authority</i>][<i>path</i>][<tt><b>?</b></tt><i>query</i>][<tt><b>#</b></tt><i>fragment</i>]
jaroslav@1258	106	* </blockquote>
jaroslav@1258	107	*
jaroslav@1258	108	* where the characters <tt><b>:</b></tt>, <tt><b>/</b></tt>,
jaroslav@1258	109	* <tt><b>?</b></tt>, and <tt><b>#</b></tt> stand for themselves. The
jaroslav@1258	110	* scheme-specific part of a hierarchical URI consists of the characters
jaroslav@1258	111	* between the scheme and fragment components.
jaroslav@1258	112	*
jaroslav@1258	113	* <p> The authority component of a hierarchical URI is, if specified, either
jaroslav@1258	114	* <i>server-based</i> or <i>registry-based</i>. A server-based authority
jaroslav@1258	115	* parses according to the familiar syntax
jaroslav@1258	116	*
jaroslav@1258	117	* <blockquote>
jaroslav@1258	118	* [<i>user-info</i><tt><b>@</b></tt>]<i>host</i>[<tt><b>:</b></tt><i>port</i>]
jaroslav@1258	119	* </blockquote>
jaroslav@1258	120	*
jaroslav@1258	121	* where the characters <tt><b>@</b></tt> and <tt><b>:</b></tt> stand for
jaroslav@1258	122	* themselves. Nearly all URI schemes currently in use are server-based. An
jaroslav@1258	123	* authority component that does not parse in this way is considered to be
jaroslav@1258	124	* registry-based.
jaroslav@1258	125	*
jaroslav@1258	126	* <p> The path component of a hierarchical URI is itself said to be absolute
jaroslav@1258	127	* if it begins with a slash character (<tt>'/'</tt>); otherwise it is
jaroslav@1258	128	* relative. The path of a hierarchical URI that is either absolute or
jaroslav@1258	129	* specifies an authority is always absolute.
jaroslav@1258	130	*
jaroslav@1258	131	* <p> All told, then, a URI instance has the following nine components:
jaroslav@1258	132	*
jaroslav@1258	133	* <blockquote><table summary="Describes the components of a URI:scheme,scheme-specific-part,authority,user-info,host,port,path,query,fragment">
jaroslav@1258	134	* <tr><th><i>Component</i></th><th><i>Type</i></th></tr>
jaroslav@1258	135	* <tr><td>scheme</td><td><tt>String</tt></td></tr>
jaroslav@1258	136	* <tr><td>scheme-specific-part    </td><td><tt>String</tt></td></tr>
jaroslav@1258	137	* <tr><td>authority</td><td><tt>String</tt></td></tr>
jaroslav@1258	138	* <tr><td>user-info</td><td><tt>String</tt></td></tr>
jaroslav@1258	139	* <tr><td>host</td><td><tt>String</tt></td></tr>
jaroslav@1258	140	* <tr><td>port</td><td><tt>int</tt></td></tr>
jaroslav@1258	141	* <tr><td>path</td><td><tt>String</tt></td></tr>
jaroslav@1258	142	* <tr><td>query</td><td><tt>String</tt></td></tr>
jaroslav@1258	143	* <tr><td>fragment</td><td><tt>String</tt></td></tr>
jaroslav@1258	144	* </table></blockquote>
jaroslav@1258	145	*
jaroslav@1258	146	* In a given instance any particular component is either <i>undefined</i> or
jaroslav@1258	147	* <i>defined</i> with a distinct value. Undefined string components are
jaroslav@1258	148	* represented by <tt>null</tt>, while undefined integer components are
jaroslav@1258	149	* represented by <tt>-1</tt>. A string component may be defined to have the
jaroslav@1258	150	* empty string as its value; this is not equivalent to that component being
jaroslav@1258	151	* undefined.
jaroslav@1258	152	*
jaroslav@1258	153	* <p> Whether a particular component is or is not defined in an instance
jaroslav@1258	154	* depends upon the type of the URI being represented. An absolute URI has a
jaroslav@1258	155	* scheme component. An opaque URI has a scheme, a scheme-specific part, and
jaroslav@1258	156	* possibly a fragment, but has no other components. A hierarchical URI always
jaroslav@1258	157	* has a path (though it may be empty) and a scheme-specific-part (which at
jaroslav@1258	158	* least contains the path), and may have any of the other components. If the
jaroslav@1258	159	* authority component is present and is server-based then the host component
jaroslav@1258	160	* will be defined and the user-information and port components may be defined.
jaroslav@1258	161	*
jaroslav@1258	162	*
jaroslav@1258	163	* <h4> Operations on URI instances </h4>
jaroslav@1258	164	*
jaroslav@1258	165	* The key operations supported by this class are those of
jaroslav@1258	166	* <i>normalization</i>, <i>resolution</i>, and <i>relativization</i>.
jaroslav@1258	167	*
jaroslav@1258	168	* <p> <i>Normalization</i> is the process of removing unnecessary <tt>"."</tt>
jaroslav@1258	169	* and <tt>".."</tt> segments from the path component of a hierarchical URI.
jaroslav@1258	170	* Each <tt>"."</tt> segment is simply removed. A <tt>".."</tt> segment is
jaroslav@1258	171	* removed only if it is preceded by a non-<tt>".."</tt> segment.
jaroslav@1258	172	* Normalization has no effect upon opaque URIs.
jaroslav@1258	173	*
jaroslav@1258	174	* <p> <i>Resolution</i> is the process of resolving one URI against another,
jaroslav@1258	175	* <i>base</i> URI. The resulting URI is constructed from components of both
jaroslav@1258	176	* URIs in the manner specified by RFC 2396, taking components from the
jaroslav@1258	177	* base URI for those not specified in the original. For hierarchical URIs,
jaroslav@1258	178	* the path of the original is resolved against the path of the base and then
jaroslav@1258	179	* normalized. The result, for example, of resolving
jaroslav@1258	180	*
jaroslav@1258	181	* <blockquote>
jaroslav@1258	182	* <tt>docs/guide/collections/designfaq.html#28          </tt>(1)
jaroslav@1258	183	* </blockquote>
jaroslav@1258	184	*
jaroslav@1258	185	* against the base URI <tt>http://java.sun.com/j2se/1.3/</tt> is the result
jaroslav@1258	186	* URI
jaroslav@1258	187	*
jaroslav@1258	188	* <blockquote>
jaroslav@1258	189	* <tt>http://java.sun.com/j2se/1.3/docs/guide/collections/designfaq.html#28</tt>
jaroslav@1258	190	* </blockquote>
jaroslav@1258	191	*
jaroslav@1258	192	* Resolving the relative URI
jaroslav@1258	193	*
jaroslav@1258	194	* <blockquote>
jaroslav@1258	195	* <tt>../../../demo/jfc/SwingSet2/src/SwingSet2.java    </tt>(2)
jaroslav@1258	196	* </blockquote>
jaroslav@1258	197	*
jaroslav@1258	198	* against this result yields, in turn,
jaroslav@1258	199	*
jaroslav@1258	200	* <blockquote>
jaroslav@1258	201	* <tt>http://java.sun.com/j2se/1.3/demo/jfc/SwingSet2/src/SwingSet2.java</tt>
jaroslav@1258	202	* </blockquote>
jaroslav@1258	203	*
jaroslav@1258	204	* Resolution of both absolute and relative URIs, and of both absolute and
jaroslav@1258	205	* relative paths in the case of hierarchical URIs, is supported. Resolving
jaroslav@1258	206	* the URI <tt>file:///~/calendar</tt> against any other URI simply yields the
jaroslav@1258	207	* original URI, since it is absolute. Resolving the relative URI (2) above
jaroslav@1258	208	* against the relative base URI (1) yields the normalized, but still relative,
jaroslav@1258	209	* URI
jaroslav@1258	210	*
jaroslav@1258	211	* <blockquote>
jaroslav@1258	212	* <tt>demo/jfc/SwingSet2/src/SwingSet2.java</tt>
jaroslav@1258	213	* </blockquote>
jaroslav@1258	214	*
jaroslav@1258	215	* <p> <i>Relativization</i>, finally, is the inverse of resolution: For any
jaroslav@1258	216	* two normalized URIs <i>u</i> and <i>v</i>,
jaroslav@1258	217	*
jaroslav@1258	218	* <blockquote>
jaroslav@1258	219	* <i>u</i><tt>.relativize(</tt><i>u</i><tt>.resolve(</tt><i>v</i><tt>)).equals(</tt><i>v</i><tt>)</tt>  and<br>
jaroslav@1258	220	* <i>u</i><tt>.resolve(</tt><i>u</i><tt>.relativize(</tt><i>v</i><tt>)).equals(</tt><i>v</i><tt>)</tt>  .<br>
jaroslav@1258	221	* </blockquote>
jaroslav@1258	222	*
jaroslav@1258	223	* This operation is often useful when constructing a document containing URIs
jaroslav@1258	224	* that must be made relative to the base URI of the document wherever
jaroslav@1258	225	* possible. For example, relativizing the URI
jaroslav@1258	226	*
jaroslav@1258	227	* <blockquote>
jaroslav@1258	228	* <tt>http://java.sun.com/j2se/1.3/docs/guide/index.html</tt>
jaroslav@1258	229	* </blockquote>
jaroslav@1258	230	*
jaroslav@1258	231	* against the base URI
jaroslav@1258	232	*
jaroslav@1258	233	* <blockquote>
jaroslav@1258	234	* <tt>http://java.sun.com/j2se/1.3</tt>
jaroslav@1258	235	* </blockquote>
jaroslav@1258	236	*
jaroslav@1258	237	* yields the relative URI <tt>docs/guide/index.html</tt>.
jaroslav@1258	238	*
jaroslav@1258	239	*
jaroslav@1258	240	* <h4> Character categories </h4>
jaroslav@1258	241	*
jaroslav@1258	242	* RFC 2396 specifies precisely which characters are permitted in the
jaroslav@1258	243	* various components of a URI reference. The following categories, most of
jaroslav@1258	244	* which are taken from that specification, are used below to describe these
jaroslav@1258	245	* constraints:
jaroslav@1258	246	*
jaroslav@1258	247	* <blockquote><table cellspacing=2 summary="Describes categories alpha,digit,alphanum,unreserved,punct,reserved,escaped,and other">
jaroslav@1258	248	* <tr><th valign=top><i>alpha</i></th>
jaroslav@1258	249	* <td>The US-ASCII alphabetic characters,
jaroslav@1258	250	* <tt>'A'</tt> through <tt>'Z'</tt>
jaroslav@1258	251	* and <tt>'a'</tt> through <tt>'z'</tt></td></tr>
jaroslav@1258	252	* <tr><th valign=top><i>digit</i></th>
jaroslav@1258	253	* <td>The US-ASCII decimal digit characters,
jaroslav@1258	254	* <tt>'0'</tt> through <tt>'9'</tt></td></tr>
jaroslav@1258	255	* <tr><th valign=top><i>alphanum</i></th>
jaroslav@1258	256	* <td>All <i>alpha</i> and <i>digit</i> characters</td></tr>
jaroslav@1258	257	* <tr><th valign=top><i>unreserved</i>    </th>
jaroslav@1258	258	* <td>All <i>alphanum</i> characters together with those in the string
jaroslav@1258	259	* <tt>"_-!.~'()*"</tt></td></tr>
jaroslav@1258	260	* <tr><th valign=top><i>punct</i></th>
jaroslav@1258	261	* <td>The characters in the string <tt>",;:$&amp;+="</tt></td></tr>
jaroslav@1258	262	* <tr><th valign=top><i>reserved</i></th>
jaroslav@1258	263	* <td>All <i>punct</i> characters together with those in the string
jaroslav@1258	264	* <tt>"?/[]@"</tt></td></tr>
jaroslav@1258	265	* <tr><th valign=top><i>escaped</i></th>
jaroslav@1258	266	* <td>Escaped octets, that is, triplets consisting of the percent
jaroslav@1258	267	* character (<tt>'%'</tt>) followed by two hexadecimal digits
jaroslav@1258	268	* (<tt>'0'</tt>-<tt>'9'</tt>, <tt>'A'</tt>-<tt>'F'</tt>, and
jaroslav@1258	269	* <tt>'a'</tt>-<tt>'f'</tt>)</td></tr>
jaroslav@1258	270	* <tr><th valign=top><i>other</i></th>
jaroslav@1258	271	* <td>The Unicode characters that are not in the US-ASCII character set,
jaroslav@1258	272	* are not control characters (according to the {@link
jaroslav@1258	273	* java.lang.Character#isISOControl(char) Character.isISOControl}
jaroslav@1258	274	* method), and are not space characters (according to the {@link
jaroslav@1258	275	* java.lang.Character#isSpaceChar(char) Character.isSpaceChar}
jaroslav@1258	276	* method)  <i>(<b>Deviation from RFC 2396</b>, which is
jaroslav@1258	277	* limited to US-ASCII)</i></td></tr>
jaroslav@1258	278	* </table></blockquote>
jaroslav@1258	279	*
jaroslav@1258	280	* <p><a name="legal-chars"></a> The set of all legal URI characters consists of
jaroslav@1258	281	* the <i>unreserved</i>, <i>reserved</i>, <i>escaped</i>, and <i>other</i>
jaroslav@1258	282	* characters.
jaroslav@1258	283	*
jaroslav@1258	284	*
jaroslav@1258	285	* <h4> Escaped octets, quotation, encoding, and decoding </h4>
jaroslav@1258	286	*
jaroslav@1258	287	* RFC 2396 allows escaped octets to appear in the user-info, path, query, and
jaroslav@1258	288	* fragment components. Escaping serves two purposes in URIs:
jaroslav@1258	289	*
jaroslav@1258	290	* <ul>
jaroslav@1258	291	*
jaroslav@1258	292	* <li><p> To <i>encode</i> non-US-ASCII characters when a URI is required to
jaroslav@1258	293	* conform strictly to RFC 2396 by not containing any <i>other</i>
jaroslav@1258	294	* characters. </p></li>
jaroslav@1258	295	*
jaroslav@1258	296	* <li><p> To <i>quote</i> characters that are otherwise illegal in a
jaroslav@1258	297	* component. The user-info, path, query, and fragment components differ
jaroslav@1258	298	* slightly in terms of which characters are considered legal and illegal.
jaroslav@1258	299	* </p></li>
jaroslav@1258	300	*
jaroslav@1258	301	* </ul>
jaroslav@1258	302	*
jaroslav@1258	303	* These purposes are served in this class by three related operations:
jaroslav@1258	304	*
jaroslav@1258	305	* <ul>
jaroslav@1258	306	*
jaroslav@1258	307	* <li><p><a name="encode"></a> A character is <i>encoded</i> by replacing it
jaroslav@1258	308	* with the sequence of escaped octets that represent that character in the
jaroslav@1258	309	* UTF-8 character set. The Euro currency symbol (<tt>'&#92;u20AC'</tt>),
jaroslav@1258	310	* for example, is encoded as <tt>"%E2%82%AC"</tt>. <i>(<b>Deviation from
jaroslav@1258	311	* RFC 2396</b>, which does not specify any particular character
jaroslav@1258	312	* set.)</i> </p></li>
jaroslav@1258	313	*
jaroslav@1258	314	* <li><p><a name="quote"></a> An illegal character is <i>quoted</i> simply by
jaroslav@1258	315	* encoding it. The space character, for example, is quoted by replacing it
jaroslav@1258	316	* with <tt>"%20"</tt>. UTF-8 contains US-ASCII, hence for US-ASCII
jaroslav@1258	317	* characters this transformation has exactly the effect required by
jaroslav@1258	318	* RFC 2396. </p></li>
jaroslav@1258	319	*
jaroslav@1258	320	* <li><p><a name="decode"></a>
jaroslav@1258	321	* A sequence of escaped octets is <i>decoded</i> by
jaroslav@1258	322	* replacing it with the sequence of characters that it represents in the
jaroslav@1258	323	* UTF-8 character set. UTF-8 contains US-ASCII, hence decoding has the
jaroslav@1258	324	* effect of de-quoting any quoted US-ASCII characters as well as that of
jaroslav@1258	325	* decoding any encoded non-US-ASCII characters. If a <a
jaroslav@1258	326	* href="../nio/charset/CharsetDecoder.html#ce">decoding error</a> occurs
jaroslav@1258	327	* when decoding the escaped octets then the erroneous octets are replaced by
jaroslav@1258	328	* <tt>'&#92;uFFFD'</tt>, the Unicode replacement character. </p></li>
jaroslav@1258	329	*
jaroslav@1258	330	* </ul>
jaroslav@1258	331	*
jaroslav@1258	332	* These operations are exposed in the constructors and methods of this class
jaroslav@1258	333	* as follows:
jaroslav@1258	334	*
jaroslav@1258	335	* <ul>
jaroslav@1258	336	*
jaroslav@1258	337	* <li><p> The {@link #URI(java.lang.String) <code>single-argument
jaroslav@1258	338	* constructor</code>} requires any illegal characters in its argument to be
jaroslav@1258	339	* quoted and preserves any escaped octets and <i>other</i> characters that
jaroslav@1258	340	* are present. </p></li>
jaroslav@1258	341	*
jaroslav@1258	342	* <li><p> The {@link
jaroslav@1258	343	* #URI(java.lang.String,java.lang.String,java.lang.String,int,java.lang.String,java.lang.String,java.lang.String)
jaroslav@1258	344	* <code>multi-argument constructors</code>} quote illegal characters as
jaroslav@1258	345	* required by the components in which they appear. The percent character
jaroslav@1258	346	* (<tt>'%'</tt>) is always quoted by these constructors. Any <i>other</i>
jaroslav@1258	347	* characters are preserved. </p></li>
jaroslav@1258	348	*
jaroslav@1258	349	* <li><p> The {@link #getRawUserInfo() getRawUserInfo}, {@link #getRawPath()
jaroslav@1258	350	* getRawPath}, {@link #getRawQuery() getRawQuery}, {@link #getRawFragment()
jaroslav@1258	351	* getRawFragment}, {@link #getRawAuthority() getRawAuthority}, and {@link
jaroslav@1258	352	* #getRawSchemeSpecificPart() getRawSchemeSpecificPart} methods return the
jaroslav@1258	353	* values of their corresponding components in raw form, without interpreting
jaroslav@1258	354	* any escaped octets. The strings returned by these methods may contain
jaroslav@1258	355	* both escaped octets and <i>other</i> characters, and will not contain any
jaroslav@1258	356	* illegal characters. </p></li>
jaroslav@1258	357	*
jaroslav@1258	358	* <li><p> The {@link #getUserInfo() getUserInfo}, {@link #getPath()
jaroslav@1258	359	* getPath}, {@link #getQuery() getQuery}, {@link #getFragment()
jaroslav@1258	360	* getFragment}, {@link #getAuthority() getAuthority}, and {@link
jaroslav@1258	361	* #getSchemeSpecificPart() getSchemeSpecificPart} methods decode any escaped
jaroslav@1258	362	* octets in their corresponding components. The strings returned by these
jaroslav@1258	363	* methods may contain both <i>other</i> characters and illegal characters,
jaroslav@1258	364	* and will not contain any escaped octets. </p></li>
jaroslav@1258	365	*
jaroslav@1258	366	* <li><p> The {@link #toString() toString} method returns a URI string with
jaroslav@1258	367	* all necessary quotation but which may contain <i>other</i> characters.
jaroslav@1258	368	* </p></li>
jaroslav@1258	369	*
jaroslav@1258	370	* <li><p> The {@link #toASCIIString() toASCIIString} method returns a fully
jaroslav@1258	371	* quoted and encoded URI string that does not contain any <i>other</i>
jaroslav@1258	372	* characters. </p></li>
jaroslav@1258	373	*
jaroslav@1258	374	* </ul>
jaroslav@1258	375	*
jaroslav@1258	376	*
jaroslav@1258	377	* <h4> Identities </h4>
jaroslav@1258	378	*
jaroslav@1258	379	* For any URI <i>u</i>, it is always the case that
jaroslav@1258	380	*
jaroslav@1258	381	* <blockquote>
jaroslav@1258	382	* <tt>new URI(</tt><i>u</i><tt>.toString()).equals(</tt><i>u</i><tt>)</tt> .
jaroslav@1258	383	* </blockquote>
jaroslav@1258	384	*
jaroslav@1258	385	* For any URI <i>u</i> that does not contain redundant syntax such as two
jaroslav@1258	386	* slashes before an empty authority (as in <tt>file:///tmp/</tt> ) or a
jaroslav@1258	387	* colon following a host name but no port (as in
jaroslav@1258	388	* <tt>http://java.sun.com:</tt> ), and that does not encode characters
jaroslav@1258	389	* except those that must be quoted, the following identities also hold:
jaroslav@1258	390	*
jaroslav@1258	391	* <blockquote>
jaroslav@1258	392	* <tt>new URI(</tt><i>u</i><tt>.getScheme(),<br>
jaroslav@1258	393	*         </tt><i>u</i><tt>.getSchemeSpecificPart(),<br>
jaroslav@1258	394	*         </tt><i>u</i><tt>.getFragment())<br>
jaroslav@1258	395	* .equals(</tt><i>u</i><tt>)</tt>
jaroslav@1258	396	* </blockquote>
jaroslav@1258	397	*
jaroslav@1258	398	* in all cases,
jaroslav@1258	399	*
jaroslav@1258	400	* <blockquote>
jaroslav@1258	401	* <tt>new URI(</tt><i>u</i><tt>.getScheme(),<br>
jaroslav@1258	402	*         </tt><i>u</i><tt>.getAuthority(),<br>
jaroslav@1258	403	*         </tt><i>u</i><tt>.getPath(), </tt><i>u</i><tt>.getQuery(),<br>
jaroslav@1258	404	*         </tt><i>u</i><tt>.getFragment())<br>
jaroslav@1258	405	* .equals(</tt><i>u</i><tt>)</tt>
jaroslav@1258	406	* </blockquote>
jaroslav@1258	407	*
jaroslav@1258	408	* if <i>u</i> is hierarchical, and
jaroslav@1258	409	*
jaroslav@1258	410	* <blockquote>
jaroslav@1258	411	* <tt>new URI(</tt><i>u</i><tt>.getScheme(),<br>
jaroslav@1258	412	*         </tt><i>u</i><tt>.getUserInfo(), </tt><i>u</i><tt>.getHost(), </tt><i>u</i><tt>.getPort(),<br>
jaroslav@1258	413	*         </tt><i>u</i><tt>.getPath(), </tt><i>u</i><tt>.getQuery(),<br>
jaroslav@1258	414	*         </tt><i>u</i><tt>.getFragment())<br>
jaroslav@1258	415	* .equals(</tt><i>u</i><tt>)</tt>
jaroslav@1258	416	* </blockquote>
jaroslav@1258	417	*
jaroslav@1258	418	* if <i>u</i> is hierarchical and has either no authority or a server-based
jaroslav@1258	419	* authority.
jaroslav@1258	420	*
jaroslav@1258	421	*
jaroslav@1258	422	* <h4> URIs, URLs, and URNs </h4>
jaroslav@1258	423	*
jaroslav@1258	424	* A URI is a uniform resource <i>identifier</i> while a URL is a uniform
jaroslav@1258	425	* resource <i>locator</i>. Hence every URL is a URI, abstractly speaking, but
jaroslav@1258	426	* not every URI is a URL. This is because there is another subcategory of
jaroslav@1258	427	* URIs, uniform resource <i>names</i> (URNs), which name resources but do not
jaroslav@1258	428	* specify how to locate them. The <tt>mailto</tt>, <tt>news</tt>, and
jaroslav@1258	429	* <tt>isbn</tt> URIs shown above are examples of URNs.
jaroslav@1258	430	*
jaroslav@1258	431	* <p> The conceptual distinction between URIs and URLs is reflected in the
jaroslav@1258	432	* differences between this class and the {@link URL} class.
jaroslav@1258	433	*
jaroslav@1258	434	* <p> An instance of this class represents a URI reference in the syntactic
jaroslav@1258	435	* sense defined by RFC 2396. A URI may be either absolute or relative.
jaroslav@1258	436	* A URI string is parsed according to the generic syntax without regard to the
jaroslav@1258	437	* scheme, if any, that it specifies. No lookup of the host, if any, is
jaroslav@1258	438	* performed, and no scheme-dependent stream handler is constructed. Equality,
jaroslav@1258	439	* hashing, and comparison are defined strictly in terms of the character
jaroslav@1258	440	* content of the instance. In other words, a URI instance is little more than
jaroslav@1258	441	* a structured string that supports the syntactic, scheme-independent
jaroslav@1258	442	* operations of comparison, normalization, resolution, and relativization.
jaroslav@1258	443	*
jaroslav@1258	444	* <p> An instance of the {@link URL} class, by contrast, represents the
jaroslav@1258	445	* syntactic components of a URL together with some of the information required
jaroslav@1258	446	* to access the resource that it describes. A URL must be absolute, that is,
jaroslav@1258	447	* it must always specify a scheme. A URL string is parsed according to its
jaroslav@1258	448	* scheme. A stream handler is always established for a URL, and in fact it is
jaroslav@1258	449	* impossible to create a URL instance for a scheme for which no handler is
jaroslav@1258	450	* available. Equality and hashing depend upon both the scheme and the
jaroslav@1258	451	* Internet address of the host, if any; comparison is not defined. In other
jaroslav@1258	452	* words, a URL is a structured string that supports the syntactic operation of
jaroslav@1258	453	* resolution as well as the network I/O operations of looking up the host and
jaroslav@1258	454	* opening a connection to the specified resource.
jaroslav@1258	455	*
jaroslav@1258	456	*
jaroslav@1258	457	* @author Mark Reinhold
jaroslav@1258	458	* @since 1.4
jaroslav@1258	459	*
jaroslav@1258	460	* @see <a href="http://www.ietf.org/rfc/rfc2279.txt"><i>RFC 2279: UTF-8, a
jaroslav@1258	461	* transformation format of ISO 10646</i></a>, <br><a
jaroslav@1258	462	* href="http://www.ietf.org/rfc/rfc2373.txt"><i>RFC 2373: IPv6 Addressing
jaroslav@1258	463	* Architecture</i></a>, <br><a
jaroslav@1258	464	* href="http://www.ietf.org/rfc/rfc2396.txt"><i>RFC 2396: Uniform
jaroslav@1258	465	* Resource Identifiers (URI): Generic Syntax</i></a>, <br><a
jaroslav@1258	466	* href="http://www.ietf.org/rfc/rfc2732.txt"><i>RFC 2732: Format for
jaroslav@1258	467	* Literal IPv6 Addresses in URLs</i></a>, <br><a
jaroslav@1258	468	* href="URISyntaxException.html">URISyntaxException</a>
jaroslav@1258	469	*/
jaroslav@1258	470
jaroslav@1258	471	public final class URI
jaroslav@1258	472	implements Comparable<URI>, Serializable
jaroslav@1258	473	{
jaroslav@1258	474
jaroslav@1258	475	// Note: Comments containing the word "ASSERT" indicate places where a
jaroslav@1258	476	// throw of an InternalError should be replaced by an appropriate assertion
jaroslav@1258	477	// statement once asserts are enabled in the build.
jaroslav@1258	478
jaroslav@1258	479	static final long serialVersionUID = -6052424284110960213L;
jaroslav@1258	480
jaroslav@1258	481
jaroslav@1258	482	// -- Properties and components of this instance --
jaroslav@1258	483
jaroslav@1258	484	// Components of all URIs: [<scheme>:]<scheme-specific-part>[#<fragment>]
jaroslav@1258	485	private transient String scheme; // null ==> relative URI
jaroslav@1258	486	private transient String fragment; // null ==> undefined (no fragment)
jaroslav@1258	487
jaroslav@1258	488	// Hierarchical URI components: [//<authority>]<path>[?<query>]
jaroslav@1258	489	private transient String authority; // Registry- or server-based; null ==> undefined
jaroslav@1258	490
jaroslav@1258	491	// Server-based authority: [<userInfo>@]<host>[:<port>]
jaroslav@1258	492	private transient String userInfo; // null ==> undefined
jaroslav@1258	493	private transient String host; // null ==> registry-based
jaroslav@1258	494	private transient int port = -1; // -1 ==> undefined
jaroslav@1258	495
jaroslav@1258	496	// Remaining components of hierarchical URIs
jaroslav@1258	497	private transient String path; // null ==> opaque
jaroslav@1258	498	private transient String query; // null ==> undefined
jaroslav@1258	499
jaroslav@1258	500	// The remaining fields may be computed on demand; all are volatile and transient
jaroslav@1258	501
jaroslav@1258	502	private volatile transient String schemeSpecificPart;
jaroslav@1258	503	private volatile transient int hash; // Zero ==> undefined
jaroslav@1258	504
jaroslav@1258	505	private volatile transient String decodedUserInfo = null; // decoded* fields start null; presumably filled by the decoding getters (not visible here) -- confirm
jaroslav@1258	506	private volatile transient String decodedAuthority = null;
jaroslav@1258	507	private volatile transient String decodedPath = null;
jaroslav@1258	508	private volatile transient String decodedQuery = null;
jaroslav@1258	509	private volatile transient String decodedFragment = null;
jaroslav@1258	510	private volatile transient String decodedSchemeSpecificPart = null;
jaroslav@1258	511
jaroslav@1258	512	/**
jaroslav@1258	513	* The string form of this URI.
jaroslav@1258	514	*
jaroslav@1258	515	* @serial
jaroslav@1258	516	*/
jaroslav@1258	517	private volatile String string; // The only serializable (non-transient) field
jaroslav@1258	518
jaroslav@1258	519
jaroslav@1258	520
jaroslav@1258	521	// -- Constructors and factories --
jaroslav@1258	522
jaroslav@1258	523	private URI() { } // Used internally; empty body leaves every component at its field default (null, port -1)
jaroslav@1258	524
jaroslav@1258	525	/**
jaroslav@1258	526	* Constructs a URI by parsing the given string.
jaroslav@1258	527	*
jaroslav@1258	528	* <p> This constructor parses the given string exactly as specified by the
jaroslav@1258	529	* grammar in <a
jaroslav@1258	530	* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
jaroslav@1258	531	* Appendix A, <b><i>except for the following deviations:</i></b> </p>
jaroslav@1258	532	*
jaroslav@1258	533	* <ul type=disc>
jaroslav@1258	534	*
jaroslav@1258	535	* <li><p> An empty authority component is permitted as long as it is
jaroslav@1258	536	* followed by a non-empty path, a query component, or a fragment
jaroslav@1258	537	* component. This allows the parsing of URIs such as
jaroslav@1258	538	* <tt>"file:///foo/bar"</tt>, which seems to be the intent of
jaroslav@1258	539	* RFC 2396 although the grammar does not permit it. If the
jaroslav@1258	540	* authority component is empty then the user-information, host, and port
jaroslav@1258	541	* components are undefined. </p></li>
jaroslav@1258	542	*
jaroslav@1258	543	* <li><p> Empty relative paths are permitted; this seems to be the
jaroslav@1258	544	* intent of RFC 2396 although the grammar does not permit it. The
jaroslav@1258	545	* primary consequence of this deviation is that a standalone fragment
jaroslav@1258	546	* such as <tt>"#foo"</tt> parses as a relative URI with an empty path
jaroslav@1258	547	* and the given fragment, and can be usefully <a
jaroslav@1258	548	* href="#resolve-frag">resolved</a> against a base URI. </p></li>
jaroslav@1258	549	*
jaroslav@1258	550	* <li><p> IPv4 addresses in host components are parsed rigorously, as
jaroslav@1258	551	* specified by <a
jaroslav@1258	552	* href="http://www.ietf.org/rfc/rfc2732.txt">RFC 2732</a>: Each
jaroslav@1258	553	* element of a dotted-quad address must contain no more than three
jaroslav@1258	554	* decimal digits. Each element is further constrained to have a value
jaroslav@1258	555	* no greater than 255. </p></li>
jaroslav@1258	556	*
jaroslav@1258	557	* <li> <p> Hostnames in host components that comprise only a single
jaroslav@1258	558	* domain label are permitted to start with an <i>alphanum</i>
jaroslav@1258	559	* character. This seems to be the intent of <a
jaroslav@1258	560	* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>
jaroslav@1258	561	* section 3.2.2 although the grammar does not permit it. The
jaroslav@1258	562	* consequence of this deviation is that the authority component of a
jaroslav@1258	563	* hierarchical URI such as <tt>s://123</tt>, will parse as a server-based
jaroslav@1258	564	* authority. </p></li>
jaroslav@1258	565	*
jaroslav@1258	566	* <li><p> IPv6 addresses are permitted for the host component. An IPv6
jaroslav@1258	567	* address must be enclosed in square brackets (<tt>'['</tt> and
jaroslav@1258	568	* <tt>']'</tt>) as specified by <a
jaroslav@1258	569	* href="http://www.ietf.org/rfc/rfc2732.txt">RFC 2732</a>. The
jaroslav@1258	570	* IPv6 address itself must parse according to <a
jaroslav@1258	571	* href="http://www.ietf.org/rfc/rfc2373.txt">RFC 2373</a>. IPv6
jaroslav@1258	572	* addresses are further constrained to describe no more than sixteen
jaroslav@1258	573	* bytes of address information, a constraint implicit in RFC 2373
jaroslav@1258	574	* but not expressible in the grammar. </p></li>
jaroslav@1258	575	*
jaroslav@1258	576	* <li><p> Characters in the <i>other</i> category are permitted wherever
jaroslav@1258	577	* RFC 2396 permits <i>escaped</i> octets, that is, in the
jaroslav@1258	578	* user-information, path, query, and fragment components, as well as in
jaroslav@1258	579	* the authority component if the authority is registry-based. This
jaroslav@1258	580	* allows URIs to contain Unicode characters beyond those in the US-ASCII
jaroslav@1258	581	* character set. </p></li>
jaroslav@1258	582	*
jaroslav@1258	583	* </ul>
jaroslav@1258	584	*
jaroslav@1258	585	* @param str The string to be parsed into a URI
jaroslav@1258	586	*
jaroslav@1258	587	* @throws NullPointerException
jaroslav@1258	588	* If <tt>str</tt> is <tt>null</tt>
jaroslav@1258	589	*
jaroslav@1258	590	* @throws URISyntaxException
jaroslav@1258	591	* If the given string violates RFC 2396, as augmented
jaroslav@1258	592	* by the above deviations
jaroslav@1258	593	*/
jaroslav@1258	594	public URI(String str) throws URISyntaxException {
jaroslav@1258	595	new Parser(str).parse(false);
jaroslav@1258	596	}
jaroslav@1258	597
jaroslav@1258	598	/**
jaroslav@1258	599	* Constructs a hierarchical URI from the given components.
jaroslav@1258	600	*
jaroslav@1258	601	* <p> If a scheme is given then the path, if also given, must either be
jaroslav@1258	602	* empty or begin with a slash character (<tt>'/'</tt>). Otherwise a
jaroslav@1258	603	* component of the new URI may be left undefined by passing <tt>null</tt>
jaroslav@1258	604	* for the corresponding parameter or, in the case of the <tt>port</tt>
jaroslav@1258	605	* parameter, by passing <tt>-1</tt>.
jaroslav@1258	606	*
jaroslav@1258	607	* <p> This constructor first builds a URI string from the given components
jaroslav@1258	608	* according to the rules specified in <a
jaroslav@1258	609	* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
jaroslav@1258	610	* section 5.2, step 7: </p>
jaroslav@1258	611	*
jaroslav@1258	612	* <ol>
jaroslav@1258	613	*
jaroslav@1258	614	* <li><p> Initially, the result string is empty. </p></li>
jaroslav@1258	615	*
jaroslav@1258	616	* <li><p> If a scheme is given then it is appended to the result,
jaroslav@1258	617	* followed by a colon character (<tt>':'</tt>). </p></li>
jaroslav@1258	618	*
jaroslav@1258	619	* <li><p> If user information, a host, or a port are given then the
jaroslav@1258	620	* string <tt>"//"</tt> is appended. </p></li>
jaroslav@1258	621	*
jaroslav@1258	622	* <li><p> If user information is given then it is appended, followed by
jaroslav@1258	623	* a commercial-at character (<tt>'@'</tt>). Any character not in the
jaroslav@1258	624	* <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
jaroslav@1258	625	* categories is <a href="#quote">quoted</a>. </p></li>
jaroslav@1258	626	*
jaroslav@1258	627	* <li><p> If a host is given then it is appended. If the host is a
jaroslav@1258	628	* literal IPv6 address but is not enclosed in square brackets
jaroslav@1258	629	* (<tt>'['</tt> and <tt>']'</tt>) then the square brackets are added.
jaroslav@1258	630	* </p></li>
jaroslav@1258	631	*
jaroslav@1258	632	* <li><p> If a port number is given then a colon character
jaroslav@1258	633	* (<tt>':'</tt>) is appended, followed by the port number in decimal.
jaroslav@1258	634	* </p></li>
jaroslav@1258	635	*
jaroslav@1258	636	* <li><p> If a path is given then it is appended. Any character not in
jaroslav@1258	637	* the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
jaroslav@1258	638	* categories, and not equal to the slash character (<tt>'/'</tt>) or the
jaroslav@1258	639	* commercial-at character (<tt>'@'</tt>), is quoted. </p></li>
jaroslav@1258	640	*
jaroslav@1258	641	* <li><p> If a query is given then a question-mark character
jaroslav@1258	642	* (<tt>'?'</tt>) is appended, followed by the query. Any character that
jaroslav@1258	643	* is not a <a href="#legal-chars">legal URI character</a> is quoted.
jaroslav@1258	644	* </p></li>
jaroslav@1258	645	*
jaroslav@1258	646	* <li><p> Finally, if a fragment is given then a hash character
jaroslav@1258	647	* (<tt>'#'</tt>) is appended, followed by the fragment. Any character
jaroslav@1258	648	* that is not a legal URI character is quoted. </p></li>
jaroslav@1258	649	*
jaroslav@1258	650	* </ol>
jaroslav@1258	651	*
jaroslav@1258	652	* <p> The resulting URI string is then parsed as if by invoking the {@link
jaroslav@1258	653	* #URI(String)} constructor and then invoking the {@link
jaroslav@1258	654	* #parseServerAuthority()} method upon the result; this may cause a {@link
jaroslav@1258	655	* URISyntaxException} to be thrown. </p>
jaroslav@1258	656	*
jaroslav@1258	657	* @param scheme Scheme name
jaroslav@1258	658	* @param userInfo User name and authorization information
jaroslav@1258	659	* @param host Host name
jaroslav@1258	660	* @param port Port number
jaroslav@1258	661	* @param path Path
jaroslav@1258	662	* @param query Query
jaroslav@1258	663	* @param fragment Fragment
jaroslav@1258	664	*
jaroslav@1258	665	* @throws URISyntaxException
jaroslav@1258	666	* If both a scheme and a path are given but the path is relative,
jaroslav@1258	667	* if the URI string constructed from the given components violates
jaroslav@1258	668	* RFC 2396, or if the authority component of the string is
jaroslav@1258	669	* present but cannot be parsed as a server-based authority
jaroslav@1258	670	*/
jaroslav@1258	671	public URI(String scheme,
jaroslav@1258	672	String userInfo, String host, int port,
jaroslav@1258	673	String path, String query, String fragment)
jaroslav@1258	674	throws URISyntaxException
jaroslav@1258	675	{
jaroslav@1258	676	String s = toString(scheme, null,
jaroslav@1258	677	null, userInfo, host, port,
jaroslav@1258	678	path, query, fragment);
jaroslav@1258	679	checkPath(s, scheme, path);
jaroslav@1258	680	new Parser(s).parse(true);
jaroslav@1258	681	}
jaroslav@1258	682
jaroslav@1258	683	/**
jaroslav@1258	684	* Constructs a hierarchical URI from the given components.
jaroslav@1258	685	*
jaroslav@1258	686	* <p> If a scheme is given then the path, if also given, must either be
jaroslav@1258	687	* empty or begin with a slash character (<tt>'/'</tt>). Otherwise a
jaroslav@1258	688	* component of the new URI may be left undefined by passing <tt>null</tt>
jaroslav@1258	689	* for the corresponding parameter.
jaroslav@1258	690	*
jaroslav@1258	691	* <p> This constructor first builds a URI string from the given components
jaroslav@1258	692	* according to the rules specified in <a
jaroslav@1258	693	* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
jaroslav@1258	694	* section 5.2, step 7: </p>
jaroslav@1258	695	*
jaroslav@1258	696	* <ol>
jaroslav@1258	697	*
jaroslav@1258	698	* <li><p> Initially, the result string is empty. </p></li>
jaroslav@1258	699	*
jaroslav@1258	700	* <li><p> If a scheme is given then it is appended to the result,
jaroslav@1258	701	* followed by a colon character (<tt>':'</tt>). </p></li>
jaroslav@1258	702	*
jaroslav@1258	703	* <li><p> If an authority is given then the string <tt>"//"</tt> is
jaroslav@1258	704	* appended, followed by the authority. If the authority contains a
jaroslav@1258	705	* literal IPv6 address then the address must be enclosed in square
jaroslav@1258	706	* brackets (<tt>'['</tt> and <tt>']'</tt>). Any character not in the
jaroslav@1258	707	* <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
jaroslav@1258	708	* categories, and not equal to the commercial-at character
jaroslav@1258	709	* (<tt>'@'</tt>), is <a href="#quote">quoted</a>. </p></li>
jaroslav@1258	710	*
jaroslav@1258	711	* <li><p> If a path is given then it is appended. Any character not in
jaroslav@1258	712	* the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
jaroslav@1258	713	* categories, and not equal to the slash character (<tt>'/'</tt>) or the
jaroslav@1258	714	* commercial-at character (<tt>'@'</tt>), is quoted. </p></li>
jaroslav@1258	715	*
jaroslav@1258	716	* <li><p> If a query is given then a question-mark character
jaroslav@1258	717	* (<tt>'?'</tt>) is appended, followed by the query. Any character that
jaroslav@1258	718	* is not a <a href="#legal-chars">legal URI character</a> is quoted.
jaroslav@1258	719	* </p></li>
jaroslav@1258	720	*
jaroslav@1258	721	* <li><p> Finally, if a fragment is given then a hash character
jaroslav@1258	722	* (<tt>'#'</tt>) is appended, followed by the fragment. Any character
jaroslav@1258	723	* that is not a legal URI character is quoted. </p></li>
jaroslav@1258	724	*
jaroslav@1258	725	* </ol>
jaroslav@1258	726	*
jaroslav@1258	727	* <p> The resulting URI string is then parsed as if by invoking the {@link
jaroslav@1258	728	* #URI(String)} constructor and then invoking the {@link
jaroslav@1258	729	* #parseServerAuthority()} method upon the result; this may cause a {@link
jaroslav@1258	730	* URISyntaxException} to be thrown. </p>
jaroslav@1258	731	*
jaroslav@1258	732	* @param scheme Scheme name
jaroslav@1258	733	* @param authority Authority
jaroslav@1258	734	* @param path Path
jaroslav@1258	735	* @param query Query
jaroslav@1258	736	* @param fragment Fragment
jaroslav@1258	737	*
jaroslav@1258	738	* @throws URISyntaxException
jaroslav@1258	739	* If both a scheme and a path are given but the path is relative,
jaroslav@1258	740	* if the URI string constructed from the given components violates
jaroslav@1258	741	* RFC 2396, or if the authority component of the string is
jaroslav@1258	742	* present but cannot be parsed as a server-based authority
jaroslav@1258	743	*/
jaroslav@1258	744	public URI(String scheme,
jaroslav@1258	745	String authority,
jaroslav@1258	746	String path, String query, String fragment)
jaroslav@1258	747	throws URISyntaxException
jaroslav@1258	748	{
jaroslav@1258	749	String s = toString(scheme, null,
jaroslav@1258	750	authority, null, null, -1,
jaroslav@1258	751	path, query, fragment);
jaroslav@1258	752	checkPath(s, scheme, path);
jaroslav@1258	753	new Parser(s).parse(false);
jaroslav@1258	754	}
jaroslav@1258	755
jaroslav@1258	756	/**
jaroslav@1258	757	* Constructs a hierarchical URI from the given components.
jaroslav@1258	758	*
jaroslav@1258	759	* <p> A component may be left undefined by passing <tt>null</tt>.
jaroslav@1258	760	*
jaroslav@1258	761	* <p> This convenience constructor works as if by invoking the
jaroslav@1258	762	* seven-argument constructor as follows:
jaroslav@1258	763	*
jaroslav@1258	764	* <blockquote><tt>
jaroslav@1258	765	* new {@link #URI(String, String, String, int, String, String, String)
jaroslav@1258	766	* URI}(scheme, null, host, -1, path, null, fragment);
jaroslav@1258	767	* </tt></blockquote>
jaroslav@1258	768	*
jaroslav@1258	769	* @param scheme Scheme name
jaroslav@1258	770	* @param host Host name
jaroslav@1258	771	* @param path Path
jaroslav@1258	772	* @param fragment Fragment
jaroslav@1258	773	*
jaroslav@1258	774	* @throws URISyntaxException
jaroslav@1258	775	* If the URI string constructed from the given components
jaroslav@1258	776	* violates RFC 2396
jaroslav@1258	777	*/
public URI(String scheme, String host, String path, String fragment)
        throws URISyntaxException {
    this(scheme,
         null,       // no user information
         host,
         -1,         // port left undefined
         path,
         null,       // no query
         fragment);
}
jaroslav@1258	783
jaroslav@1258	784	/**
jaroslav@1258	785	* Constructs a URI from the given components.
jaroslav@1258	786	*
jaroslav@1258	787	* <p> A component may be left undefined by passing <tt>null</tt>.
jaroslav@1258	788	*
jaroslav@1258	789	* <p> This constructor first builds a URI in string form using the given
jaroslav@1258	790	* components as follows: </p>
jaroslav@1258	791	*
jaroslav@1258	792	* <ol>
jaroslav@1258	793	*
jaroslav@1258	794	* <li><p> Initially, the result string is empty. </p></li>
jaroslav@1258	795	*
jaroslav@1258	796	* <li><p> If a scheme is given then it is appended to the result,
jaroslav@1258	797	* followed by a colon character (<tt>':'</tt>). </p></li>
jaroslav@1258	798	*
jaroslav@1258	799	* <li><p> If a scheme-specific part is given then it is appended. Any
jaroslav@1258	800	* character that is not a <a href="#legal-chars">legal URI character</a>
jaroslav@1258	801	* is <a href="#quote">quoted</a>. </p></li>
jaroslav@1258	802	*
jaroslav@1258	803	* <li><p> Finally, if a fragment is given then a hash character
jaroslav@1258	804	* (<tt>'#'</tt>) is appended to the string, followed by the fragment.
jaroslav@1258	805	* Any character that is not a legal URI character is quoted. </p></li>
jaroslav@1258	806	*
jaroslav@1258	807	* </ol>
jaroslav@1258	808	*
jaroslav@1258	809	* <p> The resulting URI string is then parsed in order to create the new
jaroslav@1258	810	* URI instance as if by invoking the {@link #URI(String)} constructor;
jaroslav@1258	811	* this may cause a {@link URISyntaxException} to be thrown. </p>
jaroslav@1258	812	*
jaroslav@1258	813	* @param scheme Scheme name
jaroslav@1258	814	* @param ssp Scheme-specific part
jaroslav@1258	815	* @param fragment Fragment
jaroslav@1258	816	*
jaroslav@1258	817	* @throws URISyntaxException
jaroslav@1258	818	* If the URI string constructed from the given components
jaroslav@1258	819	* violates RFC 2396
jaroslav@1258	820	*/
jaroslav@1258	821	public URI(String scheme, String ssp, String fragment)
jaroslav@1258	822	throws URISyntaxException
jaroslav@1258	823	{
jaroslav@1258	824	new Parser(toString(scheme, ssp,
jaroslav@1258	825	null, null, null, -1,
jaroslav@1258	826	null, null, fragment))
jaroslav@1258	827	.parse(false);
jaroslav@1258	828	}
jaroslav@1258	829
jaroslav@1258	830	/**
jaroslav@1258	831	* Creates a URI by parsing the given string.
jaroslav@1258	832	*
jaroslav@1258	833	* <p> This convenience factory method works as if by invoking the {@link
jaroslav@1258	834	* #URI(String)} constructor; any {@link URISyntaxException} thrown by the
jaroslav@1258	835	* constructor is caught and wrapped in a new {@link
jaroslav@1258	836	* IllegalArgumentException} object, which is then thrown.
jaroslav@1258	837	*
jaroslav@1258	838	* <p> This method is provided for use in situations where it is known that
jaroslav@1258	839	* the given string is a legal URI, for example for URI constants declared
jaroslav@1258	840	* within a program, and so it would be considered a programming error
jaroslav@1258	841	* for the string not to parse as such. The constructors, which throw
jaroslav@1258	842	* {@link URISyntaxException} directly, should be used in situations where a
jaroslav@1258	843	* URI is being constructed from user input or from some other source that
jaroslav@1258	844	* may be prone to errors. </p>
jaroslav@1258	845	*
jaroslav@1258	846	* @param str The string to be parsed into a URI
jaroslav@1258	847	* @return The new URI
jaroslav@1258	848	*
jaroslav@1258	849	* @throws NullPointerException
jaroslav@1258	850	* If <tt>str</tt> is <tt>null</tt>
jaroslav@1258	851	*
jaroslav@1258	852	* @throws IllegalArgumentException
jaroslav@1258	853	* If the given string violates RFC 2396
jaroslav@1258	854	*/
jaroslav@1258	855	public static URI create(String str) {
jaroslav@1258	856	try {
jaroslav@1258	857	return new URI(str);
jaroslav@1258	858	} catch (URISyntaxException x) {
jaroslav@1258	859	throw new IllegalArgumentException(x.getMessage(), x);
jaroslav@1258	860	}
jaroslav@1258	861	}
jaroslav@1258	862
jaroslav@1258	863
jaroslav@1258	864	// -- Operations --
jaroslav@1258	865
jaroslav@1258	866	/**
jaroslav@1258	867	* Attempts to parse this URI's authority component, if defined, into
jaroslav@1258	868	* user-information, host, and port components.
jaroslav@1258	869	*
jaroslav@1258	870	* <p> If this URI's authority component has already been recognized as
jaroslav@1258	871	* being server-based then it will already have been parsed into
jaroslav@1258	872	* user-information, host, and port components. In this case, or if this
jaroslav@1258	873	* URI has no authority component, this method simply returns this URI.
jaroslav@1258	874	*
jaroslav@1258	875	* <p> Otherwise this method attempts once more to parse the authority
jaroslav@1258	876	* component into user-information, host, and port components, and throws
jaroslav@1258	877	* an exception describing why the authority component could not be parsed
jaroslav@1258	878	* in that way.
jaroslav@1258	879	*
jaroslav@1258	880	* <p> This method is provided because the generic URI syntax specified in
jaroslav@1258	881	* <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>
jaroslav@1258	882	* cannot always distinguish a malformed server-based authority from a
jaroslav@1258	883	* legitimate registry-based authority. It must therefore treat some
jaroslav@1258	884	* instances of the former as instances of the latter. The authority
jaroslav@1258	885	* component in the URI string <tt>"//foo:bar"</tt>, for example, is not a
jaroslav@1258	886	* legal server-based authority but it is legal as a registry-based
jaroslav@1258	887	* authority.
jaroslav@1258	888	*
jaroslav@1258	889	* <p> In many common situations, for example when working with URIs that are
jaroslav@1258	890	* known to be either URNs or URLs, the hierarchical URIs being used will
jaroslav@1258	891	* always be server-based. They therefore must either be parsed as such or
jaroslav@1258	892	* treated as an error. In these cases a statement such as
jaroslav@1258	893	*
jaroslav@1258	894	* <blockquote>
jaroslav@1258	895	* <tt>URI </tt><i>u</i><tt> = new URI(str).parseServerAuthority();</tt>
jaroslav@1258	896	* </blockquote>
jaroslav@1258	897	*
jaroslav@1258	898	* <p> can be used to ensure that <i>u</i> always refers to a URI that, if
jaroslav@1258	899	* it has an authority component, has a server-based authority with proper
jaroslav@1258	900	* user-information, host, and port components. Invoking this method also
jaroslav@1258	901	* ensures that if the authority could not be parsed in that way then an
jaroslav@1258	902	* appropriate diagnostic message can be issued based upon the exception
jaroslav@1258	903	* that is thrown. </p>
jaroslav@1258	904	*
jaroslav@1258	905	* @return A URI whose authority field has been parsed
jaroslav@1258	906	* as a server-based authority
jaroslav@1258	907	*
jaroslav@1258	908	* @throws URISyntaxException
jaroslav@1258	909	* If the authority component of this URI is defined
jaroslav@1258	910	* but cannot be parsed as a server-based authority
jaroslav@1258	911	* according to RFC 2396
jaroslav@1258	912	*/
jaroslav@1258	913	public URI parseServerAuthority()
jaroslav@1258	914	throws URISyntaxException
jaroslav@1258	915	{
jaroslav@1258	916	// We could be clever and cache the error message and index from the
jaroslav@1258	917	// exception thrown during the original parse, but that would require
jaroslav@1258	918	// either more fields or a more-obscure representation.
jaroslav@1258	919	if ((host != null) \|\| (authority == null))
jaroslav@1258	920	return this;
jaroslav@1258	921	defineString();
jaroslav@1258	922	new Parser(string).parse(true);
jaroslav@1258	923	return this;
jaroslav@1258	924	}
jaroslav@1258	925
jaroslav@1258	926	/**
jaroslav@1258	927	* Normalizes this URI's path.
jaroslav@1258	928	*
jaroslav@1258	929	* <p> If this URI is opaque, or if its path is already in normal form,
jaroslav@1258	930	* then this URI is returned. Otherwise a new URI is constructed that is
jaroslav@1258	931	* identical to this URI except that its path is computed by normalizing
jaroslav@1258	932	* this URI's path in a manner consistent with <a
jaroslav@1258	933	* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
jaroslav@1258	934	* section 5.2, step 6, sub-steps c through f; that is:
jaroslav@1258	935	* </p>
jaroslav@1258	936	*
jaroslav@1258	937	* <ol>
jaroslav@1258	938	*
jaroslav@1258	939	* <li><p> All <tt>"."</tt> segments are removed. </p></li>
jaroslav@1258	940	*
jaroslav@1258	941	* <li><p> If a <tt>".."</tt> segment is preceded by a non-<tt>".."</tt>
jaroslav@1258	942	* segment then both of these segments are removed. This step is
jaroslav@1258	943	* repeated until it is no longer applicable. </p></li>
jaroslav@1258	944	*
jaroslav@1258	945	* <li><p> If the path is relative, and if its first segment contains a
jaroslav@1258	946	* colon character (<tt>':'</tt>), then a <tt>"."</tt> segment is
jaroslav@1258	947	* prepended. This prevents a relative URI with a path such as
jaroslav@1258	948	* <tt>"a:b/c/d"</tt> from later being re-parsed as an opaque URI with a
jaroslav@1258	949	* scheme of <tt>"a"</tt> and a scheme-specific part of <tt>"b/c/d"</tt>.
jaroslav@1258	950	* <b><i>(Deviation from RFC 2396)</i></b> </p></li>
jaroslav@1258	951	*
jaroslav@1258	952	* </ol>
jaroslav@1258	953	*
jaroslav@1258	954	* <p> A normalized path will begin with one or more <tt>".."</tt> segments
jaroslav@1258	955	* if there were insufficient non-<tt>".."</tt> segments preceding them to
jaroslav@1258	956	* allow their removal. A normalized path will begin with a <tt>"."</tt>
jaroslav@1258	957	* segment if one was inserted by step 3 above. Otherwise, a normalized
jaroslav@1258	958	* path will not contain any <tt>"."</tt> or <tt>".."</tt> segments. </p>
jaroslav@1258	959	*
jaroslav@1258	960	* @return A URI equivalent to this URI,
jaroslav@1258	961	* but whose path is in normal form
jaroslav@1258	962	*/
jaroslav@1258	963	public URI normalize() {
jaroslav@1258	964	return normalize(this);
jaroslav@1258	965	}
jaroslav@1258	966
jaroslav@1258	967	/**
jaroslav@1258	968	* Resolves the given URI against this URI.
jaroslav@1258	969	*
jaroslav@1258	970	* <p> If the given URI is already absolute, or if this URI is opaque, then
jaroslav@1258	971	* the given URI is returned.
jaroslav@1258	972	*
jaroslav@1258	973	* <p><a name="resolve-frag"></a> If the given URI's fragment component is
jaroslav@1258	974	* defined, its path component is empty, and its scheme, authority, and
jaroslav@1258	975	* query components are undefined, then a URI with the given fragment but
jaroslav@1258	976	* with all other components equal to those of this URI is returned. This
jaroslav@1258	977	* allows a URI representing a standalone fragment reference, such as
jaroslav@1258	978	* <tt>"#foo"</tt>, to be usefully resolved against a base URI.
jaroslav@1258	979	*
jaroslav@1258	980	* <p> Otherwise this method constructs a new hierarchical URI in a manner
jaroslav@1258	981	* consistent with <a
jaroslav@1258	982	* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
jaroslav@1258	983	* section 5.2; that is: </p>
jaroslav@1258	984	*
jaroslav@1258	985	* <ol>
jaroslav@1258	986	*
jaroslav@1258	987	* <li><p> A new URI is constructed with this URI's scheme and the given
jaroslav@1258	988	* URI's query and fragment components. </p></li>
jaroslav@1258	989	*
jaroslav@1258	990	* <li><p> If the given URI has an authority component then the new URI's
jaroslav@1258	991	* authority and path are taken from the given URI. </p></li>
jaroslav@1258	992	*
jaroslav@1258	993	* <li><p> Otherwise the new URI's authority component is copied from
jaroslav@1258	994	* this URI, and its path is computed as follows: </p>
jaroslav@1258	995	*
jaroslav@1258	996	* <ol type=a>
jaroslav@1258	997	*
jaroslav@1258	998	* <li><p> If the given URI's path is absolute then the new URI's path
jaroslav@1258	999	* is taken from the given URI. </p></li>
jaroslav@1258	1000	*
jaroslav@1258	1001	* <li><p> Otherwise the given URI's path is relative, and so the new
jaroslav@1258	1002	* URI's path is computed by resolving the path of the given URI
jaroslav@1258	1003	* against the path of this URI. This is done by concatenating all but
jaroslav@1258	1004	* the last segment of this URI's path, if any, with the given URI's
jaroslav@1258	1005	* path and then normalizing the result as if by invoking the {@link
jaroslav@1258	1006	* #normalize() normalize} method. </p></li>
jaroslav@1258	1007	*
jaroslav@1258	1008	* </ol></li>
jaroslav@1258	1009	*
jaroslav@1258	1010	* </ol>
jaroslav@1258	1011	*
jaroslav@1258	1012	* <p> The result of this method is absolute if, and only if, either this
jaroslav@1258	1013	* URI is absolute or the given URI is absolute. </p>
jaroslav@1258	1014	*
jaroslav@1258	1015	* @param uri The URI to be resolved against this URI
jaroslav@1258	1016	* @return The resulting URI
jaroslav@1258	1017	*
jaroslav@1258	1018	* @throws NullPointerException
jaroslav@1258	1019	* If <tt>uri</tt> is <tt>null</tt>
jaroslav@1258	1020	*/
jaroslav@1258	1021	public URI resolve(URI uri) {
jaroslav@1258	1022	return resolve(this, uri);
jaroslav@1258	1023	}
jaroslav@1258	1024
jaroslav@1258	1025	/**
jaroslav@1258	1026	* Constructs a new URI by parsing the given string and then resolving it
jaroslav@1258	1027	* against this URI.
jaroslav@1258	1028	*
jaroslav@1258	1029	* <p> This convenience method works as if invoking it were equivalent to
jaroslav@1258	1030	* evaluating the expression <tt>{@link #resolve(java.net.URI)
jaroslav@1258	1031	* resolve}(URI.{@link #create(String) create}(str))</tt>. </p>
jaroslav@1258	1032	*
jaroslav@1258	1033	* @param str The string to be parsed into a URI
jaroslav@1258	1034	* @return The resulting URI
jaroslav@1258	1035	*
jaroslav@1258	1036	* @throws NullPointerException
jaroslav@1258	1037	* If <tt>str</tt> is <tt>null</tt>
jaroslav@1258	1038	*
jaroslav@1258	1039	* @throws IllegalArgumentException
jaroslav@1258	1040	* If the given string violates RFC 2396
jaroslav@1258	1041	*/
jaroslav@1258	1042	public URI resolve(String str) {
jaroslav@1258	1043	return resolve(URI.create(str));
jaroslav@1258	1044	}
jaroslav@1258	1045
jaroslav@1258	1046	/**
jaroslav@1258	1047	* Relativizes the given URI against this URI.
jaroslav@1258	1048	*
jaroslav@1258	1049	* <p> The relativization of the given URI against this URI is computed as
jaroslav@1258	1050	* follows: </p>
jaroslav@1258	1051	*
jaroslav@1258	1052	* <ol>
jaroslav@1258	1053	*
jaroslav@1258	1054	* <li><p> If either this URI or the given URI are opaque, or if the
jaroslav@1258	1055	* scheme and authority components of the two URIs are not identical, or
jaroslav@1258	1056	* if the path of this URI is not a prefix of the path of the given URI,
jaroslav@1258	1057	* then the given URI is returned. </p></li>
jaroslav@1258	1058	*
jaroslav@1258	1059	* <li><p> Otherwise a new relative hierarchical URI is constructed with
jaroslav@1258	1060	* query and fragment components taken from the given URI and with a path
jaroslav@1258	1061	* component computed by removing this URI's path from the beginning of
jaroslav@1258	1062	* the given URI's path. </p></li>
jaroslav@1258	1063	*
jaroslav@1258	1064	* </ol>
jaroslav@1258	1065	*
jaroslav@1258	1066	* @param uri The URI to be relativized against this URI
jaroslav@1258	1067	* @return The resulting URI
jaroslav@1258	1068	*
jaroslav@1258	1069	* @throws NullPointerException
jaroslav@1258	1070	* If <tt>uri</tt> is <tt>null</tt>
jaroslav@1258	1071	*/
jaroslav@1258	1072	public URI relativize(URI uri) {
jaroslav@1258	1073	return relativize(this, uri);
jaroslav@1258	1074	}
jaroslav@1258	1075
jaroslav@1258	1076	/**
jaroslav@1258	1077	* Constructs a URL from this URI.
jaroslav@1258	1078	*
jaroslav@1258	1079	* <p> This convenience method works as if invoking it were equivalent to
jaroslav@1258	1080	* evaluating the expression <tt>new URL(this.toString())</tt> after
jaroslav@1258	1081	* first checking that this URI is absolute. </p>
jaroslav@1258	1082	*
jaroslav@1258	1083	* @return A URL constructed from this URI
jaroslav@1258	1084	*
jaroslav@1258	1085	* @throws IllegalArgumentException
jaroslav@1258	1086	* If this URI is not absolute
jaroslav@1258	1087	*
jaroslav@1258	1088	* @throws MalformedURLException
jaroslav@1258	1089	* If a protocol handler for the URL could not be found,
jaroslav@1258	1090	* or if some other error occurred while constructing the URL
jaroslav@1258	1091	*/
jaroslav@1258	1092	public URL toURL()
jaroslav@1258	1093	throws MalformedURLException {
jaroslav@1258	1094	if (!isAbsolute())
jaroslav@1258	1095	throw new IllegalArgumentException("URI is not absolute");
jaroslav@1258	1096	return new URL(toString());
jaroslav@1258	1097	}
jaroslav@1258	1098
jaroslav@1258	1099	// -- Component access methods --
jaroslav@1258	1100
jaroslav@1258	1101	/**
jaroslav@1258	1102	* Returns the scheme component of this URI.
jaroslav@1258	1103	*
jaroslav@1258	1104	* <p> The scheme component of a URI, if defined, only contains characters
jaroslav@1258	1105	* in the <i>alphanum</i> category and in the string <tt>"-.+"</tt>. A
jaroslav@1258	1106	* scheme always starts with an <i>alpha</i> character. </p>
jaroslav@1258	1107	*
jaroslav@1258	1108	* <p> The scheme component of a URI cannot contain escaped octets, hence this
jaroslav@1258	1109	* method does not perform any decoding. </p>
jaroslav@1258	1110	*
jaroslav@1258	1111	* @return The scheme component of this URI,
jaroslav@1258	1112	* or <tt>null</tt> if the scheme is undefined
jaroslav@1258	1113	*/
jaroslav@1258	1114	public String getScheme() {
jaroslav@1258	1115	return scheme;
jaroslav@1258	1116	}
jaroslav@1258	1117
jaroslav@1258	1118	/**
jaroslav@1258	1119	* Tells whether or not this URI is absolute.
jaroslav@1258	1120	*
jaroslav@1258	1121	* <p> A URI is absolute if, and only if, it has a scheme component. </p>
jaroslav@1258	1122	*
jaroslav@1258	1123	* @return <tt>true</tt> if, and only if, this URI is absolute
jaroslav@1258	1124	*/
jaroslav@1258	1125	public boolean isAbsolute() {
jaroslav@1258	1126	return scheme != null;
jaroslav@1258	1127	}
jaroslav@1258	1128
jaroslav@1258	1129	/**
jaroslav@1258	1130	* Tells whether or not this URI is opaque.
jaroslav@1258	1131	*
jaroslav@1258	1132	* <p> A URI is opaque if, and only if, it is absolute and its
jaroslav@1258	1133	* scheme-specific part does not begin with a slash character ('/').
jaroslav@1258	1134	* An opaque URI has a scheme, a scheme-specific part, and possibly
jaroslav@1258	1135	* a fragment; all other components are undefined. </p>
jaroslav@1258	1136	*
jaroslav@1258	1137	* @return <tt>true</tt> if, and only if, this URI is opaque
jaroslav@1258	1138	*/
jaroslav@1258	1139	public boolean isOpaque() {
jaroslav@1258	1140	return path == null;
jaroslav@1258	1141	}
jaroslav@1258	1142
jaroslav@1258	1143	/**
jaroslav@1258	1144	* Returns the raw scheme-specific part of this URI. The scheme-specific
jaroslav@1258	1145	* part is never undefined, though it may be empty.
jaroslav@1258	1146	*
jaroslav@1258	1147	* <p> The scheme-specific part of a URI only contains legal URI
jaroslav@1258	1148	* characters. </p>
jaroslav@1258	1149	*
jaroslav@1258	1150	* @return The raw scheme-specific part of this URI
jaroslav@1258	1151	* (never <tt>null</tt>)
jaroslav@1258	1152	*/
jaroslav@1258	1153	public String getRawSchemeSpecificPart() {
jaroslav@1258	1154	defineSchemeSpecificPart();
jaroslav@1258	1155	return schemeSpecificPart;
jaroslav@1258	1156	}
jaroslav@1258	1157
jaroslav@1258	1158	/**
jaroslav@1258	1159	* Returns the decoded scheme-specific part of this URI.
jaroslav@1258	1160	*
jaroslav@1258	1161	* <p> The string returned by this method is equal to that returned by the
jaroslav@1258	1162	* {@link #getRawSchemeSpecificPart() getRawSchemeSpecificPart} method
jaroslav@1258	1163	* except that all sequences of escaped octets are <a
jaroslav@1258	1164	* href="#decode">decoded</a>. </p>
jaroslav@1258	1165	*
jaroslav@1258	1166	* @return The decoded scheme-specific part of this URI
jaroslav@1258	1167	* (never <tt>null</tt>)
jaroslav@1258	1168	*/
jaroslav@1258	1169	public String getSchemeSpecificPart() {
jaroslav@1258	1170	if (decodedSchemeSpecificPart == null)
jaroslav@1258	1171	decodedSchemeSpecificPart = decode(getRawSchemeSpecificPart());
jaroslav@1258	1172	return decodedSchemeSpecificPart;
jaroslav@1258	1173	}
jaroslav@1258	1174
jaroslav@1258	1175	/**
jaroslav@1258	1176	* Returns the raw authority component of this URI.
jaroslav@1258	1177	*
jaroslav@1258	1178	* <p> The authority component of a URI, if defined, only contains the
jaroslav@1258	1179	* commercial-at character (<tt>'@'</tt>) and characters in the
jaroslav@1258	1180	* <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, and <i>other</i>
jaroslav@1258	1181	* categories. If the authority is server-based then it is further
jaroslav@1258	1182	* constrained to have valid user-information, host, and port
jaroslav@1258	1183	* components. </p>
jaroslav@1258	1184	*
jaroslav@1258	1185	* @return The raw authority component of this URI,
jaroslav@1258	1186	* or <tt>null</tt> if the authority is undefined
jaroslav@1258	1187	*/
jaroslav@1258	1188	public String getRawAuthority() {
jaroslav@1258	1189	return authority;
jaroslav@1258	1190	}
jaroslav@1258	1191
jaroslav@1258	1192	/**
jaroslav@1258	1193	* Returns the decoded authority component of this URI.
jaroslav@1258	1194	*
jaroslav@1258	1195	* <p> The string returned by this method is equal to that returned by the
jaroslav@1258	1196	* {@link #getRawAuthority() getRawAuthority} method except that all
jaroslav@1258	1197	* sequences of escaped octets are <a href="#decode">decoded</a>. </p>
jaroslav@1258	1198	*
jaroslav@1258	1199	* @return The decoded authority component of this URI,
jaroslav@1258	1200	* or <tt>null</tt> if the authority is undefined
jaroslav@1258	1201	*/
jaroslav@1258	1202	public String getAuthority() {
jaroslav@1258	1203	if (decodedAuthority == null)
jaroslav@1258	1204	decodedAuthority = decode(authority);
jaroslav@1258	1205	return decodedAuthority;
jaroslav@1258	1206	}
jaroslav@1258	1207
jaroslav@1258	1208	/**
jaroslav@1258	1209	* Returns the raw user-information component of this URI.
jaroslav@1258	1210	*
jaroslav@1258	1211	* <p> The user-information component of a URI, if defined, only contains
jaroslav@1258	1212	* characters in the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, and
jaroslav@1258	1213	* <i>other</i> categories. </p>
jaroslav@1258	1214	*
jaroslav@1258	1215	* @return The raw user-information component of this URI,
jaroslav@1258	1216	* or <tt>null</tt> if the user information is undefined
jaroslav@1258	1217	*/
jaroslav@1258	1218	public String getRawUserInfo() {
jaroslav@1258	1219	return userInfo;
jaroslav@1258	1220	}
jaroslav@1258	1221
jaroslav@1258	1222	/**
jaroslav@1258	1223	* Returns the decoded user-information component of this URI.
jaroslav@1258	1224	*
jaroslav@1258	1225	* <p> The string returned by this method is equal to that returned by the
jaroslav@1258	1226	* {@link #getRawUserInfo() getRawUserInfo} method except that all
jaroslav@1258	1227	* sequences of escaped octets are <a href="#decode">decoded</a>. </p>
jaroslav@1258	1228	*
jaroslav@1258	1229	* @return The decoded user-information component of this URI,
jaroslav@1258	1230	* or <tt>null</tt> if the user information is undefined
jaroslav@1258	1231	*/
jaroslav@1258	1232	public String getUserInfo() {
jaroslav@1258	1233	if ((decodedUserInfo == null) && (userInfo != null))
jaroslav@1258	1234	decodedUserInfo = decode(userInfo);
jaroslav@1258	1235	return decodedUserInfo;
jaroslav@1258	1236	}
jaroslav@1258	1237
jaroslav@1258	1238	/**
jaroslav@1258	1239	* Returns the host component of this URI.
jaroslav@1258	1240	*
jaroslav@1258	1241	* <p> The host component of a URI, if defined, will have one of the
jaroslav@1258	1242	* following forms: </p>
jaroslav@1258	1243	*
jaroslav@1258	1244	* <ul type=disc>
jaroslav@1258	1245	*
jaroslav@1258	1246	* <li><p> A domain name consisting of one or more <i>labels</i>
jaroslav@1258	1247	* separated by period characters (<tt>'.'</tt>), optionally followed by
jaroslav@1258	1248	* a period character. Each label consists of <i>alphanum</i> characters
jaroslav@1258	1249	* as well as hyphen characters (<tt>'-'</tt>), though hyphens never
jaroslav@1258	1250	* occur as the first or last characters in a label. The rightmost
jaroslav@1258	1251	* label of a domain name consisting of two or more labels, begins
jaroslav@1258	1252	* with an <i>alpha</i> character. </li>
jaroslav@1258	1253	*
jaroslav@1258	1254	* <li><p> A dotted-quad IPv4 address of the form
jaroslav@1258	1255	* <i>digit</i><tt>+.</tt><i>digit</i><tt>+.</tt><i>digit</i><tt>+.</tt><i>digit</i><tt>+</tt>,
jaroslav@1258	1256	* where no <i>digit</i> sequence is longer than three characters and no
jaroslav@1258	1257	* sequence has a value larger than 255. </p></li>
jaroslav@1258	1258	*
jaroslav@1258	1259	* <li><p> An IPv6 address enclosed in square brackets (<tt>'['</tt> and
jaroslav@1258	1260	* <tt>']'</tt>) and consisting of hexadecimal digits, colon characters
jaroslav@1258	1261	* (<tt>':'</tt>), and possibly an embedded IPv4 address. The full
jaroslav@1258	1262	* syntax of IPv6 addresses is specified in <a
jaroslav@1258	1263	* href="http://www.ietf.org/rfc/rfc2373.txt"><i>RFC 2373: IPv6
jaroslav@1258	1264	* Addressing Architecture</i></a>. </p></li>
jaroslav@1258	1265	*
jaroslav@1258	1266	* </ul>
jaroslav@1258	1267	*
jaroslav@1258	1268	* The host component of a URI cannot contain escaped octets, hence this
jaroslav@1258	1269	* method does not perform any decoding.
jaroslav@1258	1270	*
jaroslav@1258	1271	* @return The host component of this URI,
jaroslav@1258	1272	* or <tt>null</tt> if the host is undefined
jaroslav@1258	1273	*/
jaroslav@1258	1274	public String getHost() {
jaroslav@1258	1275	return host;
jaroslav@1258	1276	}
jaroslav@1258	1277
jaroslav@1258	1278	/**
jaroslav@1258	1279	* Returns the port number of this URI.
jaroslav@1258	1280	*
jaroslav@1258	1281	* <p> The port component of a URI, if defined, is a non-negative
jaroslav@1258	1282	* integer. </p>
jaroslav@1258	1283	*
jaroslav@1258	1284	* @return The port component of this URI,
jaroslav@1258	1285	* or <tt>-1</tt> if the port is undefined
jaroslav@1258	1286	*/
jaroslav@1258	1287	public int getPort() {
jaroslav@1258	1288	return port;
jaroslav@1258	1289	}
jaroslav@1258	1290
jaroslav@1258	1291	/**
jaroslav@1258	1292	* Returns the raw path component of this URI.
jaroslav@1258	1293	*
jaroslav@1258	1294	* <p> The path component of a URI, if defined, only contains the slash
jaroslav@1258	1295	* character (<tt>'/'</tt>), the commercial-at character (<tt>'@'</tt>),
jaroslav@1258	1296	* and characters in the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>,
jaroslav@1258	1297	* and <i>other</i> categories. </p>
jaroslav@1258	1298	*
jaroslav@1258	1299	* @return The path component of this URI,
jaroslav@1258	1300	* or <tt>null</tt> if the path is undefined
jaroslav@1258	1301	*/
jaroslav@1258	1302	public String getRawPath() {
jaroslav@1258	1303	return path;
jaroslav@1258	1304	}
jaroslav@1258	1305
jaroslav@1258	1306	/**
jaroslav@1258	1307	* Returns the decoded path component of this URI.
jaroslav@1258	1308	*
jaroslav@1258	1309	* <p> The string returned by this method is equal to that returned by the
jaroslav@1258	1310	* {@link #getRawPath() getRawPath} method except that all sequences of
jaroslav@1258	1311	* escaped octets are <a href="#decode">decoded</a>. </p>
jaroslav@1258	1312	*
jaroslav@1258	1313	* @return The decoded path component of this URI,
jaroslav@1258	1314	* or <tt>null</tt> if the path is undefined
jaroslav@1258	1315	*/
jaroslav@1258	1316	public String getPath() {
jaroslav@1258	1317	if ((decodedPath == null) && (path != null))
jaroslav@1258	1318	decodedPath = decode(path);
jaroslav@1258	1319	return decodedPath;
jaroslav@1258	1320	}
jaroslav@1258	1321
jaroslav@1258	1322	/**
jaroslav@1258	1323	* Returns the raw query component of this URI.
jaroslav@1258	1324	*
jaroslav@1258	1325	* <p> The query component of a URI, if defined, only contains legal URI
jaroslav@1258	1326	* characters. </p>
jaroslav@1258	1327	*
jaroslav@1258	1328	* @return The raw query component of this URI,
jaroslav@1258	1329	* or <tt>null</tt> if the query is undefined
jaroslav@1258	1330	*/
jaroslav@1258	1331	public String getRawQuery() {
jaroslav@1258	1332	return query;
jaroslav@1258	1333	}
jaroslav@1258	1334
jaroslav@1258	1335	/**
jaroslav@1258	1336	* Returns the decoded query component of this URI.
jaroslav@1258	1337	*
jaroslav@1258	1338	* <p> The string returned by this method is equal to that returned by the
jaroslav@1258	1339	* {@link #getRawQuery() getRawQuery} method except that all sequences of
jaroslav@1258	1340	* escaped octets are <a href="#decode">decoded</a>. </p>
jaroslav@1258	1341	*
jaroslav@1258	1342	* @return The decoded query component of this URI,
jaroslav@1258	1343	* or <tt>null</tt> if the query is undefined
jaroslav@1258	1344	*/
jaroslav@1258	1345	public String getQuery() {
jaroslav@1258	1346	if ((decodedQuery == null) && (query != null))
jaroslav@1258	1347	decodedQuery = decode(query);
jaroslav@1258	1348	return decodedQuery;
jaroslav@1258	1349	}
jaroslav@1258	1350
jaroslav@1258	1351	/**
jaroslav@1258	1352	* Returns the raw fragment component of this URI.
jaroslav@1258	1353	*
jaroslav@1258	1354	* <p> The fragment component of a URI, if defined, only contains legal URI
jaroslav@1258	1355	* characters. </p>
jaroslav@1258	1356	*
jaroslav@1258	1357	* @return The raw fragment component of this URI,
jaroslav@1258	1358	* or <tt>null</tt> if the fragment is undefined
jaroslav@1258	1359	*/
jaroslav@1258	1360	public String getRawFragment() {
jaroslav@1258	1361	return fragment;
jaroslav@1258	1362	}
jaroslav@1258	1363
jaroslav@1258	1364	/**
jaroslav@1258	1365	* Returns the decoded fragment component of this URI.
jaroslav@1258	1366	*
jaroslav@1258	1367	* <p> The string returned by this method is equal to that returned by the
jaroslav@1258	1368	* {@link #getRawFragment() getRawFragment} method except that all
jaroslav@1258	1369	* sequences of escaped octets are <a href="#decode">decoded</a>. </p>
jaroslav@1258	1370	*
jaroslav@1258	1371	* @return The decoded fragment component of this URI,
jaroslav@1258	1372	* or <tt>null</tt> if the fragment is undefined
jaroslav@1258	1373	*/
jaroslav@1258	1374	public String getFragment() {
jaroslav@1258	1375	if ((decodedFragment == null) && (fragment != null))
jaroslav@1258	1376	decodedFragment = decode(fragment);
jaroslav@1258	1377	return decodedFragment;
jaroslav@1258	1378	}
jaroslav@1258	1379
jaroslav@1258	1380
jaroslav@1258	1381	// -- Equality, comparison, hash code, toString, and serialization --
jaroslav@1258	1382
jaroslav@1258	1383	/**
jaroslav@1258	1384	* Tests this URI for equality with another object.
jaroslav@1258	1385	*
jaroslav@1258	1386	* <p> If the given object is not a URI then this method immediately
jaroslav@1258	1387	* returns <tt>false</tt>.
jaroslav@1258	1388	*
jaroslav@1258	1389	* <p> For two URIs to be considered equal requires that either both are
jaroslav@1258	1390	* opaque or both are hierarchical. Their schemes must either both be
jaroslav@1258	1391	* undefined or else be equal without regard to case. Their fragments
jaroslav@1258	1392	* must either both be undefined or else be equal.
jaroslav@1258	1393	*
jaroslav@1258	1394	* <p> For two opaque URIs to be considered equal, their scheme-specific
jaroslav@1258	1395	* parts must be equal.
jaroslav@1258	1396	*
jaroslav@1258	1397	* <p> For two hierarchical URIs to be considered equal, their paths must
jaroslav@1258	1398	* be equal and their queries must either both be undefined or else be
jaroslav@1258	1399	* equal. Their authorities must either both be undefined, or both be
jaroslav@1258	1400	* registry-based, or both be server-based. If their authorities are
jaroslav@1258	1401	* defined and are registry-based, then they must be equal. If their
jaroslav@1258	1402	* authorities are defined and are server-based, then their hosts must be
jaroslav@1258	1403	* equal without regard to case, their port numbers must be equal, and
jaroslav@1258	1404	* their user-information components must be equal.
jaroslav@1258	1405	*
jaroslav@1258	1406	* <p> When testing the user-information, path, query, fragment, authority,
jaroslav@1258	1407	* or scheme-specific parts of two URIs for equality, the raw forms rather
jaroslav@1258	1408	* than the decoded forms of these components are compared and the
jaroslav@1258	1409	* hexadecimal digits of escaped octets are compared without regard to
jaroslav@1258	1410	* case.
jaroslav@1258	1411	*
jaroslav@1258	1412	* <p> This method satisfies the general contract of the {@link
jaroslav@1258	1413	* java.lang.Object#equals(Object) Object.equals} method. </p>
jaroslav@1258	1414	*
jaroslav@1258	1415	* @param ob The object to which this object is to be compared
jaroslav@1258	1416	*
jaroslav@1258	1417	* @return <tt>true</tt> if, and only if, the given object is a URI that
jaroslav@1258	1418	* is identical to this URI
jaroslav@1258	1419	*/
jaroslav@1258	1420	public boolean equals(Object ob) {
jaroslav@1258	1421	if (ob == this)
jaroslav@1258	1422	return true;
jaroslav@1258	1423	if (!(ob instanceof URI))
jaroslav@1258	1424	return false;
jaroslav@1258	1425	URI that = (URI)ob;
jaroslav@1258	1426	if (this.isOpaque() != that.isOpaque()) return false;
jaroslav@1258	1427	if (!equalIgnoringCase(this.scheme, that.scheme)) return false;
jaroslav@1258	1428	if (!equal(this.fragment, that.fragment)) return false;
jaroslav@1258	1429
jaroslav@1258	1430	// Opaque
jaroslav@1258	1431	if (this.isOpaque())
jaroslav@1258	1432	return equal(this.schemeSpecificPart, that.schemeSpecificPart);
jaroslav@1258	1433
jaroslav@1258	1434	// Hierarchical
jaroslav@1258	1435	if (!equal(this.path, that.path)) return false;
jaroslav@1258	1436	if (!equal(this.query, that.query)) return false;
jaroslav@1258	1437
jaroslav@1258	1438	// Authorities
jaroslav@1258	1439	if (this.authority == that.authority) return true;
jaroslav@1258	1440	if (this.host != null) {
jaroslav@1258	1441	// Server-based
jaroslav@1258	1442	if (!equal(this.userInfo, that.userInfo)) return false;
jaroslav@1258	1443	if (!equalIgnoringCase(this.host, that.host)) return false;
jaroslav@1258	1444	if (this.port != that.port) return false;
jaroslav@1258	1445	} else if (this.authority != null) {
jaroslav@1258	1446	// Registry-based
jaroslav@1258	1447	if (!equal(this.authority, that.authority)) return false;
jaroslav@1258	1448	} else if (this.authority != that.authority) {
jaroslav@1258	1449	return false;
jaroslav@1258	1450	}
jaroslav@1258	1451
jaroslav@1258	1452	return true;
jaroslav@1258	1453	}
jaroslav@1258	1454
jaroslav@1258	1455	/**
jaroslav@1258	1456	* Returns a hash-code value for this URI. The hash code is based upon all
jaroslav@1258	1457	* of the URI's components, and satisfies the general contract of the
jaroslav@1258	1458	* {@link java.lang.Object#hashCode() Object.hashCode} method.
jaroslav@1258	1459	*
jaroslav@1258	1460	* @return A hash-code value for this URI
jaroslav@1258	1461	*/
jaroslav@1258	1462	public int hashCode() {
jaroslav@1258	1463	if (hash != 0)
jaroslav@1258	1464	return hash;
jaroslav@1258	1465	int h = hashIgnoringCase(0, scheme);
jaroslav@1258	1466	h = hash(h, fragment);
jaroslav@1258	1467	if (isOpaque()) {
jaroslav@1258	1468	h = hash(h, schemeSpecificPart);
jaroslav@1258	1469	} else {
jaroslav@1258	1470	h = hash(h, path);
jaroslav@1258	1471	h = hash(h, query);
jaroslav@1258	1472	if (host != null) {
jaroslav@1258	1473	h = hash(h, userInfo);
jaroslav@1258	1474	h = hashIgnoringCase(h, host);
jaroslav@1258	1475	h += 1949 * port;
jaroslav@1258	1476	} else {
jaroslav@1258	1477	h = hash(h, authority);
jaroslav@1258	1478	}
jaroslav@1258	1479	}
jaroslav@1258	1480	hash = h;
jaroslav@1258	1481	return h;
jaroslav@1258	1482	}
jaroslav@1258	1483
jaroslav@1258	1484	/**
jaroslav@1258	1485	* Compares this URI to another object, which must be a URI.
jaroslav@1258	1486	*
jaroslav@1258	1487	* <p> When comparing corresponding components of two URIs, if one
jaroslav@1258	1488	* component is undefined but the other is defined then the first is
jaroslav@1258	1489	* considered to be less than the second. Unless otherwise noted, string
jaroslav@1258	1490	* components are ordered according to their natural, case-sensitive
jaroslav@1258	1491	* ordering as defined by the {@link java.lang.String#compareTo(Object)
jaroslav@1258	1492	* String.compareTo} method. String components that are subject to
jaroslav@1258	1493	* encoding are compared by comparing their raw forms rather than their
jaroslav@1258	1494	* decoded forms.
jaroslav@1258	1495	*
jaroslav@1258	1496	* <p> The ordering of URIs is defined as follows: </p>
jaroslav@1258	1497	*
jaroslav@1258	1498	* <ul type=disc>
jaroslav@1258	1499	*
jaroslav@1258	1500	* <li><p> Two URIs with different schemes are ordered according the
jaroslav@1258	1501	* ordering of their schemes, without regard to case. </p></li>
jaroslav@1258	1502	*
jaroslav@1258	1503	* <li><p> A hierarchical URI is considered to be less than an opaque URI
jaroslav@1258	1504	* with an identical scheme. </p></li>
jaroslav@1258	1505	*
jaroslav@1258	1506	* <li><p> Two opaque URIs with identical schemes are ordered according
jaroslav@1258	1507	* to the ordering of their scheme-specific parts. </p></li>
jaroslav@1258	1508	*
jaroslav@1258	1509	* <li><p> Two opaque URIs with identical schemes and scheme-specific
jaroslav@1258	1510	* parts are ordered according to the ordering of their
jaroslav@1258	1511	* fragments. </p></li>
jaroslav@1258	1512	*
jaroslav@1258	1513	* <li><p> Two hierarchical URIs with identical schemes are ordered
jaroslav@1258	1514	* according to the ordering of their authority components: </p>
jaroslav@1258	1515	*
jaroslav@1258	1516	* <ul type=disc>
jaroslav@1258	1517	*
jaroslav@1258	1518	* <li><p> If both authority components are server-based then the URIs
jaroslav@1258	1519	* are ordered according to their user-information components; if these
jaroslav@1258	1520	* components are identical then the URIs are ordered according to the
jaroslav@1258	1521	* ordering of their hosts, without regard to case; if the hosts are
jaroslav@1258	1522	* identical then the URIs are ordered according to the ordering of
jaroslav@1258	1523	* their ports. </p></li>
jaroslav@1258	1524	*
jaroslav@1258	1525	* <li><p> If one or both authority components are registry-based then
jaroslav@1258	1526	* the URIs are ordered according to the ordering of their authority
jaroslav@1258	1527	* components. </p></li>
jaroslav@1258	1528	*
jaroslav@1258	1529	* </ul></li>
jaroslav@1258	1530	*
jaroslav@1258	1531	* <li><p> Finally, two hierarchical URIs with identical schemes and
jaroslav@1258	1532	* authority components are ordered according to the ordering of their
jaroslav@1258	1533	* paths; if their paths are identical then they are ordered according to
jaroslav@1258	1534	* the ordering of their queries; if the queries are identical then they
jaroslav@1258	1535	* are ordered according to the order of their fragments. </p></li>
jaroslav@1258	1536	*
jaroslav@1258	1537	* </ul>
jaroslav@1258	1538	*
jaroslav@1258	1539	* <p> This method satisfies the general contract of the {@link
jaroslav@1258	1540	* java.lang.Comparable#compareTo(Object) Comparable.compareTo}
jaroslav@1258	1541	* method. </p>
jaroslav@1258	1542	*
jaroslav@1258	1543	* @param that
jaroslav@1258	1544	* The object to which this URI is to be compared
jaroslav@1258	1545	*
jaroslav@1258	1546	* @return A negative integer, zero, or a positive integer as this URI is
jaroslav@1258	1547	* less than, equal to, or greater than the given URI
jaroslav@1258	1548	*
jaroslav@1258	1549	* @throws ClassCastException
jaroslav@1258	1550	* If the given object is not a URI
jaroslav@1258	1551	*/
jaroslav@1258	1552	public int compareTo(URI that) {
jaroslav@1258	1553	int c;
jaroslav@1258	1554
jaroslav@1258	1555	if ((c = compareIgnoringCase(this.scheme, that.scheme)) != 0)
jaroslav@1258	1556	return c;
jaroslav@1258	1557
jaroslav@1258	1558	if (this.isOpaque()) {
jaroslav@1258	1559	if (that.isOpaque()) {
jaroslav@1258	1560	// Both opaque
jaroslav@1258	1561	if ((c = compare(this.schemeSpecificPart,
jaroslav@1258	1562	that.schemeSpecificPart)) != 0)
jaroslav@1258	1563	return c;
jaroslav@1258	1564	return compare(this.fragment, that.fragment);
jaroslav@1258	1565	}
jaroslav@1258	1566	return +1; // Opaque > hierarchical
jaroslav@1258	1567	} else if (that.isOpaque()) {
jaroslav@1258	1568	return -1; // Hierarchical < opaque
jaroslav@1258	1569	}
jaroslav@1258	1570
jaroslav@1258	1571	// Hierarchical
jaroslav@1258	1572	if ((this.host != null) && (that.host != null)) {
jaroslav@1258	1573	// Both server-based
jaroslav@1258	1574	if ((c = compare(this.userInfo, that.userInfo)) != 0)
jaroslav@1258	1575	return c;
jaroslav@1258	1576	if ((c = compareIgnoringCase(this.host, that.host)) != 0)
jaroslav@1258	1577	return c;
jaroslav@1258	1578	if ((c = this.port - that.port) != 0)
jaroslav@1258	1579	return c;
jaroslav@1258	1580	} else {
jaroslav@1258	1581	// If one or both authorities are registry-based then we simply
jaroslav@1258	1582	// compare them in the usual, case-sensitive way. If one is
jaroslav@1258	1583	// registry-based and one is server-based then the strings are
jaroslav@1258	1584	// guaranteed to be unequal, hence the comparison will never return
jaroslav@1258	1585	// zero and the compareTo and equals methods will remain
jaroslav@1258	1586	// consistent.
jaroslav@1258	1587	if ((c = compare(this.authority, that.authority)) != 0) return c;
jaroslav@1258	1588	}
jaroslav@1258	1589
jaroslav@1258	1590	if ((c = compare(this.path, that.path)) != 0) return c;
jaroslav@1258	1591	if ((c = compare(this.query, that.query)) != 0) return c;
jaroslav@1258	1592	return compare(this.fragment, that.fragment);
jaroslav@1258	1593	}
jaroslav@1258	1594
jaroslav@1258	1595	/**
jaroslav@1258	1596	* Returns the content of this URI as a string.
jaroslav@1258	1597	*
jaroslav@1258	1598	* <p> If this URI was created by invoking one of the constructors in this
jaroslav@1258	1599	* class then a string equivalent to the original input string, or to the
jaroslav@1258	1600	* string computed from the originally-given components, as appropriate, is
jaroslav@1258	1601	* returned. Otherwise this URI was created by normalization, resolution,
jaroslav@1258	1602	* or relativization, and so a string is constructed from this URI's
jaroslav@1258	1603	* components according to the rules specified in <a
jaroslav@1258	1604	* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
jaroslav@1258	1605	* section 5.2, step 7. </p>
jaroslav@1258	1606	*
jaroslav@1258	1607	* @return The string form of this URI
jaroslav@1258	1608	*/
jaroslav@1258	1609	public String toString() {
jaroslav@1258	1610	defineString();
jaroslav@1258	1611	return string;
jaroslav@1258	1612	}
jaroslav@1258	1613
jaroslav@1258	1614	/**
jaroslav@1258	1615	* Returns the content of this URI as a US-ASCII string.
jaroslav@1258	1616	*
jaroslav@1258	1617	* <p> If this URI does not contain any characters in the <i>other</i>
jaroslav@1258	1618	* category then an invocation of this method will return the same value as
jaroslav@1258	1619	* an invocation of the {@link #toString() toString} method. Otherwise
jaroslav@1258	1620	* this method works as if by invoking that method and then <a
jaroslav@1258	1621	* href="#encode">encoding</a> the result. </p>
jaroslav@1258	1622	*
jaroslav@1258	1623	* @return The string form of this URI, encoded as needed
jaroslav@1258	1624	* so that it only contains characters in the US-ASCII
jaroslav@1258	1625	* charset
jaroslav@1258	1626	*/
jaroslav@1258	1627	public String toASCIIString() {
jaroslav@1258	1628	defineString();
jaroslav@1258	1629	return encode(string);
jaroslav@1258	1630	}
jaroslav@1258	1631
jaroslav@1258	1632
jaroslav@1258	1633	// -- Serialization support --
jaroslav@1258	1634
jaroslav@1258	1635	/**
jaroslav@1258	1636	* Saves the content of this URI to the given serial stream.
jaroslav@1258	1637	*
jaroslav@1258	1638	* <p> The only serializable field of a URI instance is its <tt>string</tt>
jaroslav@1258	1639	* field. That field is given a value, if it does not have one already,
jaroslav@1258	1640	* and then the {@link java.io.ObjectOutputStream#defaultWriteObject()}
jaroslav@1258	1641	* method of the given object-output stream is invoked. </p>
jaroslav@1258	1642	*
jaroslav@1258	1643	* @param os The object-output stream to which this object
jaroslav@1258	1644	* is to be written
jaroslav@1258	1645	*/
jaroslav@1258	1646	private void writeObject(ObjectOutputStream os)
jaroslav@1258	1647	throws IOException
jaroslav@1258	1648	{
jaroslav@1258	1649	defineString();
jaroslav@1258	1650	os.defaultWriteObject(); // Writes the string field only
jaroslav@1258	1651	}
jaroslav@1258	1652
jaroslav@1258	1653	/**
jaroslav@1258	1654	* Reconstitutes a URI from the given serial stream.
jaroslav@1258	1655	*
jaroslav@1258	1656	* <p> The {@link java.io.ObjectInputStream#defaultReadObject()} method is
jaroslav@1258	1657	* invoked to read the value of the <tt>string</tt> field. The result is
jaroslav@1258	1658	* then parsed in the usual way.
jaroslav@1258	1659	*
jaroslav@1258	1660	* @param is The object-input stream from which this object
jaroslav@1258	1661	* is being read
jaroslav@1258	1662	*/
jaroslav@1258	1663	private void readObject(ObjectInputStream is)
jaroslav@1258	1664	throws ClassNotFoundException, IOException
jaroslav@1258	1665	{
jaroslav@1258	1666	port = -1; // Argh
jaroslav@1258	1667	is.defaultReadObject();
jaroslav@1258	1668	try {
jaroslav@1258	1669	new Parser(string).parse(false);
jaroslav@1258	1670	} catch (URISyntaxException x) {
jaroslav@1258	1671	IOException y = new InvalidObjectException("Invalid URI");
jaroslav@1258	1672	y.initCause(x);
jaroslav@1258	1673	throw y;
jaroslav@1258	1674	}
jaroslav@1258	1675	}
jaroslav@1258	1676
jaroslav@1258	1677
jaroslav@1258	1678	// -- End of public methods --
jaroslav@1258	1679
jaroslav@1258	1680
jaroslav@1258	1681	// -- Utility methods for string-field comparison and hashing --
jaroslav@1258	1682
jaroslav@1258	1683	// These methods return appropriate values for null string arguments,
jaroslav@1258	1684	// thereby simplifying the equals, hashCode, and compareTo methods.
jaroslav@1258	1685	//
jaroslav@1258	1686	// The case-ignoring methods should only be applied to strings whose
jaroslav@1258	1687	// characters are all known to be US-ASCII. Because of this restriction,
jaroslav@1258	1688	// these methods are faster than the similar methods in the String class.
jaroslav@1258	1689
jaroslav@1258	1690	// US-ASCII only
jaroslav@1258	1691	private static int toLower(char c) {
jaroslav@1258	1692	if ((c >= 'A') && (c <= 'Z'))
jaroslav@1258	1693	return c + ('a' - 'A');
jaroslav@1258	1694	return c;
jaroslav@1258	1695	}
jaroslav@1258	1696
jaroslav@1258	1697	private static boolean equal(String s, String t) {
jaroslav@1258	1698	if (s == t) return true;
jaroslav@1258	1699	if ((s != null) && (t != null)) {
jaroslav@1258	1700	if (s.length() != t.length())
jaroslav@1258	1701	return false;
jaroslav@1258	1702	if (s.indexOf('%') < 0)
jaroslav@1258	1703	return s.equals(t);
jaroslav@1258	1704	int n = s.length();
jaroslav@1258	1705	for (int i = 0; i < n;) {
jaroslav@1258	1706	char c = s.charAt(i);
jaroslav@1258	1707	char d = t.charAt(i);
jaroslav@1258	1708	if (c != '%') {
jaroslav@1258	1709	if (c != d)
jaroslav@1258	1710	return false;
jaroslav@1258	1711	i++;
jaroslav@1258	1712	continue;
jaroslav@1258	1713	}
jaroslav@1258	1714	i++;
jaroslav@1258	1715	if (toLower(s.charAt(i)) != toLower(t.charAt(i)))
jaroslav@1258	1716	return false;
jaroslav@1258	1717	i++;
jaroslav@1258	1718	if (toLower(s.charAt(i)) != toLower(t.charAt(i)))
jaroslav@1258	1719	return false;
jaroslav@1258	1720	i++;
jaroslav@1258	1721	}
jaroslav@1258	1722	return true;
jaroslav@1258	1723	}
jaroslav@1258	1724	return false;
jaroslav@1258	1725	}
jaroslav@1258	1726
jaroslav@1258	1727	// US-ASCII only
jaroslav@1258	1728	private static boolean equalIgnoringCase(String s, String t) {
jaroslav@1258	1729	if (s == t) return true;
jaroslav@1258	1730	if ((s != null) && (t != null)) {
jaroslav@1258	1731	int n = s.length();
jaroslav@1258	1732	if (t.length() != n)
jaroslav@1258	1733	return false;
jaroslav@1258	1734	for (int i = 0; i < n; i++) {
jaroslav@1258	1735	if (toLower(s.charAt(i)) != toLower(t.charAt(i)))
jaroslav@1258	1736	return false;
jaroslav@1258	1737	}
jaroslav@1258	1738	return true;
jaroslav@1258	1739	}
jaroslav@1258	1740	return false;
jaroslav@1258	1741	}
jaroslav@1258	1742
jaroslav@1258	1743	private static int hash(int hash, String s) {
jaroslav@1258	1744	if (s == null) return hash;
jaroslav@1258	1745	return hash * 127 + s.hashCode();
jaroslav@1258	1746	}
jaroslav@1258	1747
jaroslav@1258	1748	// US-ASCII only
jaroslav@1258	1749	private static int hashIgnoringCase(int hash, String s) {
jaroslav@1258	1750	if (s == null) return hash;
jaroslav@1258	1751	int h = hash;
jaroslav@1258	1752	int n = s.length();
jaroslav@1258	1753	for (int i = 0; i < n; i++)
jaroslav@1258	1754	h = 31 * h + toLower(s.charAt(i));
jaroslav@1258	1755	return h;
jaroslav@1258	1756	}
jaroslav@1258	1757
jaroslav@1258	1758	private static int compare(String s, String t) {
jaroslav@1258	1759	if (s == t) return 0;
jaroslav@1258	1760	if (s != null) {
jaroslav@1258	1761	if (t != null)
jaroslav@1258	1762	return s.compareTo(t);
jaroslav@1258	1763	else
jaroslav@1258	1764	return +1;
jaroslav@1258	1765	} else {
jaroslav@1258	1766	return -1;
jaroslav@1258	1767	}
jaroslav@1258	1768	}
jaroslav@1258	1769
jaroslav@1258	1770	// US-ASCII only
jaroslav@1258	1771	private static int compareIgnoringCase(String s, String t) {
jaroslav@1258	1772	if (s == t) return 0;
jaroslav@1258	1773	if (s != null) {
jaroslav@1258	1774	if (t != null) {
jaroslav@1258	1775	int sn = s.length();
jaroslav@1258	1776	int tn = t.length();
jaroslav@1258	1777	int n = sn < tn ? sn : tn;
jaroslav@1258	1778	for (int i = 0; i < n; i++) {
jaroslav@1258	1779	int c = toLower(s.charAt(i)) - toLower(t.charAt(i));
jaroslav@1258	1780	if (c != 0)
jaroslav@1258	1781	return c;
jaroslav@1258	1782	}
jaroslav@1258	1783	return sn - tn;
jaroslav@1258	1784	}
jaroslav@1258	1785	return +1;
jaroslav@1258	1786	} else {
jaroslav@1258	1787	return -1;
jaroslav@1258	1788	}
jaroslav@1258	1789	}
jaroslav@1258	1790
jaroslav@1258	1791
jaroslav@1258	1792	// -- String construction --
jaroslav@1258	1793
jaroslav@1258	1794	// If a scheme is given then the path, if given, must be absolute
jaroslav@1258	1795	//
jaroslav@1258	1796	private static void checkPath(String s, String scheme, String path)
jaroslav@1258	1797	throws URISyntaxException
jaroslav@1258	1798	{
jaroslav@1258	1799	if (scheme != null) {
jaroslav@1258	1800	if ((path != null)
jaroslav@1258	1801	&& ((path.length() > 0) && (path.charAt(0) != '/')))
jaroslav@1258	1802	throw new URISyntaxException(s,
jaroslav@1258	1803	"Relative path in absolute URI");
jaroslav@1258	1804	}
jaroslav@1258	1805	}
jaroslav@1258	1806
jaroslav@1258	1807	private void appendAuthority(StringBuffer sb,
jaroslav@1258	1808	String authority,
jaroslav@1258	1809	String userInfo,
jaroslav@1258	1810	String host,
jaroslav@1258	1811	int port)
jaroslav@1258	1812	{
jaroslav@1258	1813	if (host != null) {
jaroslav@1258	1814	sb.append("//");
jaroslav@1258	1815	if (userInfo != null) {
jaroslav@1258	1816	sb.append(quote(userInfo, L_USERINFO, H_USERINFO));
jaroslav@1258	1817	sb.append('@');
jaroslav@1258	1818	}
jaroslav@1258	1819	boolean needBrackets = ((host.indexOf(':') >= 0)
jaroslav@1258	1820	&& !host.startsWith("[")
jaroslav@1258	1821	&& !host.endsWith("]"));
jaroslav@1258	1822	if (needBrackets) sb.append('[');
jaroslav@1258	1823	sb.append(host);
jaroslav@1258	1824	if (needBrackets) sb.append(']');
jaroslav@1258	1825	if (port != -1) {
jaroslav@1258	1826	sb.append(':');
jaroslav@1258	1827	sb.append(port);
jaroslav@1258	1828	}
jaroslav@1258	1829	} else if (authority != null) {
jaroslav@1258	1830	sb.append("//");
jaroslav@1258	1831	if (authority.startsWith("[")) {
jaroslav@1258	1832	// authority should (but may not) contain an embedded IPv6 address
jaroslav@1258	1833	int end = authority.indexOf("]");
jaroslav@1258	1834	String doquote = authority, dontquote = "";
jaroslav@1258	1835	if (end != -1 && authority.indexOf(":") != -1) {
jaroslav@1258	1836	// the authority contains an IPv6 address
jaroslav@1258	1837	if (end == authority.length()) {
jaroslav@1258	1838	dontquote = authority;
jaroslav@1258	1839	doquote = "";
jaroslav@1258	1840	} else {
jaroslav@1258	1841	dontquote = authority.substring(0 , end + 1);
jaroslav@1258	1842	doquote = authority.substring(end + 1);
jaroslav@1258	1843	}
jaroslav@1258	1844	}
jaroslav@1258	1845	sb.append(dontquote);
jaroslav@1258	1846	sb.append(quote(doquote,
jaroslav@1258	1847	L_REG_NAME \| L_SERVER,
jaroslav@1258	1848	H_REG_NAME \| H_SERVER));
jaroslav@1258	1849	} else {
jaroslav@1258	1850	sb.append(quote(authority,
jaroslav@1258	1851	L_REG_NAME \| L_SERVER,
jaroslav@1258	1852	H_REG_NAME \| H_SERVER));
jaroslav@1258	1853	}
jaroslav@1258	1854	}
jaroslav@1258	1855	}
jaroslav@1258	1856
jaroslav@1258	1857	private void appendSchemeSpecificPart(StringBuffer sb,
jaroslav@1258	1858	String opaquePart,
jaroslav@1258	1859	String authority,
jaroslav@1258	1860	String userInfo,
jaroslav@1258	1861	String host,
jaroslav@1258	1862	int port,
jaroslav@1258	1863	String path,
jaroslav@1258	1864	String query)
jaroslav@1258	1865	{
jaroslav@1258	1866	if (opaquePart != null) {
jaroslav@1258	1867	/* check if SSP begins with an IPv6 address
jaroslav@1258	1868	* because we must not quote a literal IPv6 address
jaroslav@1258	1869	*/
jaroslav@1258	1870	if (opaquePart.startsWith("//[")) {
jaroslav@1258	1871	int end = opaquePart.indexOf("]");
jaroslav@1258	1872	if (end != -1 && opaquePart.indexOf(":")!=-1) {
jaroslav@1258	1873	String doquote, dontquote;
jaroslav@1258	1874	if (end == opaquePart.length()) {
jaroslav@1258	1875	dontquote = opaquePart;
jaroslav@1258	1876	doquote = "";
jaroslav@1258	1877	} else {
jaroslav@1258	1878	dontquote = opaquePart.substring(0,end+1);
jaroslav@1258	1879	doquote = opaquePart.substring(end+1);
jaroslav@1258	1880	}
jaroslav@1258	1881	sb.append (dontquote);
jaroslav@1258	1882	sb.append(quote(doquote, L_URIC, H_URIC));
jaroslav@1258	1883	}
jaroslav@1258	1884	} else {
jaroslav@1258	1885	sb.append(quote(opaquePart, L_URIC, H_URIC));
jaroslav@1258	1886	}
jaroslav@1258	1887	} else {
jaroslav@1258	1888	appendAuthority(sb, authority, userInfo, host, port);
jaroslav@1258	1889	if (path != null)
jaroslav@1258	1890	sb.append(quote(path, L_PATH, H_PATH));
jaroslav@1258	1891	if (query != null) {
jaroslav@1258	1892	sb.append('?');
jaroslav@1258	1893	sb.append(quote(query, L_URIC, H_URIC));
jaroslav@1258	1894	}
jaroslav@1258	1895	}
jaroslav@1258	1896	}
jaroslav@1258	1897
jaroslav@1258	1898	private void appendFragment(StringBuffer sb, String fragment) {
jaroslav@1258	1899	if (fragment != null) {
jaroslav@1258	1900	sb.append('#');
jaroslav@1258	1901	sb.append(quote(fragment, L_URIC, H_URIC));
jaroslav@1258	1902	}
jaroslav@1258	1903	}
jaroslav@1258	1904
jaroslav@1258	1905	private String toString(String scheme,
jaroslav@1258	1906	String opaquePart,
jaroslav@1258	1907	String authority,
jaroslav@1258	1908	String userInfo,
jaroslav@1258	1909	String host,
jaroslav@1258	1910	int port,
jaroslav@1258	1911	String path,
jaroslav@1258	1912	String query,
jaroslav@1258	1913	String fragment)
jaroslav@1258	1914	{
jaroslav@1258	1915	StringBuffer sb = new StringBuffer();
jaroslav@1258	1916	if (scheme != null) {
jaroslav@1258	1917	sb.append(scheme);
jaroslav@1258	1918	sb.append(':');
jaroslav@1258	1919	}
jaroslav@1258	1920	appendSchemeSpecificPart(sb, opaquePart,
jaroslav@1258	1921	authority, userInfo, host, port,
jaroslav@1258	1922	path, query);
jaroslav@1258	1923	appendFragment(sb, fragment);
jaroslav@1258	1924	return sb.toString();
jaroslav@1258	1925	}
jaroslav@1258	1926
jaroslav@1258	1927	private void defineSchemeSpecificPart() {
jaroslav@1258	1928	if (schemeSpecificPart != null) return;
jaroslav@1258	1929	StringBuffer sb = new StringBuffer();
jaroslav@1258	1930	appendSchemeSpecificPart(sb, null, getAuthority(), getUserInfo(),
jaroslav@1258	1931	host, port, getPath(), getQuery());
jaroslav@1258	1932	if (sb.length() == 0) return;
jaroslav@1258	1933	schemeSpecificPart = sb.toString();
jaroslav@1258	1934	}
jaroslav@1258	1935
jaroslav@1258	1936	private void defineString() {
jaroslav@1258	1937	if (string != null) return;
jaroslav@1258	1938
jaroslav@1258	1939	StringBuffer sb = new StringBuffer();
jaroslav@1258	1940	if (scheme != null) {
jaroslav@1258	1941	sb.append(scheme);
jaroslav@1258	1942	sb.append(':');
jaroslav@1258	1943	}
jaroslav@1258	1944	if (isOpaque()) {
jaroslav@1258	1945	sb.append(schemeSpecificPart);
jaroslav@1258	1946	} else {
jaroslav@1258	1947	if (host != null) {
jaroslav@1258	1948	sb.append("//");
jaroslav@1258	1949	if (userInfo != null) {
jaroslav@1258	1950	sb.append(userInfo);
jaroslav@1258	1951	sb.append('@');
jaroslav@1258	1952	}
jaroslav@1258	1953	boolean needBrackets = ((host.indexOf(':') >= 0)
jaroslav@1258	1954	&& !host.startsWith("[")
jaroslav@1258	1955	&& !host.endsWith("]"));
jaroslav@1258	1956	if (needBrackets) sb.append('[');
jaroslav@1258	1957	sb.append(host);
jaroslav@1258	1958	if (needBrackets) sb.append(']');
jaroslav@1258	1959	if (port != -1) {
jaroslav@1258	1960	sb.append(':');
jaroslav@1258	1961	sb.append(port);
jaroslav@1258	1962	}
jaroslav@1258	1963	} else if (authority != null) {
jaroslav@1258	1964	sb.append("//");
jaroslav@1258	1965	sb.append(authority);
jaroslav@1258	1966	}
jaroslav@1258	1967	if (path != null)
jaroslav@1258	1968	sb.append(path);
jaroslav@1258	1969	if (query != null) {
jaroslav@1258	1970	sb.append('?');
jaroslav@1258	1971	sb.append(query);
jaroslav@1258	1972	}
jaroslav@1258	1973	}
jaroslav@1258	1974	if (fragment != null) {
jaroslav@1258	1975	sb.append('#');
jaroslav@1258	1976	sb.append(fragment);
jaroslav@1258	1977	}
jaroslav@1258	1978	string = sb.toString();
jaroslav@1258	1979	}
jaroslav@1258	1980
jaroslav@1258	1981
jaroslav@1258	1982	// -- Normalization, resolution, and relativization --
jaroslav@1258	1983
jaroslav@1258	1984	// RFC2396 5.2 (6)
jaroslav@1258	1985	private static String resolvePath(String base, String child,
jaroslav@1258	1986	boolean absolute)
jaroslav@1258	1987	{
jaroslav@1258	1988	int i = base.lastIndexOf('/');
jaroslav@1258	1989	int cn = child.length();
jaroslav@1258	1990	String path = "";
jaroslav@1258	1991
jaroslav@1258	1992	if (cn == 0) {
jaroslav@1258	1993	// 5.2 (6a)
jaroslav@1258	1994	if (i >= 0)
jaroslav@1258	1995	path = base.substring(0, i + 1);
jaroslav@1258	1996	} else {
jaroslav@1258	1997	StringBuffer sb = new StringBuffer(base.length() + cn);
jaroslav@1258	1998	// 5.2 (6a)
jaroslav@1258	1999	if (i >= 0)
jaroslav@1258	2000	sb.append(base.substring(0, i + 1));
jaroslav@1258	2001	// 5.2 (6b)
jaroslav@1258	2002	sb.append(child);
jaroslav@1258	2003	path = sb.toString();
jaroslav@1258	2004	}
jaroslav@1258	2005
jaroslav@1258	2006	// 5.2 (6c-f)
jaroslav@1258	2007	String np = normalize(path);
jaroslav@1258	2008
jaroslav@1258	2009	// 5.2 (6g): If the result is absolute but the path begins with "../",
jaroslav@1258	2010	// then we simply leave the path as-is
jaroslav@1258	2011
jaroslav@1258	2012	return np;
jaroslav@1258	2013	}
jaroslav@1258	2014
jaroslav@1258	2015	// RFC2396 5.2
jaroslav@1258	2016	private static URI resolve(URI base, URI child) {
jaroslav@1258	2017	// check if child if opaque first so that NPE is thrown
jaroslav@1258	2018	// if child is null.
jaroslav@1258	2019	if (child.isOpaque() \|\| base.isOpaque())
jaroslav@1258	2020	return child;
jaroslav@1258	2021
jaroslav@1258	2022	// 5.2 (2): Reference to current document (lone fragment)
jaroslav@1258	2023	if ((child.scheme == null) && (child.authority == null)
jaroslav@1258	2024	&& child.path.equals("") && (child.fragment != null)
jaroslav@1258	2025	&& (child.query == null)) {
jaroslav@1258	2026	if ((base.fragment != null)
jaroslav@1258	2027	&& child.fragment.equals(base.fragment)) {
jaroslav@1258	2028	return base;
jaroslav@1258	2029	}
jaroslav@1258	2030	URI ru = new URI();
jaroslav@1258	2031	ru.scheme = base.scheme;
jaroslav@1258	2032	ru.authority = base.authority;
jaroslav@1258	2033	ru.userInfo = base.userInfo;
jaroslav@1258	2034	ru.host = base.host;
jaroslav@1258	2035	ru.port = base.port;
jaroslav@1258	2036	ru.path = base.path;
jaroslav@1258	2037	ru.fragment = child.fragment;
jaroslav@1258	2038	ru.query = base.query;
jaroslav@1258	2039	return ru;
jaroslav@1258	2040	}
jaroslav@1258	2041
jaroslav@1258	2042	// 5.2 (3): Child is absolute
jaroslav@1258	2043	if (child.scheme != null)
jaroslav@1258	2044	return child;
jaroslav@1258	2045
jaroslav@1258	2046	URI ru = new URI(); // Resolved URI
jaroslav@1258	2047	ru.scheme = base.scheme;
jaroslav@1258	2048	ru.query = child.query;
jaroslav@1258	2049	ru.fragment = child.fragment;
jaroslav@1258	2050
jaroslav@1258	2051	// 5.2 (4): Authority
jaroslav@1258	2052	if (child.authority == null) {
jaroslav@1258	2053	ru.authority = base.authority;
jaroslav@1258	2054	ru.host = base.host;
jaroslav@1258	2055	ru.userInfo = base.userInfo;
jaroslav@1258	2056	ru.port = base.port;
jaroslav@1258	2057
jaroslav@1258	2058	String cp = (child.path == null) ? "" : child.path;
jaroslav@1258	2059	if ((cp.length() > 0) && (cp.charAt(0) == '/')) {
jaroslav@1258	2060	// 5.2 (5): Child path is absolute
jaroslav@1258	2061	ru.path = child.path;
jaroslav@1258	2062	} else {
jaroslav@1258	2063	// 5.2 (6): Resolve relative path
jaroslav@1258	2064	ru.path = resolvePath(base.path, cp, base.isAbsolute());
jaroslav@1258	2065	}
jaroslav@1258	2066	} else {
jaroslav@1258	2067	ru.authority = child.authority;
jaroslav@1258	2068	ru.host = child.host;
jaroslav@1258	2069	ru.userInfo = child.userInfo;
jaroslav@1258	2070	ru.host = child.host;
jaroslav@1258	2071	ru.port = child.port;
jaroslav@1258	2072	ru.path = child.path;
jaroslav@1258	2073	}
jaroslav@1258	2074
jaroslav@1258	2075	// 5.2 (7): Recombine (nothing to do here)
jaroslav@1258	2076	return ru;
jaroslav@1258	2077	}
jaroslav@1258	2078
jaroslav@1258	2079	// If the given URI's path is normal then return the URI;
jaroslav@1258	2080	// o.w., return a new URI containing the normalized path.
jaroslav@1258	2081	//
jaroslav@1258	2082	private static URI normalize(URI u) {
jaroslav@1258	2083	if (u.isOpaque() \|\| (u.path == null) \|\| (u.path.length() == 0))
jaroslav@1258	2084	return u;
jaroslav@1258	2085
jaroslav@1258	2086	String np = normalize(u.path);
jaroslav@1258	2087	if (np == u.path)
jaroslav@1258	2088	return u;
jaroslav@1258	2089
jaroslav@1258	2090	URI v = new URI();
jaroslav@1258	2091	v.scheme = u.scheme;
jaroslav@1258	2092	v.fragment = u.fragment;
jaroslav@1258	2093	v.authority = u.authority;
jaroslav@1258	2094	v.userInfo = u.userInfo;
jaroslav@1258	2095	v.host = u.host;
jaroslav@1258	2096	v.port = u.port;
jaroslav@1258	2097	v.path = np;
jaroslav@1258	2098	v.query = u.query;
jaroslav@1258	2099	return v;
jaroslav@1258	2100	}
jaroslav@1258	2101
jaroslav@1258	2102	// If both URIs are hierarchical, their scheme and authority components are
jaroslav@1258	2103	// identical, and the base path is a prefix of the child's path, then
jaroslav@1258	2104	// return a relative URI that, when resolved against the base, yields the
jaroslav@1258	2105	// child; otherwise, return the child.
jaroslav@1258	2106	//
jaroslav@1258	2107	private static URI relativize(URI base, URI child) {
jaroslav@1258	2108	// check if child if opaque first so that NPE is thrown
jaroslav@1258	2109	// if child is null.
jaroslav@1258	2110	if (child.isOpaque() \|\| base.isOpaque())
jaroslav@1258	2111	return child;
jaroslav@1258	2112	if (!equalIgnoringCase(base.scheme, child.scheme)
jaroslav@1258	2113	\|\| !equal(base.authority, child.authority))
jaroslav@1258	2114	return child;
jaroslav@1258	2115
jaroslav@1258	2116	String bp = normalize(base.path);
jaroslav@1258	2117	String cp = normalize(child.path);
jaroslav@1258	2118	if (!bp.equals(cp)) {
jaroslav@1258	2119	if (!bp.endsWith("/"))
jaroslav@1258	2120	bp = bp + "/";
jaroslav@1258	2121	if (!cp.startsWith(bp))
jaroslav@1258	2122	return child;
jaroslav@1258	2123	}
jaroslav@1258	2124
jaroslav@1258	2125	URI v = new URI();
jaroslav@1258	2126	v.path = cp.substring(bp.length());
jaroslav@1258	2127	v.query = child.query;
jaroslav@1258	2128	v.fragment = child.fragment;
jaroslav@1258	2129	return v;
jaroslav@1258	2130	}
jaroslav@1258	2131
jaroslav@1258	2132
jaroslav@1258	2133
jaroslav@1258	2134	// -- Path normalization --
jaroslav@1258	2135
jaroslav@1258	2136	// The following algorithm for path normalization avoids the creation of a
jaroslav@1258	2137	// string object for each segment, as well as the use of a string buffer to
jaroslav@1258	2138	// compute the final result, by using a single char array and editing it in
jaroslav@1258	2139	// place. The array is first split into segments, replacing each slash
jaroslav@1258	2140	// with '\0' and creating a segment-index array, each element of which is
jaroslav@1258	2141	// the index of the first char in the corresponding segment. We then walk
jaroslav@1258	2142	// through both arrays, removing ".", "..", and other segments as necessary
jaroslav@1258	2143	// by setting their entries in the index array to -1. Finally, the two
jaroslav@1258	2144	// arrays are used to rejoin the segments and compute the final result.
jaroslav@1258	2145	//
jaroslav@1258	2146	// This code is based upon src/solaris/native/java/io/canonicalize_md.c
jaroslav@1258	2147
jaroslav@1258	2148
jaroslav@1258	2149	// Check the given path to see if it might need normalization. A path
jaroslav@1258	2150	// might need normalization if it contains duplicate slashes, a "."
jaroslav@1258	2151	// segment, or a ".." segment. Return -1 if no further normalization is
jaroslav@1258	2152	// possible, otherwise return the number of segments found.
jaroslav@1258	2153	//
jaroslav@1258	2154	// This method takes a string argument rather than a char array so that
jaroslav@1258	2155	// this test can be performed without invoking path.toCharArray().
jaroslav@1258	2156	//
jaroslav@1258	2157	static private int needsNormalization(String path) {
jaroslav@1258	2158	boolean normal = true;
jaroslav@1258	2159	int ns = 0; // Number of segments
jaroslav@1258	2160	int end = path.length() - 1; // Index of last char in path
jaroslav@1258	2161	int p = 0; // Index of next char in path
jaroslav@1258	2162
jaroslav@1258	2163	// Skip initial slashes
jaroslav@1258	2164	while (p <= end) {
jaroslav@1258	2165	if (path.charAt(p) != '/') break;
jaroslav@1258	2166	p++;
jaroslav@1258	2167	}
jaroslav@1258	2168	if (p > 1) normal = false;
jaroslav@1258	2169
jaroslav@1258	2170	// Scan segments
jaroslav@1258	2171	while (p <= end) {
jaroslav@1258	2172
jaroslav@1258	2173	// Looking at "." or ".." ?
jaroslav@1258	2174	if ((path.charAt(p) == '.')
jaroslav@1258	2175	&& ((p == end)
jaroslav@1258	2176	\|\| ((path.charAt(p + 1) == '/')
jaroslav@1258	2177	\|\| ((path.charAt(p + 1) == '.')
jaroslav@1258	2178	&& ((p + 1 == end)
jaroslav@1258	2179	\|\| (path.charAt(p + 2) == '/')))))) {
jaroslav@1258	2180	normal = false;
jaroslav@1258	2181	}
jaroslav@1258	2182	ns++;
jaroslav@1258	2183
jaroslav@1258	2184	// Find beginning of next segment
jaroslav@1258	2185	while (p <= end) {
jaroslav@1258	2186	if (path.charAt(p++) != '/')
jaroslav@1258	2187	continue;
jaroslav@1258	2188
jaroslav@1258	2189	// Skip redundant slashes
jaroslav@1258	2190	while (p <= end) {
jaroslav@1258	2191	if (path.charAt(p) != '/') break;
jaroslav@1258	2192	normal = false;
jaroslav@1258	2193	p++;
jaroslav@1258	2194	}
jaroslav@1258	2195
jaroslav@1258	2196	break;
jaroslav@1258	2197	}
jaroslav@1258	2198	}
jaroslav@1258	2199
jaroslav@1258	2200	return normal ? -1 : ns;
jaroslav@1258	2201	}
jaroslav@1258	2202
jaroslav@1258	2203
jaroslav@1258	2204	// Split the given path into segments, replacing slashes with nulls and
jaroslav@1258	2205	// filling in the given segment-index array.
jaroslav@1258	2206	//
jaroslav@1258	2207	// Preconditions:
jaroslav@1258	2208	// segs.length == Number of segments in path
jaroslav@1258	2209	//
jaroslav@1258	2210	// Postconditions:
jaroslav@1258	2211	// All slashes in path replaced by '\0'
jaroslav@1258	2212	// segs[i] == Index of first char in segment i (0 <= i < segs.length)
jaroslav@1258	2213	//
jaroslav@1258	2214	static private void split(char[] path, int[] segs) {
jaroslav@1258	2215	int end = path.length - 1; // Index of last char in path
jaroslav@1258	2216	int p = 0; // Index of next char in path
jaroslav@1258	2217	int i = 0; // Index of current segment
jaroslav@1258	2218
jaroslav@1258	2219	// Skip initial slashes
jaroslav@1258	2220	while (p <= end) {
jaroslav@1258	2221	if (path[p] != '/') break;
jaroslav@1258	2222	path[p] = '\0';
jaroslav@1258	2223	p++;
jaroslav@1258	2224	}
jaroslav@1258	2225
jaroslav@1258	2226	while (p <= end) {
jaroslav@1258	2227
jaroslav@1258	2228	// Note start of segment
jaroslav@1258	2229	segs[i++] = p++;
jaroslav@1258	2230
jaroslav@1258	2231	// Find beginning of next segment
jaroslav@1258	2232	while (p <= end) {
jaroslav@1258	2233	if (path[p++] != '/')
jaroslav@1258	2234	continue;
jaroslav@1258	2235	path[p - 1] = '\0';
jaroslav@1258	2236
jaroslav@1258	2237	// Skip redundant slashes
jaroslav@1258	2238	while (p <= end) {
jaroslav@1258	2239	if (path[p] != '/') break;
jaroslav@1258	2240	path[p++] = '\0';
jaroslav@1258	2241	}
jaroslav@1258	2242	break;
jaroslav@1258	2243	}
jaroslav@1258	2244	}
jaroslav@1258	2245
jaroslav@1258	2246	if (i != segs.length)
jaroslav@1258	2247	throw new InternalError(); // ASSERT
jaroslav@1258	2248	}
jaroslav@1258	2249
jaroslav@1258	2250
jaroslav@1258	2251	// Join the segments in the given path according to the given segment-index
jaroslav@1258	2252	// array, ignoring those segments whose index entries have been set to -1,
jaroslav@1258	2253	// and inserting slashes as needed. Return the length of the resulting
jaroslav@1258	2254	// path.
jaroslav@1258	2255	//
jaroslav@1258	2256	// Preconditions:
jaroslav@1258	2257	// segs[i] == -1 implies segment i is to be ignored
jaroslav@1258	2258	// path computed by split, as above, with '\0' having replaced '/'
jaroslav@1258	2259	//
jaroslav@1258	2260	// Postconditions:
jaroslav@1258	2261	// path[0] .. path[return value] == Resulting path
jaroslav@1258	2262	//
jaroslav@1258	2263	static private int join(char[] path, int[] segs) {
jaroslav@1258	2264	int ns = segs.length; // Number of segments
jaroslav@1258	2265	int end = path.length - 1; // Index of last char in path
jaroslav@1258	2266	int p = 0; // Index of next path char to write
jaroslav@1258	2267
jaroslav@1258	2268	if (path[p] == '\0') {
jaroslav@1258	2269	// Restore initial slash for absolute paths
jaroslav@1258	2270	path[p++] = '/';
jaroslav@1258	2271	}
jaroslav@1258	2272
jaroslav@1258	2273	for (int i = 0; i < ns; i++) {
jaroslav@1258	2274	int q = segs[i]; // Current segment
jaroslav@1258	2275	if (q == -1)
jaroslav@1258	2276	// Ignore this segment
jaroslav@1258	2277	continue;
jaroslav@1258	2278
jaroslav@1258	2279	if (p == q) {
jaroslav@1258	2280	// We're already at this segment, so just skip to its end
jaroslav@1258	2281	while ((p <= end) && (path[p] != '\0'))
jaroslav@1258	2282	p++;
jaroslav@1258	2283	if (p <= end) {
jaroslav@1258	2284	// Preserve trailing slash
jaroslav@1258	2285	path[p++] = '/';
jaroslav@1258	2286	}
jaroslav@1258	2287	} else if (p < q) {
jaroslav@1258	2288	// Copy q down to p
jaroslav@1258	2289	while ((q <= end) && (path[q] != '\0'))
jaroslav@1258	2290	path[p++] = path[q++];
jaroslav@1258	2291	if (q <= end) {
jaroslav@1258	2292	// Preserve trailing slash
jaroslav@1258	2293	path[p++] = '/';
jaroslav@1258	2294	}
jaroslav@1258	2295	} else
jaroslav@1258	2296	throw new InternalError(); // ASSERT false
jaroslav@1258	2297	}
jaroslav@1258	2298
jaroslav@1258	2299	return p;
jaroslav@1258	2300	}
jaroslav@1258	2301
jaroslav@1258	2302
jaroslav@1258	2303	// Remove "." segments from the given path, and remove segment pairs
jaroslav@1258	2304	// consisting of a non-".." segment followed by a ".." segment.
jaroslav@1258	2305	//
jaroslav@1258	2306	private static void removeDots(char[] path, int[] segs) {
jaroslav@1258	2307	int ns = segs.length;
jaroslav@1258	2308	int end = path.length - 1;
jaroslav@1258	2309
jaroslav@1258	2310	for (int i = 0; i < ns; i++) {
jaroslav@1258	2311	int dots = 0; // Number of dots found (0, 1, or 2)
jaroslav@1258	2312
jaroslav@1258	2313	// Find next occurrence of "." or ".."
jaroslav@1258	2314	do {
jaroslav@1258	2315	int p = segs[i];
jaroslav@1258	2316	if (path[p] == '.') {
jaroslav@1258	2317	if (p == end) {
jaroslav@1258	2318	dots = 1;
jaroslav@1258	2319	break;
jaroslav@1258	2320	} else if (path[p + 1] == '\0') {
jaroslav@1258	2321	dots = 1;
jaroslav@1258	2322	break;
jaroslav@1258	2323	} else if ((path[p + 1] == '.')
jaroslav@1258	2324	&& ((p + 1 == end)
jaroslav@1258	2325	\|\| (path[p + 2] == '\0'))) {
jaroslav@1258	2326	dots = 2;
jaroslav@1258	2327	break;
jaroslav@1258	2328	}
jaroslav@1258	2329	}
jaroslav@1258	2330	i++;
jaroslav@1258	2331	} while (i < ns);
jaroslav@1258	2332	if ((i > ns) \|\| (dots == 0))
jaroslav@1258	2333	break;
jaroslav@1258	2334
jaroslav@1258	2335	if (dots == 1) {
jaroslav@1258	2336	// Remove this occurrence of "."
jaroslav@1258	2337	segs[i] = -1;
jaroslav@1258	2338	} else {
jaroslav@1258	2339	// If there is a preceding non-".." segment, remove both that
jaroslav@1258	2340	// segment and this occurrence of ".."; otherwise, leave this
jaroslav@1258	2341	// ".." segment as-is.
jaroslav@1258	2342	int j;
jaroslav@1258	2343	for (j = i - 1; j >= 0; j--) {
jaroslav@1258	2344	if (segs[j] != -1) break;
jaroslav@1258	2345	}
jaroslav@1258	2346	if (j >= 0) {
jaroslav@1258	2347	int q = segs[j];
jaroslav@1258	2348	if (!((path[q] == '.')
jaroslav@1258	2349	&& (path[q + 1] == '.')
jaroslav@1258	2350	&& (path[q + 2] == '\0'))) {
jaroslav@1258	2351	segs[i] = -1;
jaroslav@1258	2352	segs[j] = -1;
jaroslav@1258	2353	}
jaroslav@1258	2354	}
jaroslav@1258	2355	}
jaroslav@1258	2356	}
jaroslav@1258	2357	}
jaroslav@1258	2358
jaroslav@1258	2359
jaroslav@1258	2360	// DEVIATION: If the normalized path is relative, and if the first
jaroslav@1258	2361	// segment could be parsed as a scheme name, then prepend a "." segment
jaroslav@1258	2362	//
jaroslav@1258	2363	private static void maybeAddLeadingDot(char[] path, int[] segs) {
jaroslav@1258	2364
jaroslav@1258	2365	if (path[0] == '\0')
jaroslav@1258	2366	// The path is absolute
jaroslav@1258	2367	return;
jaroslav@1258	2368
jaroslav@1258	2369	int ns = segs.length;
jaroslav@1258	2370	int f = 0; // Index of first segment
jaroslav@1258	2371	while (f < ns) {
jaroslav@1258	2372	if (segs[f] >= 0)
jaroslav@1258	2373	break;
jaroslav@1258	2374	f++;
jaroslav@1258	2375	}
jaroslav@1258	2376	if ((f >= ns) \|\| (f == 0))
jaroslav@1258	2377	// The path is empty, or else the original first segment survived,
jaroslav@1258	2378	// in which case we already know that no leading "." is needed
jaroslav@1258	2379	return;
jaroslav@1258	2380
jaroslav@1258	2381	int p = segs[f];
jaroslav@1258	2382	while ((p < path.length) && (path[p] != ':') && (path[p] != '\0')) p++;
jaroslav@1258	2383	if (p >= path.length \|\| path[p] == '\0')
jaroslav@1258	2384	// No colon in first segment, so no "." needed
jaroslav@1258	2385	return;
jaroslav@1258	2386
jaroslav@1258	2387	// At this point we know that the first segment is unused,
jaroslav@1258	2388	// hence we can insert a "." segment at that position
jaroslav@1258	2389	path[0] = '.';
jaroslav@1258	2390	path[1] = '\0';
jaroslav@1258	2391	segs[0] = 0;
jaroslav@1258	2392	}
jaroslav@1258	2393
jaroslav@1258	2394
jaroslav@1258	2395	// Normalize the given path string. A normal path string has no empty
jaroslav@1258	2396	// segments (i.e., occurrences of "//"), no segments equal to ".", and no
jaroslav@1258	2397	// segments equal to ".." that are preceded by a segment not equal to "..".
jaroslav@1258	2398	// In contrast to Unix-style pathname normalization, for URI paths we
jaroslav@1258	2399	// always retain trailing slashes.
jaroslav@1258	2400	//
jaroslav@1258	2401	private static String normalize(String ps) {
jaroslav@1258	2402
jaroslav@1258	2403	// Does this path need normalization?
jaroslav@1258	2404	int ns = needsNormalization(ps); // Number of segments
jaroslav@1258	2405	if (ns < 0)
jaroslav@1258	2406	// Nope -- just return it
jaroslav@1258	2407	return ps;
jaroslav@1258	2408
jaroslav@1258	2409	char[] path = ps.toCharArray(); // Path in char-array form
jaroslav@1258	2410
jaroslav@1258	2411	// Split path into segments
jaroslav@1258	2412	int[] segs = new int[ns]; // Segment-index array
jaroslav@1258	2413	split(path, segs);
jaroslav@1258	2414
jaroslav@1258	2415	// Remove dots
jaroslav@1258	2416	removeDots(path, segs);
jaroslav@1258	2417
jaroslav@1258	2418	// Prevent scheme-name confusion
jaroslav@1258	2419	maybeAddLeadingDot(path, segs);
jaroslav@1258	2420
jaroslav@1258	2421	// Join the remaining segments and return the result
jaroslav@1258	2422	String s = new String(path, 0, join(path, segs));
jaroslav@1258	2423	if (s.equals(ps)) {
jaroslav@1258	2424	// string was already normalized
jaroslav@1258	2425	return ps;
jaroslav@1258	2426	}
jaroslav@1258	2427	return s;
jaroslav@1258	2428	}
jaroslav@1258	2429
jaroslav@1258	2430
jaroslav@1258	2431
jaroslav@1258	2432	// -- Character classes for parsing --
jaroslav@1258	2433
jaroslav@1258	2434	// RFC2396 precisely specifies which characters in the US-ASCII charset are
jaroslav@1258	2435	// permissible in the various components of a URI reference. We here
jaroslav@1258	2436	// define a set of mask pairs to aid in enforcing these restrictions. Each
jaroslav@1258	2437	// mask pair consists of two longs, a low mask and a high mask. Taken
jaroslav@1258	2438	// together they represent a 128-bit mask, where bit i is set iff the
jaroslav@1258	2439	// character with value i is permitted.
jaroslav@1258	2440	//
jaroslav@1258	2441	// This approach is more efficient than sequentially searching arrays of
jaroslav@1258	2442	// permitted characters. It could be made still more efficient by
jaroslav@1258	2443	// precompiling the mask information so that a character's presence in a
jaroslav@1258	2444	// given mask could be determined by a single table lookup.
jaroslav@1258	2445
jaroslav@1258	2446	// Compute the low-order mask for the characters in the given string
jaroslav@1258	2447	private static long lowMask(String chars) {
jaroslav@1258	2448	int n = chars.length();
jaroslav@1258	2449	long m = 0;
jaroslav@1258	2450	for (int i = 0; i < n; i++) {
jaroslav@1258	2451	char c = chars.charAt(i);
jaroslav@1258	2452	if (c < 64)
jaroslav@1258	2453	m \|= (1L << c);
jaroslav@1258	2454	}
jaroslav@1258	2455	return m;
jaroslav@1258	2456	}
jaroslav@1258	2457
jaroslav@1258	2458	// Compute the high-order mask for the characters in the given string
jaroslav@1258	2459	private static long highMask(String chars) {
jaroslav@1258	2460	int n = chars.length();
jaroslav@1258	2461	long m = 0;
jaroslav@1258	2462	for (int i = 0; i < n; i++) {
jaroslav@1258	2463	char c = chars.charAt(i);
jaroslav@1258	2464	if ((c >= 64) && (c < 128))
jaroslav@1258	2465	m \|= (1L << (c - 64));
jaroslav@1258	2466	}
jaroslav@1258	2467	return m;
jaroslav@1258	2468	}
jaroslav@1258	2469
jaroslav@1258	2470	// Compute a low-order mask for the characters
jaroslav@1258	2471	// between first and last, inclusive
jaroslav@1258	2472	private static long lowMask(char first, char last) {
jaroslav@1258	2473	long m = 0;
jaroslav@1258	2474	int f = Math.max(Math.min(first, 63), 0);
jaroslav@1258	2475	int l = Math.max(Math.min(last, 63), 0);
jaroslav@1258	2476	for (int i = f; i <= l; i++)
jaroslav@1258	2477	m \|= 1L << i;
jaroslav@1258	2478	return m;
jaroslav@1258	2479	}
jaroslav@1258	2480
jaroslav@1258	2481	// Compute a high-order mask for the characters
jaroslav@1258	2482	// between first and last, inclusive
jaroslav@1258	2483	private static long highMask(char first, char last) {
jaroslav@1258	2484	long m = 0;
jaroslav@1258	2485	int f = Math.max(Math.min(first, 127), 64) - 64;
jaroslav@1258	2486	int l = Math.max(Math.min(last, 127), 64) - 64;
jaroslav@1258	2487	for (int i = f; i <= l; i++)
jaroslav@1258	2488	m \|= 1L << i;
jaroslav@1258	2489	return m;
jaroslav@1258	2490	}
jaroslav@1258	2491
jaroslav@1258	2492	// Tell whether the given character is permitted by the given mask pair
jaroslav@1258	2493	private static boolean match(char c, long lowMask, long highMask) {
jaroslav@1258	2494	if (c == 0) // 0 doesn't have a slot in the mask. So, it never matches.
jaroslav@1258	2495	return false;
jaroslav@1258	2496	if (c < 64)
jaroslav@1258	2497	return ((1L << c) & lowMask) != 0;
jaroslav@1258	2498	if (c < 128)
jaroslav@1258	2499	return ((1L << (c - 64)) & highMask) != 0;
jaroslav@1258	2500	return false;
jaroslav@1258	2501	}
jaroslav@1258	2502
jaroslav@1258	2503	// Character-class masks, in reverse order from RFC2396 because
jaroslav@1258	2504	// initializers for static fields cannot make forward references.
jaroslav@1258	2505
jaroslav@1258	2506	// digit = "0" \| "1" \| "2" \| "3" \| "4" \| "5" \| "6" \| "7" \|
jaroslav@1258	2507	// "8" \| "9"
jaroslav@1258	2508	private static final long L_DIGIT = lowMask('0', '9');
jaroslav@1258	2509	private static final long H_DIGIT = 0L;
jaroslav@1258	2510
jaroslav@1258	2511	// upalpha = "A" \| "B" \| "C" \| "D" \| "E" \| "F" \| "G" \| "H" \| "I" \|
jaroslav@1258	2512	// "J" \| "K" \| "L" \| "M" \| "N" \| "O" \| "P" \| "Q" \| "R" \|
jaroslav@1258	2513	// "S" \| "T" \| "U" \| "V" \| "W" \| "X" \| "Y" \| "Z"
jaroslav@1258	2514	private static final long L_UPALPHA = 0L;
jaroslav@1258	2515	private static final long H_UPALPHA = highMask('A', 'Z');
jaroslav@1258	2516
jaroslav@1258	2517	// lowalpha = "a" \| "b" \| "c" \| "d" \| "e" \| "f" \| "g" \| "h" \| "i" \|
jaroslav@1258	2518	// "j" \| "k" \| "l" \| "m" \| "n" \| "o" \| "p" \| "q" \| "r" \|
jaroslav@1258	2519	// "s" \| "t" \| "u" \| "v" \| "w" \| "x" \| "y" \| "z"
jaroslav@1258	2520	private static final long L_LOWALPHA = 0L;
jaroslav@1258	2521	private static final long H_LOWALPHA = highMask('a', 'z');
jaroslav@1258	2522
jaroslav@1258	2523	// alpha = lowalpha \| upalpha
jaroslav@1258	2524	private static final long L_ALPHA = L_LOWALPHA \| L_UPALPHA;
jaroslav@1258	2525	private static final long H_ALPHA = H_LOWALPHA \| H_UPALPHA;
jaroslav@1258	2526
jaroslav@1258	2527	// alphanum = alpha \| digit
jaroslav@1258	2528	private static final long L_ALPHANUM = L_DIGIT \| L_ALPHA;
jaroslav@1258	2529	private static final long H_ALPHANUM = H_DIGIT \| H_ALPHA;
jaroslav@1258	2530
jaroslav@1258	2531	// hex = digit \| "A" \| "B" \| "C" \| "D" \| "E" \| "F" \|
jaroslav@1258	2532	// "a" \| "b" \| "c" \| "d" \| "e" \| "f"
jaroslav@1258	2533	private static final long L_HEX = L_DIGIT;
jaroslav@1258	2534	private static final long H_HEX = highMask('A', 'F') \| highMask('a', 'f');
jaroslav@1258	2535
jaroslav@1258	2536	// mark = "-" \| "_" \| "." \| "!" \| "~" \| "*" \| "'" \|
jaroslav@1258	2537	// "(" \| ")"
jaroslav@1258	2538	private static final long L_MARK = lowMask("-_.!~*'()");
jaroslav@1258	2539	private static final long H_MARK = highMask("-_.!~*'()");
jaroslav@1258	2540
jaroslav@1258	2541	// unreserved = alphanum \| mark
jaroslav@1258	2542	private static final long L_UNRESERVED = L_ALPHANUM \| L_MARK;
jaroslav@1258	2543	private static final long H_UNRESERVED = H_ALPHANUM \| H_MARK;
jaroslav@1258	2544
jaroslav@1258	2545	// reserved = ";" \| "/" \| "?" \| ":" \| "@" \| "&" \| "=" \| "+" \|
jaroslav@1258	2546	// "$" \| "," \| "[" \| "]"
jaroslav@1258	2547	// Added per RFC2732: "[", "]"
jaroslav@1258	2548	private static final long L_RESERVED = lowMask(";/?:@&=+$,[]");
jaroslav@1258	2549	private static final long H_RESERVED = highMask(";/?:@&=+$,[]");
jaroslav@1258	2550
jaroslav@1258	2551	// The zero'th bit is used to indicate that escape pairs and non-US-ASCII
jaroslav@1258	2552	// characters are allowed; this is handled by the scanEscape method below.
jaroslav@1258	2553	private static final long L_ESCAPED = 1L;
jaroslav@1258	2554	private static final long H_ESCAPED = 0L;
jaroslav@1258	2555
jaroslav@1258	2556	// uric = reserved \| unreserved \| escaped
jaroslav@1258	2557	private static final long L_URIC = L_RESERVED \| L_UNRESERVED \| L_ESCAPED;
jaroslav@1258	2558	private static final long H_URIC = H_RESERVED \| H_UNRESERVED \| H_ESCAPED;
jaroslav@1258	2559
jaroslav@1258	2560	// pchar = unreserved \| escaped \|
jaroslav@1258	2561	// ":" \| "@" \| "&" \| "=" \| "+" \| "$" \| ","
jaroslav@1258	2562	private static final long L_PCHAR
jaroslav@1258	2563	= L_UNRESERVED \| L_ESCAPED \| lowMask(":@&=+$,");
jaroslav@1258	2564	private static final long H_PCHAR
jaroslav@1258	2565	= H_UNRESERVED \| H_ESCAPED \| highMask(":@&=+$,");
jaroslav@1258	2566
jaroslav@1258	2567	// All valid path characters
jaroslav@1258	2568	private static final long L_PATH = L_PCHAR \| lowMask(";/");
jaroslav@1258	2569	private static final long H_PATH = H_PCHAR \| highMask(";/");
jaroslav@1258	2570
jaroslav@1258	2571	// Dash, for use in domainlabel and toplabel
jaroslav@1258	2572	private static final long L_DASH = lowMask("-");
jaroslav@1258	2573	private static final long H_DASH = highMask("-");
jaroslav@1258	2574
jaroslav@1258	2575	// Dot, for use in hostnames
jaroslav@1258	2576	private static final long L_DOT = lowMask(".");
jaroslav@1258	2577	private static final long H_DOT = highMask(".");
jaroslav@1258	2578
jaroslav@1258	2579	// userinfo = *( unreserved \| escaped \|
jaroslav@1258	2580	// ";" \| ":" \| "&" \| "=" \| "+" \| "$" \| "," )
jaroslav@1258	2581	private static final long L_USERINFO
jaroslav@1258	2582	= L_UNRESERVED \| L_ESCAPED \| lowMask(";:&=+$,");
jaroslav@1258	2583	private static final long H_USERINFO
jaroslav@1258	2584	= H_UNRESERVED \| H_ESCAPED \| highMask(";:&=+$,");
jaroslav@1258	2585
jaroslav@1258	2586	// reg_name = 1*( unreserved \| escaped \| "$" \| "," \|
jaroslav@1258	2587	// ";" \| ":" \| "@" \| "&" \| "=" \| "+" )
jaroslav@1258	2588	private static final long L_REG_NAME
jaroslav@1258	2589	= L_UNRESERVED \| L_ESCAPED \| lowMask("$,;:@&=+");
jaroslav@1258	2590	private static final long H_REG_NAME
jaroslav@1258	2591	= H_UNRESERVED \| H_ESCAPED \| highMask("$,;:@&=+");
jaroslav@1258	2592
jaroslav@1258	2593	// All valid characters for server-based authorities
jaroslav@1258	2594	private static final long L_SERVER
jaroslav@1258	2595	= L_USERINFO \| L_ALPHANUM \| L_DASH \| lowMask(".:@[]");
jaroslav@1258	2596	private static final long H_SERVER
jaroslav@1258	2597	= H_USERINFO \| H_ALPHANUM \| H_DASH \| highMask(".:@[]");
jaroslav@1258	2598
jaroslav@1258	2599	// Special case of server authority that represents an IPv6 address
jaroslav@1258	2600	// In this case, a % does not signify an escape sequence
jaroslav@1258	2601	private static final long L_SERVER_PERCENT
jaroslav@1258	2602	= L_SERVER \| lowMask("%");
jaroslav@1258	2603	private static final long H_SERVER_PERCENT
jaroslav@1258	2604	= H_SERVER \| highMask("%");
jaroslav@1258	2605	private static final long L_LEFT_BRACKET = lowMask("[");
jaroslav@1258	2606	private static final long H_LEFT_BRACKET = highMask("[");
jaroslav@1258	2607
jaroslav@1258	2608	// scheme = alpha *( alpha \| digit \| "+" \| "-" \| "." )
jaroslav@1258	2609	private static final long L_SCHEME = L_ALPHA \| L_DIGIT \| lowMask("+-.");
jaroslav@1258	2610	private static final long H_SCHEME = H_ALPHA \| H_DIGIT \| highMask("+-.");
jaroslav@1258	2611
jaroslav@1258	2612	// uric_no_slash = unreserved \| escaped \| ";" \| "?" \| ":" \| "@" \|
jaroslav@1258	2613	// "&" \| "=" \| "+" \| "$" \| ","
jaroslav@1258	2614	private static final long L_URIC_NO_SLASH
jaroslav@1258	2615	= L_UNRESERVED \| L_ESCAPED \| lowMask(";?:@&=+$,");
jaroslav@1258	2616	private static final long H_URIC_NO_SLASH
jaroslav@1258	2617	= H_UNRESERVED \| H_ESCAPED \| highMask(";?:@&=+$,");
jaroslav@1258	2618
jaroslav@1258	2619
jaroslav@1258	2620	// -- Escaping and encoding --
jaroslav@1258	2621
jaroslav@1258	2622	private final static char[] hexDigits = {
jaroslav@1258	2623	'0', '1', '2', '3', '4', '5', '6', '7',
jaroslav@1258	2624	'8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
jaroslav@1258	2625	};
jaroslav@1258	2626
jaroslav@1258	2627	private static void appendEscape(StringBuffer sb, byte b) {
jaroslav@1258	2628	sb.append('%');
jaroslav@1258	2629	sb.append(hexDigits[(b >> 4) & 0x0f]);
jaroslav@1258	2630	sb.append(hexDigits[(b >> 0) & 0x0f]);
jaroslav@1258	2631	}
jaroslav@1258	2632
jaroslav@1258	2633	private static void appendEncoded(StringBuffer sb, char c) {
jaroslav@1258	2634	ByteBuffer bb = null;
jaroslav@1258	2635	try {
jaroslav@1258	2636	bb = ThreadLocalCoders.encoderFor("UTF-8")
jaroslav@1258	2637	.encode(CharBuffer.wrap("" + c));
jaroslav@1258	2638	} catch (CharacterCodingException x) {
jaroslav@1258	2639	assert false;
jaroslav@1258	2640	}
jaroslav@1258	2641	while (bb.hasRemaining()) {
jaroslav@1258	2642	int b = bb.get() & 0xff;
jaroslav@1258	2643	if (b >= 0x80)
jaroslav@1258	2644	appendEscape(sb, (byte)b);
jaroslav@1258	2645	else
jaroslav@1258	2646	sb.append((char)b);
jaroslav@1258	2647	}
jaroslav@1258	2648	}
jaroslav@1258	2649
jaroslav@1258	2650	// Quote any characters in s that are not permitted
jaroslav@1258	2651	// by the given mask pair
jaroslav@1258	2652	//
jaroslav@1258	2653	private static String quote(String s, long lowMask, long highMask) {
jaroslav@1258	2654	int n = s.length();
jaroslav@1258	2655	StringBuffer sb = null;
jaroslav@1258	2656	boolean allowNonASCII = ((lowMask & L_ESCAPED) != 0);
jaroslav@1258	2657	for (int i = 0; i < s.length(); i++) {
jaroslav@1258	2658	char c = s.charAt(i);
jaroslav@1258	2659	if (c < '\u0080') {
jaroslav@1258	2660	if (!match(c, lowMask, highMask)) {
jaroslav@1258	2661	if (sb == null) {
jaroslav@1258	2662	sb = new StringBuffer();
jaroslav@1258	2663	sb.append(s.substring(0, i));
jaroslav@1258	2664	}
jaroslav@1258	2665	appendEscape(sb, (byte)c);
jaroslav@1258	2666	} else {
jaroslav@1258	2667	if (sb != null)
jaroslav@1258	2668	sb.append(c);
jaroslav@1258	2669	}
jaroslav@1258	2670	} else if (allowNonASCII
jaroslav@1258	2671	&& (Character.isSpaceChar(c)
jaroslav@1258	2672	\|\| Character.isISOControl(c))) {
jaroslav@1258	2673	if (sb == null) {
jaroslav@1258	2674	sb = new StringBuffer();
jaroslav@1258	2675	sb.append(s.substring(0, i));
jaroslav@1258	2676	}
jaroslav@1258	2677	appendEncoded(sb, c);
jaroslav@1258	2678	} else {
jaroslav@1258	2679	if (sb != null)
jaroslav@1258	2680	sb.append(c);
jaroslav@1258	2681	}
jaroslav@1258	2682	}
jaroslav@1258	2683	return (sb == null) ? s : sb.toString();
jaroslav@1258	2684	}
jaroslav@1258	2685
jaroslav@1258	2686	// Encodes all characters >= \u0080 into escaped, normalized UTF-8 octets,
jaroslav@1258	2687	// assuming that s is otherwise legal
jaroslav@1258	2688	//
jaroslav@1258	2689	private static String encode(String s) {
jaroslav@1258	2690	int n = s.length();
jaroslav@1258	2691	if (n == 0)
jaroslav@1258	2692	return s;
jaroslav@1258	2693
jaroslav@1258	2694	// First check whether we actually need to encode
jaroslav@1258	2695	for (int i = 0;;) {
jaroslav@1258	2696	if (s.charAt(i) >= '\u0080')
jaroslav@1258	2697	break;
jaroslav@1258	2698	if (++i >= n)
jaroslav@1258	2699	return s;
jaroslav@1258	2700	}
jaroslav@1258	2701
jaroslav@1258	2702	String ns = Normalizer.normalize(s, Normalizer.Form.NFC);
jaroslav@1258	2703	ByteBuffer bb = null;
jaroslav@1258	2704	try {
jaroslav@1258	2705	bb = ThreadLocalCoders.encoderFor("UTF-8")
jaroslav@1258	2706	.encode(CharBuffer.wrap(ns));
jaroslav@1258	2707	} catch (CharacterCodingException x) {
jaroslav@1258	2708	assert false;
jaroslav@1258	2709	}
jaroslav@1258	2710
jaroslav@1258	2711	StringBuffer sb = new StringBuffer();
jaroslav@1258	2712	while (bb.hasRemaining()) {
jaroslav@1258	2713	int b = bb.get() & 0xff;
jaroslav@1258	2714	if (b >= 0x80)
jaroslav@1258	2715	appendEscape(sb, (byte)b);
jaroslav@1258	2716	else
jaroslav@1258	2717	sb.append((char)b);
jaroslav@1258	2718	}
jaroslav@1258	2719	return sb.toString();
jaroslav@1258	2720	}
jaroslav@1258	2721
jaroslav@1258	2722	private static int decode(char c) {
jaroslav@1258	2723	if ((c >= '0') && (c <= '9'))
jaroslav@1258	2724	return c - '0';
jaroslav@1258	2725	if ((c >= 'a') && (c <= 'f'))
jaroslav@1258	2726	return c - 'a' + 10;
jaroslav@1258	2727	if ((c >= 'A') && (c <= 'F'))
jaroslav@1258	2728	return c - 'A' + 10;
jaroslav@1258	2729	assert false;
jaroslav@1258	2730	return -1;
jaroslav@1258	2731	}
jaroslav@1258	2732
jaroslav@1258	2733	private static byte decode(char c1, char c2) {
jaroslav@1258	2734	return (byte)( ((decode(c1) & 0xf) << 4)
jaroslav@1258	2735	\| ((decode(c2) & 0xf) << 0));
jaroslav@1258	2736	}
jaroslav@1258	2737
jaroslav@1258	2738	// Evaluates all escapes in s, applying UTF-8 decoding if needed. Assumes
jaroslav@1258	2739	// that escapes are well-formed syntactically, i.e., of the form %XX. If a
jaroslav@1258	2740	// sequence of escaped octets is not valid UTF-8 then the erroneous octets
jaroslav@1258	2741	// are replaced with '\uFFFD'.
jaroslav@1258	2742	// Exception: any "%" found between "[]" is left alone. It is an IPv6 literal
jaroslav@1258	2743	// with a scope_id
jaroslav@1258	2744	//
jaroslav@1258	2745	private static String decode(String s) {
jaroslav@1258	2746	if (s == null)
jaroslav@1258	2747	return s;
jaroslav@1258	2748	int n = s.length();
jaroslav@1258	2749	if (n == 0)
jaroslav@1258	2750	return s;
jaroslav@1258	2751	if (s.indexOf('%') < 0)
jaroslav@1258	2752	return s;
jaroslav@1258	2753
jaroslav@1258	2754	StringBuffer sb = new StringBuffer(n);
jaroslav@1258	2755	ByteBuffer bb = ByteBuffer.allocate(n);
jaroslav@1258	2756	CharBuffer cb = CharBuffer.allocate(n);
jaroslav@1258	2757	CharsetDecoder dec = ThreadLocalCoders.decoderFor("UTF-8")
jaroslav@1258	2758	.onMalformedInput(CodingErrorAction.REPLACE)
jaroslav@1258	2759	.onUnmappableCharacter(CodingErrorAction.REPLACE);
jaroslav@1258	2760
jaroslav@1258	2761	// This is not horribly efficient, but it will do for now
jaroslav@1258	2762	char c = s.charAt(0);
jaroslav@1258	2763	boolean betweenBrackets = false;
jaroslav@1258	2764
jaroslav@1258	2765	for (int i = 0; i < n;) {
jaroslav@1258	2766	assert c == s.charAt(i); // Loop invariant
jaroslav@1258	2767	if (c == '[') {
jaroslav@1258	2768	betweenBrackets = true;
jaroslav@1258	2769	} else if (betweenBrackets && c == ']') {
jaroslav@1258	2770	betweenBrackets = false;
jaroslav@1258	2771	}
jaroslav@1258	2772	if (c != '%' \|\| betweenBrackets) {
jaroslav@1258	2773	sb.append(c);
jaroslav@1258	2774	if (++i >= n)
jaroslav@1258	2775	break;
jaroslav@1258	2776	c = s.charAt(i);
jaroslav@1258	2777	continue;
jaroslav@1258	2778	}
jaroslav@1258	2779	bb.clear();
jaroslav@1258	2780	int ui = i;
jaroslav@1258	2781	for (;;) {
jaroslav@1258	2782	assert (n - i >= 2);
jaroslav@1258	2783	bb.put(decode(s.charAt(++i), s.charAt(++i)));
jaroslav@1258	2784	if (++i >= n)
jaroslav@1258	2785	break;
jaroslav@1258	2786	c = s.charAt(i);
jaroslav@1258	2787	if (c != '%')
jaroslav@1258	2788	break;
jaroslav@1258	2789	}
jaroslav@1258	2790	bb.flip();
jaroslav@1258	2791	cb.clear();
jaroslav@1258	2792	dec.reset();
jaroslav@1258	2793	CoderResult cr = dec.decode(bb, cb, true);
jaroslav@1258	2794	assert cr.isUnderflow();
jaroslav@1258	2795	cr = dec.flush(cb);
jaroslav@1258	2796	assert cr.isUnderflow();
jaroslav@1258	2797	sb.append(cb.flip().toString());
jaroslav@1258	2798	}
jaroslav@1258	2799
jaroslav@1258	2800	return sb.toString();
jaroslav@1258	2801	}
jaroslav@1258	2802
jaroslav@1258	2803
jaroslav@1258	2804	// -- Parsing --
jaroslav@1258	2805
jaroslav@1258	2806	// For convenience we wrap the input URI string in a new instance of the
jaroslav@1258	2807	// following internal class. This saves always having to pass the input
jaroslav@1258	2808	// string as an argument to each internal scan/parse method.
jaroslav@1258	2809
jaroslav@1258	2810	private class Parser {
jaroslav@1258	2811
jaroslav@1258	2812	private String input; // URI input string
jaroslav@1258	2813	private boolean requireServerAuthority = false;
jaroslav@1258	2814
jaroslav@1258	2815	Parser(String s) {
jaroslav@1258	2816	input = s;
jaroslav@1258	2817	string = s;
jaroslav@1258	2818	}
jaroslav@1258	2819
jaroslav@1258	2820	// -- Methods for throwing URISyntaxException in various ways --
jaroslav@1258	2821
jaroslav@1258	2822	private void fail(String reason) throws URISyntaxException {
jaroslav@1258	2823	throw new URISyntaxException(input, reason);
jaroslav@1258	2824	}
jaroslav@1258	2825
jaroslav@1258	2826	private void fail(String reason, int p) throws URISyntaxException {
jaroslav@1258	2827	throw new URISyntaxException(input, reason, p);
jaroslav@1258	2828	}
jaroslav@1258	2829
jaroslav@1258	2830	private void failExpecting(String expected, int p)
jaroslav@1258	2831	throws URISyntaxException
jaroslav@1258	2832	{
jaroslav@1258	2833	fail("Expected " + expected, p);
jaroslav@1258	2834	}
jaroslav@1258	2835
jaroslav@1258	2836	private void failExpecting(String expected, String prior, int p)
jaroslav@1258	2837	throws URISyntaxException
jaroslav@1258	2838	{
jaroslav@1258	2839	fail("Expected " + expected + " following " + prior, p);
jaroslav@1258	2840	}
jaroslav@1258	2841
jaroslav@1258	2842
jaroslav@1258	2843	// -- Simple access to the input string --
jaroslav@1258	2844
jaroslav@1258	2845	// Return a substring of the input string
jaroslav@1258	2846	//
jaroslav@1258	2847	private String substring(int start, int end) {
jaroslav@1258	2848	return input.substring(start, end);
jaroslav@1258	2849	}
jaroslav@1258	2850
jaroslav@1258	2851	// Return the char at position p,
jaroslav@1258	2852	// assuming that p < input.length()
jaroslav@1258	2853	//
jaroslav@1258	2854	private char charAt(int p) {
jaroslav@1258	2855	return input.charAt(p);
jaroslav@1258	2856	}
jaroslav@1258	2857
jaroslav@1258	2858	// Tells whether start < end and, if so, whether charAt(start) == c
jaroslav@1258	2859	//
jaroslav@1258	2860	private boolean at(int start, int end, char c) {
jaroslav@1258	2861	return (start < end) && (charAt(start) == c);
jaroslav@1258	2862	}
jaroslav@1258	2863
jaroslav@1258	2864	// Tells whether start + s.length() < end and, if so,
jaroslav@1258	2865	// whether the chars at the start position match s exactly
jaroslav@1258	2866	//
jaroslav@1258	2867	private boolean at(int start, int end, String s) {
jaroslav@1258	2868	int p = start;
jaroslav@1258	2869	int sn = s.length();
jaroslav@1258	2870	if (sn > end - p)
jaroslav@1258	2871	return false;
jaroslav@1258	2872	int i = 0;
jaroslav@1258	2873	while (i < sn) {
jaroslav@1258	2874	if (charAt(p++) != s.charAt(i)) {
jaroslav@1258	2875	break;
jaroslav@1258	2876	}
jaroslav@1258	2877	i++;
jaroslav@1258	2878	}
jaroslav@1258	2879	return (i == sn);
jaroslav@1258	2880	}
jaroslav@1258	2881
jaroslav@1258	2882
jaroslav@1258	2883	// -- Scanning --
jaroslav@1258	2884
jaroslav@1258	2885	// The various scan and parse methods that follow use a uniform
jaroslav@1258	2886	// convention of taking the current start position and end index as
jaroslav@1258	2887	// their first two arguments. The start is inclusive while the end is
jaroslav@1258	2888	// exclusive, just as in the String class, i.e., a start/end pair
jaroslav@1258	2889	// denotes the half-open interval [start, end) of the input string.
jaroslav@1258	2890	//
jaroslav@1258	2891	// These methods never proceed past the end position. They may return
jaroslav@1258	2892	// -1 to indicate outright failure, but more often they simply return
jaroslav@1258	2893	// the position of the first char after the last char scanned. Thus
jaroslav@1258	2894	// a typical idiom is
jaroslav@1258	2895	//
jaroslav@1258	2896	// int p = start;
jaroslav@1258	2897	// int q = scan(p, end, ...);
jaroslav@1258	2898	// if (q > p)
jaroslav@1258	2899	// // We scanned something
jaroslav@1258	2900	// ...;
jaroslav@1258	2901	// else if (q == p)
jaroslav@1258	2902	// // We scanned nothing
jaroslav@1258	2903	// ...;
jaroslav@1258	2904	// else if (q == -1)
jaroslav@1258	2905	// // Something went wrong
jaroslav@1258	2906	// ...;
jaroslav@1258	2907
jaroslav@1258	2908
jaroslav@1258	2909	// Scan a specific char: If the char at the given start position is
jaroslav@1258	2910	// equal to c, return the index of the next char; otherwise, return the
jaroslav@1258	2911	// start position.
jaroslav@1258	2912	//
jaroslav@1258	2913	private int scan(int start, int end, char c) {
jaroslav@1258	2914	if ((start < end) && (charAt(start) == c))
jaroslav@1258	2915	return start + 1;
jaroslav@1258	2916	return start;
jaroslav@1258	2917	}
jaroslav@1258	2918
jaroslav@1258	2919	// Scan forward from the given start position. Stop at the first char
jaroslav@1258	2920	// in the err string (in which case -1 is returned), or the first char
jaroslav@1258	2921	// in the stop string (in which case the index of the preceding char is
jaroslav@1258	2922	// returned), or the end of the input string (in which case the length
jaroslav@1258	2923	// of the input string is returned). May return the start position if
jaroslav@1258	2924	// nothing matches.
jaroslav@1258	2925	//
jaroslav@1258	2926	private int scan(int start, int end, String err, String stop) {
jaroslav@1258	2927	int p = start;
jaroslav@1258	2928	while (p < end) {
jaroslav@1258	2929	char c = charAt(p);
jaroslav@1258	2930	if (err.indexOf(c) >= 0)
jaroslav@1258	2931	return -1;
jaroslav@1258	2932	if (stop.indexOf(c) >= 0)
jaroslav@1258	2933	break;
jaroslav@1258	2934	p++;
jaroslav@1258	2935	}
jaroslav@1258	2936	return p;
jaroslav@1258	2937	}
jaroslav@1258	2938
jaroslav@1258	2939	// Scan a potential escape sequence, starting at the given position,
jaroslav@1258	2940	// with the given first char (i.e., charAt(start) == c).
jaroslav@1258	2941	//
jaroslav@1258	2942	// This method assumes that if escapes are allowed then visible
jaroslav@1258	2943	// non-US-ASCII chars are also allowed.
jaroslav@1258	2944	//
jaroslav@1258	2945	private int scanEscape(int start, int n, char first)
jaroslav@1258	2946	throws URISyntaxException
jaroslav@1258	2947	{
jaroslav@1258	2948	int p = start;
jaroslav@1258	2949	char c = first;
jaroslav@1258	2950	if (c == '%') {
jaroslav@1258	2951	// Process escape pair
jaroslav@1258	2952	if ((p + 3 <= n)
jaroslav@1258	2953	&& match(charAt(p + 1), L_HEX, H_HEX)
jaroslav@1258	2954	&& match(charAt(p + 2), L_HEX, H_HEX)) {
jaroslav@1258	2955	return p + 3;
jaroslav@1258	2956	}
jaroslav@1258	2957	fail("Malformed escape pair", p);
jaroslav@1258	2958	} else if ((c > 128)
jaroslav@1258	2959	&& !Character.isSpaceChar(c)
jaroslav@1258	2960	&& !Character.isISOControl(c)) {
jaroslav@1258	2961	// Allow unescaped but visible non-US-ASCII chars
jaroslav@1258	2962	return p + 1;
jaroslav@1258	2963	}
jaroslav@1258	2964	return p;
jaroslav@1258	2965	}
jaroslav@1258	2966
jaroslav@1258	2967	// Scan chars that match the given mask pair
jaroslav@1258	2968	//
jaroslav@1258	2969	private int scan(int start, int n, long lowMask, long highMask)
jaroslav@1258	2970	throws URISyntaxException
jaroslav@1258	2971	{
jaroslav@1258	2972	int p = start;
jaroslav@1258	2973	while (p < n) {
jaroslav@1258	2974	char c = charAt(p);
jaroslav@1258	2975	if (match(c, lowMask, highMask)) {
jaroslav@1258	2976	p++;
jaroslav@1258	2977	continue;
jaroslav@1258	2978	}
jaroslav@1258	2979	if ((lowMask & L_ESCAPED) != 0) {
jaroslav@1258	2980	int q = scanEscape(p, n, c);
jaroslav@1258	2981	if (q > p) {
jaroslav@1258	2982	p = q;
jaroslav@1258	2983	continue;
jaroslav@1258	2984	}
jaroslav@1258	2985	}
jaroslav@1258	2986	break;
jaroslav@1258	2987	}
jaroslav@1258	2988	return p;
jaroslav@1258	2989	}
jaroslav@1258	2990
jaroslav@1258	2991	// Check that each of the chars in [start, end) matches the given mask
jaroslav@1258	2992	//
jaroslav@1258	2993	private void checkChars(int start, int end,
jaroslav@1258	2994	long lowMask, long highMask,
jaroslav@1258	2995	String what)
jaroslav@1258	2996	throws URISyntaxException
jaroslav@1258	2997	{
jaroslav@1258	2998	int p = scan(start, end, lowMask, highMask);
jaroslav@1258	2999	if (p < end)
jaroslav@1258	3000	fail("Illegal character in " + what, p);
jaroslav@1258	3001	}
jaroslav@1258	3002
jaroslav@1258	3003	// Check that the char at position p matches the given mask
jaroslav@1258	3004	//
jaroslav@1258	3005	private void checkChar(int p,
jaroslav@1258	3006	long lowMask, long highMask,
jaroslav@1258	3007	String what)
jaroslav@1258	3008	throws URISyntaxException
jaroslav@1258	3009	{
jaroslav@1258	3010	checkChars(p, p + 1, lowMask, highMask, what);
jaroslav@1258	3011	}
jaroslav@1258	3012
jaroslav@1258	3013
jaroslav@1258	3014	// -- Parsing --
jaroslav@1258	3015
jaroslav@1258	3016	// [<scheme>:]<scheme-specific-part>[#<fragment>]
jaroslav@1258	3017	//
jaroslav@1258	3018	void parse(boolean rsa) throws URISyntaxException {
jaroslav@1258	3019	requireServerAuthority = rsa;
jaroslav@1258	3020	int ssp; // Start of scheme-specific part
jaroslav@1258	3021	int n = input.length();
jaroslav@1258	3022	int p = scan(0, n, "/?#", ":");
jaroslav@1258	3023	if ((p >= 0) && at(p, n, ':')) {
jaroslav@1258	3024	if (p == 0)
jaroslav@1258	3025	failExpecting("scheme name", 0);
jaroslav@1258	3026	checkChar(0, L_ALPHA, H_ALPHA, "scheme name");
jaroslav@1258	3027	checkChars(1, p, L_SCHEME, H_SCHEME, "scheme name");
jaroslav@1258	3028	scheme = substring(0, p);
jaroslav@1258	3029	p++; // Skip ':'
jaroslav@1258	3030	ssp = p;
jaroslav@1258	3031	if (at(p, n, '/')) {
jaroslav@1258	3032	p = parseHierarchical(p, n);
jaroslav@1258	3033	} else {
jaroslav@1258	3034	int q = scan(p, n, "", "#");
jaroslav@1258	3035	if (q <= p)
jaroslav@1258	3036	failExpecting("scheme-specific part", p);
jaroslav@1258	3037	checkChars(p, q, L_URIC, H_URIC, "opaque part");
jaroslav@1258	3038	p = q;
jaroslav@1258	3039	}
jaroslav@1258	3040	} else {
jaroslav@1258	3041	ssp = 0;
jaroslav@1258	3042	p = parseHierarchical(0, n);
jaroslav@1258	3043	}
jaroslav@1258	3044	schemeSpecificPart = substring(ssp, p);
jaroslav@1258	3045	if (at(p, n, '#')) {
jaroslav@1258	3046	checkChars(p + 1, n, L_URIC, H_URIC, "fragment");
jaroslav@1258	3047	fragment = substring(p + 1, n);
jaroslav@1258	3048	p = n;
jaroslav@1258	3049	}
jaroslav@1258	3050	if (p < n)
jaroslav@1258	3051	fail("end of URI", p);
jaroslav@1258	3052	}
jaroslav@1258	3053
jaroslav@1258	3054	// [//authority]<path>[?<query>]
jaroslav@1258	3055	//
jaroslav@1258	3056	// DEVIATION from RFC2396: We allow an empty authority component as
jaroslav@1258	3057	// long as it's followed by a non-empty path, query component, or
jaroslav@1258	3058	// fragment component. This is so that URIs such as "file:///foo/bar"
jaroslav@1258	3059	// will parse. This seems to be the intent of RFC2396, though the
jaroslav@1258	3060	// grammar does not permit it. If the authority is empty then the
jaroslav@1258	3061	// userInfo, host, and port components are undefined.
jaroslav@1258	3062	//
jaroslav@1258	3063	// DEVIATION from RFC2396: We allow empty relative paths. This seems
jaroslav@1258	3064	// to be the intent of RFC2396, but the grammar does not permit it.
jaroslav@1258	3065	// The primary consequence of this deviation is that "#f" parses as a
jaroslav@1258	3066	// relative URI with an empty path.
jaroslav@1258	3067	//
jaroslav@1258	3068	private int parseHierarchical(int start, int n)
jaroslav@1258	3069	throws URISyntaxException
jaroslav@1258	3070	{
jaroslav@1258	3071	int p = start;
jaroslav@1258	3072	if (at(p, n, '/') && at(p + 1, n, '/')) {
jaroslav@1258	3073	p += 2;
jaroslav@1258	3074	int q = scan(p, n, "", "/?#");
jaroslav@1258	3075	if (q > p) {
jaroslav@1258	3076	p = parseAuthority(p, q);
jaroslav@1258	3077	} else if (q < n) {
jaroslav@1258	3078	// DEVIATION: Allow empty authority prior to non-empty
jaroslav@1258	3079	// path, query component or fragment identifier
jaroslav@1258	3080	} else
jaroslav@1258	3081	failExpecting("authority", p);
jaroslav@1258	3082	}
jaroslav@1258	3083	int q = scan(p, n, "", "?#"); // DEVIATION: May be empty
jaroslav@1258	3084	checkChars(p, q, L_PATH, H_PATH, "path");
jaroslav@1258	3085	path = substring(p, q);
jaroslav@1258	3086	p = q;
jaroslav@1258	3087	if (at(p, n, '?')) {
jaroslav@1258	3088	p++;
jaroslav@1258	3089	q = scan(p, n, "", "#");
jaroslav@1258	3090	checkChars(p, q, L_URIC, H_URIC, "query");
jaroslav@1258	3091	query = substring(p, q);
jaroslav@1258	3092	p = q;
jaroslav@1258	3093	}
jaroslav@1258	3094	return p;
jaroslav@1258	3095	}
jaroslav@1258	3096
jaroslav@1258	3097	// authority = server \| reg_name
jaroslav@1258	3098	//
jaroslav@1258	3099	// Ambiguity: An authority that is a registry name rather than a server
jaroslav@1258	3100	// might have a prefix that parses as a server. We use the fact that
jaroslav@1258	3101	// the authority component is always followed by '/' or the end of the
jaroslav@1258	3102	// input string to resolve this: If the complete authority did not
jaroslav@1258	3103	// parse as a server then we try to parse it as a registry name.
jaroslav@1258	3104	//
jaroslav@1258	3105	private int parseAuthority(int start, int n)
jaroslav@1258	3106	throws URISyntaxException
jaroslav@1258	3107	{
jaroslav@1258	3108	int p = start;
jaroslav@1258	3109	int q = p;
jaroslav@1258	3110	URISyntaxException ex = null;
jaroslav@1258	3111
jaroslav@1258	3112	boolean serverChars;
jaroslav@1258	3113	boolean regChars;
jaroslav@1258	3114
jaroslav@1258	3115	if (scan(p, n, "", "]") > p) {
jaroslav@1258	3116	// contains a literal IPv6 address, therefore % is allowed
jaroslav@1258	3117	serverChars = (scan(p, n, L_SERVER_PERCENT, H_SERVER_PERCENT) == n);
jaroslav@1258	3118	} else {
jaroslav@1258	3119	serverChars = (scan(p, n, L_SERVER, H_SERVER) == n);
jaroslav@1258	3120	}
jaroslav@1258	3121	regChars = (scan(p, n, L_REG_NAME, H_REG_NAME) == n);
jaroslav@1258	3122
jaroslav@1258	3123	if (regChars && !serverChars) {
jaroslav@1258	3124	// Must be a registry-based authority
jaroslav@1258	3125	authority = substring(p, n);
jaroslav@1258	3126	return n;
jaroslav@1258	3127	}
jaroslav@1258	3128
jaroslav@1258	3129	if (serverChars) {
jaroslav@1258	3130	// Might be (probably is) a server-based authority, so attempt
jaroslav@1258	3131	// to parse it as such. If the attempt fails, try to treat it
jaroslav@1258	3132	// as a registry-based authority.
jaroslav@1258	3133	try {
jaroslav@1258	3134	q = parseServer(p, n);
jaroslav@1258	3135	if (q < n)
jaroslav@1258	3136	failExpecting("end of authority", q);
jaroslav@1258	3137	authority = substring(p, n);
jaroslav@1258	3138	} catch (URISyntaxException x) {
jaroslav@1258	3139	// Undo results of failed parse
jaroslav@1258	3140	userInfo = null;
jaroslav@1258	3141	host = null;
jaroslav@1258	3142	port = -1;
jaroslav@1258	3143	if (requireServerAuthority) {
jaroslav@1258	3144	// If we're insisting upon a server-based authority,
jaroslav@1258	3145	// then just re-throw the exception
jaroslav@1258	3146	throw x;
jaroslav@1258	3147	} else {
jaroslav@1258	3148	// Save the exception in case it doesn't parse as a
jaroslav@1258	3149	// registry either
jaroslav@1258	3150	ex = x;
jaroslav@1258	3151	q = p;
jaroslav@1258	3152	}
jaroslav@1258	3153	}
jaroslav@1258	3154	}
jaroslav@1258	3155
jaroslav@1258	3156	if (q < n) {
jaroslav@1258	3157	if (regChars) {
jaroslav@1258	3158	// Registry-based authority
jaroslav@1258	3159	authority = substring(p, n);
jaroslav@1258	3160	} else if (ex != null) {
jaroslav@1258	3161	// Re-throw exception; it was probably due to
jaroslav@1258	3162	// a malformed IPv6 address
jaroslav@1258	3163	throw ex;
jaroslav@1258	3164	} else {
jaroslav@1258	3165	fail("Illegal character in authority", q);
jaroslav@1258	3166	}
jaroslav@1258	3167	}
jaroslav@1258	3168
jaroslav@1258	3169	return n;
jaroslav@1258	3170	}
jaroslav@1258	3171
jaroslav@1258	3172
jaroslav@1258	3173	// [<userinfo>@]<host>[:<port>]
jaroslav@1258	3174	//
jaroslav@1258	3175	private int parseServer(int start, int n)
jaroslav@1258	3176	throws URISyntaxException
jaroslav@1258	3177	{
jaroslav@1258	3178	int p = start;
jaroslav@1258	3179	int q;
jaroslav@1258	3180
jaroslav@1258	3181	// userinfo
jaroslav@1258	3182	q = scan(p, n, "/?#", "@");
jaroslav@1258	3183	if ((q >= p) && at(q, n, '@')) {
jaroslav@1258	3184	checkChars(p, q, L_USERINFO, H_USERINFO, "user info");
jaroslav@1258	3185	userInfo = substring(p, q);
jaroslav@1258	3186	p = q + 1; // Skip '@'
jaroslav@1258	3187	}
jaroslav@1258	3188
jaroslav@1258	3189	// hostname, IPv4 address, or IPv6 address
jaroslav@1258	3190	if (at(p, n, '[')) {
jaroslav@1258	3191	// DEVIATION from RFC2396: Support IPv6 addresses, per RFC2732
jaroslav@1258	3192	p++;
jaroslav@1258	3193	q = scan(p, n, "/?#", "]");
jaroslav@1258	3194	if ((q > p) && at(q, n, ']')) {
jaroslav@1258	3195	// look for a "%" scope id
jaroslav@1258	3196	int r = scan (p, q, "", "%");
jaroslav@1258	3197	if (r > p) {
jaroslav@1258	3198	parseIPv6Reference(p, r);
jaroslav@1258	3199	if (r+1 == q) {
jaroslav@1258	3200	fail ("scope id expected");
jaroslav@1258	3201	}
jaroslav@1258	3202	checkChars (r+1, q, L_ALPHANUM, H_ALPHANUM,
jaroslav@1258	3203	"scope id");
jaroslav@1258	3204	} else {
jaroslav@1258	3205	parseIPv6Reference(p, q);
jaroslav@1258	3206	}
jaroslav@1258	3207	host = substring(p-1, q+1);
jaroslav@1258	3208	p = q + 1;
jaroslav@1258	3209	} else {
jaroslav@1258	3210	failExpecting("closing bracket for IPv6 address", q);
jaroslav@1258	3211	}
jaroslav@1258	3212	} else {
jaroslav@1258	3213	q = parseIPv4Address(p, n);
jaroslav@1258	3214	if (q <= p)
jaroslav@1258	3215	q = parseHostname(p, n);
jaroslav@1258	3216	p = q;
jaroslav@1258	3217	}
jaroslav@1258	3218
jaroslav@1258	3219	// port
jaroslav@1258	3220	if (at(p, n, ':')) {
jaroslav@1258	3221	p++;
jaroslav@1258	3222	q = scan(p, n, "", "/");
jaroslav@1258	3223	if (q > p) {
jaroslav@1258	3224	checkChars(p, q, L_DIGIT, H_DIGIT, "port number");
jaroslav@1258	3225	try {
jaroslav@1258	3226	port = Integer.parseInt(substring(p, q));
jaroslav@1258	3227	} catch (NumberFormatException x) {
jaroslav@1258	3228	fail("Malformed port number", p);
jaroslav@1258	3229	}
jaroslav@1258	3230	p = q;
jaroslav@1258	3231	}
jaroslav@1258	3232	}
jaroslav@1258	3233	if (p < n)
jaroslav@1258	3234	failExpecting("port number", p);
jaroslav@1258	3235
jaroslav@1258	3236	return p;
jaroslav@1258	3237	}
jaroslav@1258	3238
jaroslav@1258	3239	// Scan a string of decimal digits whose value fits in a byte
jaroslav@1258	3240	//
jaroslav@1258	3241	private int scanByte(int start, int n)
jaroslav@1258	3242	throws URISyntaxException
jaroslav@1258	3243	{
jaroslav@1258	3244	int p = start;
jaroslav@1258	3245	int q = scan(p, n, L_DIGIT, H_DIGIT);
jaroslav@1258	3246	if (q <= p) return q;
jaroslav@1258	3247	if (Integer.parseInt(substring(p, q)) > 255) return p;
jaroslav@1258	3248	return q;
jaroslav@1258	3249	}
jaroslav@1258	3250
jaroslav@1258	3251	// Scan an IPv4 address.
jaroslav@1258	3252	//
jaroslav@1258	3253	// If the strict argument is true then we require that the given
jaroslav@1258	3254	// interval contain nothing besides an IPv4 address; if it is false
jaroslav@1258	3255	// then we only require that it start with an IPv4 address.
jaroslav@1258	3256	//
jaroslav@1258	3257	// If the interval does not contain or start with (depending upon the
jaroslav@1258	3258	// strict argument) legal IPv4 address characters then we return -1
jaroslav@1258	3259	// immediately; otherwise we insist that these characters parse as a
jaroslav@1258	3260	// legal IPv4 address and throw an exception on failure.
jaroslav@1258	3261	//
jaroslav@1258	3262	// We assume that any string of decimal digits and dots must be an IPv4
jaroslav@1258	3263	// address. It won't parse as a hostname anyway, so making that
jaroslav@1258	3264	// assumption here allows more meaningful exceptions to be thrown.
jaroslav@1258	3265	//
jaroslav@1258	3266	private int scanIPv4Address(int start, int n, boolean strict)
jaroslav@1258	3267	throws URISyntaxException
jaroslav@1258	3268	{
jaroslav@1258	3269	int p = start;
jaroslav@1258	3270	int q;
jaroslav@1258	3271	int m = scan(p, n, L_DIGIT \| L_DOT, H_DIGIT \| H_DOT);
jaroslav@1258	3272	if ((m <= p) \|\| (strict && (m != n)))
jaroslav@1258	3273	return -1;
jaroslav@1258	3274	for (;;) {
jaroslav@1258	3275	// Per RFC2732: At most three digits per byte
jaroslav@1258	3276	// Further constraint: Each element fits in a byte
jaroslav@1258	3277	if ((q = scanByte(p, m)) <= p) break; p = q;
jaroslav@1258	3278	if ((q = scan(p, m, '.')) <= p) break; p = q;
jaroslav@1258	3279	if ((q = scanByte(p, m)) <= p) break; p = q;
jaroslav@1258	3280	if ((q = scan(p, m, '.')) <= p) break; p = q;
jaroslav@1258	3281	if ((q = scanByte(p, m)) <= p) break; p = q;
jaroslav@1258	3282	if ((q = scan(p, m, '.')) <= p) break; p = q;
jaroslav@1258	3283	if ((q = scanByte(p, m)) <= p) break; p = q;
jaroslav@1258	3284	if (q < m) break;
jaroslav@1258	3285	return q;
jaroslav@1258	3286	}
jaroslav@1258	3287	fail("Malformed IPv4 address", q);
jaroslav@1258	3288	return -1;
jaroslav@1258	3289	}
jaroslav@1258	3290
jaroslav@1258	3291	// Take an IPv4 address: Throw an exception if the given interval
jaroslav@1258	3292	// contains anything except an IPv4 address
jaroslav@1258	3293	//
jaroslav@1258	3294	private int takeIPv4Address(int start, int n, String expected)
jaroslav@1258	3295	throws URISyntaxException
jaroslav@1258	3296	{
jaroslav@1258	3297	int p = scanIPv4Address(start, n, true);
jaroslav@1258	3298	if (p <= start)
jaroslav@1258	3299	failExpecting(expected, start);
jaroslav@1258	3300	return p;
jaroslav@1258	3301	}
jaroslav@1258	3302
jaroslav@1258	3303	// Attempt to parse an IPv4 address, returning -1 on failure but
jaroslav@1258	3304	// allowing the given interval to contain [:<characters>] after
jaroslav@1258	3305	// the IPv4 address.
jaroslav@1258	3306	//
jaroslav@1258	3307	private int parseIPv4Address(int start, int n) {
jaroslav@1258	3308	int p;
jaroslav@1258	3309
jaroslav@1258	3310	try {
jaroslav@1258	3311	p = scanIPv4Address(start, n, false);
jaroslav@1258	3312	} catch (URISyntaxException x) {
jaroslav@1258	3313	return -1;
jaroslav@1258	3314	} catch (NumberFormatException nfe) {
jaroslav@1258	3315	return -1;
jaroslav@1258	3316	}
jaroslav@1258	3317
jaroslav@1258	3318	if (p > start && p < n) {
jaroslav@1258	3319	// IPv4 address is followed by something - check that
jaroslav@1258	3320	// it's a ":" as this is the only valid character to
jaroslav@1258	3321	// follow an address.
jaroslav@1258	3322	if (charAt(p) != ':') {
jaroslav@1258	3323	p = -1;
jaroslav@1258	3324	}
jaroslav@1258	3325	}
jaroslav@1258	3326
jaroslav@1258	3327	if (p > start)
jaroslav@1258	3328	host = substring(start, p);
jaroslav@1258	3329
jaroslav@1258	3330	return p;
jaroslav@1258	3331	}
jaroslav@1258	3332
jaroslav@1258	3333	// hostname = domainlabel [ "." ] | 1*( domainlabel "." ) toplabel [ "." ]
jaroslav@1258	3334	// domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
jaroslav@1258	3335	// toplabel = alpha | alpha *( alphanum | "-" ) alphanum
jaroslav@1258	3336	//
jaroslav@1258	3337	private int parseHostname(int start, int n)
jaroslav@1258	3338	throws URISyntaxException
jaroslav@1258	3339	{
jaroslav@1258	3340	int p = start;
jaroslav@1258	3341	int q;
jaroslav@1258	3342	int l = -1; // Start of last parsed label
jaroslav@1258	3343
jaroslav@1258	3344	do {
jaroslav@1258	3345	// domainlabel = alphanum [ *( alphanum \| "-" ) alphanum ]
jaroslav@1258	3346	q = scan(p, n, L_ALPHANUM, H_ALPHANUM);
jaroslav@1258	3347	if (q <= p)
jaroslav@1258	3348	break;
jaroslav@1258	3349	l = p;
jaroslav@1258	3350	if (q > p) {
jaroslav@1258	3351	p = q;
jaroslav@1258	3352	q = scan(p, n, L_ALPHANUM \| L_DASH, H_ALPHANUM \| H_DASH);
jaroslav@1258	3353	if (q > p) {
jaroslav@1258	3354	if (charAt(q - 1) == '-')
jaroslav@1258	3355	fail("Illegal character in hostname", q - 1);
jaroslav@1258	3356	p = q;
jaroslav@1258	3357	}
jaroslav@1258	3358	}
jaroslav@1258	3359	q = scan(p, n, '.');
jaroslav@1258	3360	if (q <= p)
jaroslav@1258	3361	break;
jaroslav@1258	3362	p = q;
jaroslav@1258	3363	} while (p < n);
jaroslav@1258	3364
jaroslav@1258	3365	if ((p < n) && !at(p, n, ':'))
jaroslav@1258	3366	fail("Illegal character in hostname", p);
jaroslav@1258	3367
jaroslav@1258	3368	if (l < 0)
jaroslav@1258	3369	failExpecting("hostname", start);
jaroslav@1258	3370
jaroslav@1258	3371	// for a fully qualified hostname check that the rightmost
jaroslav@1258	3372	// label starts with an alpha character.
jaroslav@1258	3373	if (l > start && !match(charAt(l), L_ALPHA, H_ALPHA)) {
jaroslav@1258	3374	fail("Illegal character in hostname", l);
jaroslav@1258	3375	}
jaroslav@1258	3376
jaroslav@1258	3377	host = substring(start, p);
jaroslav@1258	3378	return p;
jaroslav@1258	3379	}
jaroslav@1258	3380
jaroslav@1258	3381
jaroslav@1258	3382	// IPv6 address parsing, from RFC2373: IPv6 Addressing Architecture
jaroslav@1258	3383	//
jaroslav@1258	3384	// Bug: The grammar in RFC2373 Appendix B does not allow addresses of
jaroslav@1258	3385	// the form ::12.34.56.78, which are clearly shown in the examples
jaroslav@1258	3386	// earlier in the document. Here is the original grammar:
jaroslav@1258	3387	//
jaroslav@1258	3388	// IPv6address = hexpart [ ":" IPv4address ]
jaroslav@1258	3389	// hexpart = hexseq | hexseq "::" [ hexseq ] | "::" [ hexseq ]
jaroslav@1258	3390	// hexseq = hex4 *( ":" hex4)
jaroslav@1258	3391	// hex4 = 1*4HEXDIG
jaroslav@1258	3392	//
jaroslav@1258	3393	// We therefore use the following revised grammar:
jaroslav@1258	3394	//
jaroslav@1258	3395	// IPv6address = hexseq [ ":" IPv4address ]
jaroslav@1258	3396	// | hexseq [ "::" [ hexpost ] ]
jaroslav@1258	3397	// | "::" [ hexpost ]
jaroslav@1258	3398	// hexpost = hexseq | hexseq ":" IPv4address | IPv4address
jaroslav@1258	3399	// hexseq = hex4 *( ":" hex4)
jaroslav@1258	3400	// hex4 = 1*4HEXDIG
jaroslav@1258	3401	//
jaroslav@1258	3402	// This covers all and only the following cases:
jaroslav@1258	3403	//
jaroslav@1258	3404	// hexseq
jaroslav@1258	3405	// hexseq : IPv4address
jaroslav@1258	3406	// hexseq ::
jaroslav@1258	3407	// hexseq :: hexseq
jaroslav@1258	3408	// hexseq :: hexseq : IPv4address
jaroslav@1258	3409	// hexseq :: IPv4address
jaroslav@1258	3410	// :: hexseq
jaroslav@1258	3411	// :: hexseq : IPv4address
jaroslav@1258	3412	// :: IPv4address
jaroslav@1258	3413	// ::
jaroslav@1258	3414	//
jaroslav@1258	3415	// Additionally we constrain the IPv6 address as follows :-
jaroslav@1258	3416	//
jaroslav@1258	3417	// i. IPv6 addresses without compressed zeros should contain
jaroslav@1258	3418	// exactly 16 bytes.
jaroslav@1258	3419	//
jaroslav@1258	3420	// ii. IPv6 addresses with compressed zeros should contain
jaroslav@1258	3421	// less than 16 bytes.
jaroslav@1258	3422
jaroslav@1258	3423	private int ipv6byteCount = 0;
jaroslav@1258	3424
jaroslav@1258	3425	private int parseIPv6Reference(int start, int n)
jaroslav@1258	3426	throws URISyntaxException
jaroslav@1258	3427	{
jaroslav@1258	3428	int p = start;
jaroslav@1258	3429	int q;
jaroslav@1258	3430	boolean compressedZeros = false;
jaroslav@1258	3431
jaroslav@1258	3432	q = scanHexSeq(p, n);
jaroslav@1258	3433
jaroslav@1258	3434	if (q > p) {
jaroslav@1258	3435	p = q;
jaroslav@1258	3436	if (at(p, n, "::")) {
jaroslav@1258	3437	compressedZeros = true;
jaroslav@1258	3438	p = scanHexPost(p + 2, n);
jaroslav@1258	3439	} else if (at(p, n, ':')) {
jaroslav@1258	3440	p = takeIPv4Address(p + 1, n, "IPv4 address");
jaroslav@1258	3441	ipv6byteCount += 4;
jaroslav@1258	3442	}
jaroslav@1258	3443	} else if (at(p, n, "::")) {
jaroslav@1258	3444	compressedZeros = true;
jaroslav@1258	3445	p = scanHexPost(p + 2, n);
jaroslav@1258	3446	}
jaroslav@1258	3447	if (p < n)
jaroslav@1258	3448	fail("Malformed IPv6 address", start);
jaroslav@1258	3449	if (ipv6byteCount > 16)
jaroslav@1258	3450	fail("IPv6 address too long", start);
jaroslav@1258	3451	if (!compressedZeros && ipv6byteCount < 16)
jaroslav@1258	3452	fail("IPv6 address too short", start);
jaroslav@1258	3453	if (compressedZeros && ipv6byteCount == 16)
jaroslav@1258	3454	fail("Malformed IPv6 address", start);
jaroslav@1258	3455
jaroslav@1258	3456	return p;
jaroslav@1258	3457	}
jaroslav@1258	3458
jaroslav@1258	3459	private int scanHexPost(int start, int n)
jaroslav@1258	3460	throws URISyntaxException
jaroslav@1258	3461	{
jaroslav@1258	3462	int p = start;
jaroslav@1258	3463	int q;
jaroslav@1258	3464
jaroslav@1258	3465	if (p == n)
jaroslav@1258	3466	return p;
jaroslav@1258	3467
jaroslav@1258	3468	q = scanHexSeq(p, n);
jaroslav@1258	3469	if (q > p) {
jaroslav@1258	3470	p = q;
jaroslav@1258	3471	if (at(p, n, ':')) {
jaroslav@1258	3472	p++;
jaroslav@1258	3473	p = takeIPv4Address(p, n, "hex digits or IPv4 address");
jaroslav@1258	3474	ipv6byteCount += 4;
jaroslav@1258	3475	}
jaroslav@1258	3476	} else {
jaroslav@1258	3477	p = takeIPv4Address(p, n, "hex digits or IPv4 address");
jaroslav@1258	3478	ipv6byteCount += 4;
jaroslav@1258	3479	}
jaroslav@1258	3480	return p;
jaroslav@1258	3481	}
jaroslav@1258	3482
jaroslav@1258	3483	// Scan a hex sequence; return -1 if one could not be scanned
jaroslav@1258	3484	//
jaroslav@1258	3485	private int scanHexSeq(int start, int n)
jaroslav@1258	3486	throws URISyntaxException
jaroslav@1258	3487	{
jaroslav@1258	3488	int p = start;
jaroslav@1258	3489	int q;
jaroslav@1258	3490
jaroslav@1258	3491	q = scan(p, n, L_HEX, H_HEX);
jaroslav@1258	3492	if (q <= p)
jaroslav@1258	3493	return -1;
jaroslav@1258	3494	if (at(q, n, '.')) // Beginning of IPv4 address
jaroslav@1258	3495	return -1;
jaroslav@1258	3496	if (q > p + 4)
jaroslav@1258	3497	fail("IPv6 hexadecimal digit sequence too long", p);
jaroslav@1258	3498	ipv6byteCount += 2;
jaroslav@1258	3499	p = q;
jaroslav@1258	3500	while (p < n) {
jaroslav@1258	3501	if (!at(p, n, ':'))
jaroslav@1258	3502	break;
jaroslav@1258	3503	if (at(p + 1, n, ':'))
jaroslav@1258	3504	break; // "::"
jaroslav@1258	3505	p++;
jaroslav@1258	3506	q = scan(p, n, L_HEX, H_HEX);
jaroslav@1258	3507	if (q <= p)
jaroslav@1258	3508	failExpecting("digits for an IPv6 address", p);
jaroslav@1258	3509	if (at(q, n, '.')) { // Beginning of IPv4 address
jaroslav@1258	3510	p--;
jaroslav@1258	3511	break;
jaroslav@1258	3512	}
jaroslav@1258	3513	if (q > p + 4)
jaroslav@1258	3514	fail("IPv6 hexadecimal digit sequence too long", p);
jaroslav@1258	3515	ipv6byteCount += 2;
jaroslav@1258	3516	p = q;
jaroslav@1258	3517	}
jaroslav@1258	3518
jaroslav@1258	3519	return p;
jaroslav@1258	3520	}
jaroslav@1258	3521
jaroslav@1258	3522	}
jaroslav@1258	3523
jaroslav@1258	3524	}

author	Jaroslav Tulach <jaroslav.tulach@apidesign.org>
	Sat, 07 Sep 2013 13:51:24 +0200
branch	jdk7-b147
changeset 1258	724f3e1ea53e
permissions	-rw-r--r--