rt/emul/compact/src/main/java/java/util/regex/Matcher.java
author Jaroslav Tulach <jtulach@netbeans.org>
Mon, 07 Oct 2013 16:13:27 +0200
branchjdk7-b147
changeset 1348 bca65655b36b
permissions -rw-r--r--
Adding RegEx implementation
     1 /*
     2  * Copyright (c) 1999, 2009, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Oracle in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    22  * or visit www.oracle.com if you need additional information or have any
    23  * questions.
    24  */
    25 
    26 package java.util.regex;
    27 
    28 
    29 /**
    30  * An engine that performs match operations on a {@link java.lang.CharSequence
    31  * </code>character sequence<code>} by interpreting a {@link Pattern}.
    32  *
    33  * <p> A matcher is created from a pattern by invoking the pattern's {@link
    34  * Pattern#matcher matcher} method.  Once created, a matcher can be used to
    35  * perform three different kinds of match operations:
    36  *
    37  * <ul>
    38  *
    39  *   <li><p> The {@link #matches matches} method attempts to match the entire
    40  *   input sequence against the pattern.  </p></li>
    41  *
    42  *   <li><p> The {@link #lookingAt lookingAt} method attempts to match the
    43  *   input sequence, starting at the beginning, against the pattern.  </p></li>
    44  *
    45  *   <li><p> The {@link #find find} method scans the input sequence looking for
    46  *   the next subsequence that matches the pattern.  </p></li>
    47  *
    48  * </ul>
    49  *
    50  * <p> Each of these methods returns a boolean indicating success or failure.
    51  * More information about a successful match can be obtained by querying the
    52  * state of the matcher.
    53  *
    54  * <p> A matcher finds matches in a subset of its input called the
    55  * <i>region</i>. By default, the region contains all of the matcher's input.
    56  * The region can be modified via the{@link #region region} method and queried
    57  * via the {@link #regionStart regionStart} and {@link #regionEnd regionEnd}
    58  * methods. The way that the region boundaries interact with some pattern
    59  * constructs can be changed. See {@link #useAnchoringBounds
    60  * useAnchoringBounds} and {@link #useTransparentBounds useTransparentBounds}
    61  * for more details.
    62  *
    63  * <p> This class also defines methods for replacing matched subsequences with
    64  * new strings whose contents can, if desired, be computed from the match
    65  * result.  The {@link #appendReplacement appendReplacement} and {@link
    66  * #appendTail appendTail} methods can be used in tandem in order to collect
    67  * the result into an existing string buffer, or the more convenient {@link
    68  * #replaceAll replaceAll} method can be used to create a string in which every
    69  * matching subsequence in the input sequence is replaced.
    70  *
    71  * <p> The explicit state of a matcher includes the start and end indices of
    72  * the most recent successful match.  It also includes the start and end
    73  * indices of the input subsequence captured by each <a
    74  * href="Pattern.html#cg">capturing group</a> in the pattern as well as a total
    75  * count of such subsequences.  As a convenience, methods are also provided for
    76  * returning these captured subsequences in string form.
    77  *
    78  * <p> The explicit state of a matcher is initially undefined; attempting to
    79  * query any part of it before a successful match will cause an {@link
    80  * IllegalStateException} to be thrown.  The explicit state of a matcher is
    81  * recomputed by every match operation.
    82  *
    83  * <p> The implicit state of a matcher includes the input character sequence as
    84  * well as the <i>append position</i>, which is initially zero and is updated
    85  * by the {@link #appendReplacement appendReplacement} method.
    86  *
    87  * <p> A matcher may be reset explicitly by invoking its {@link #reset()}
    88  * method or, if a new input sequence is desired, its {@link
    89  * #reset(java.lang.CharSequence) reset(CharSequence)} method.  Resetting a
    90  * matcher discards its explicit state information and sets the append position
    91  * to zero.
    92  *
    93  * <p> Instances of this class are not safe for use by multiple concurrent
    94  * threads. </p>
    95  *
    96  *
    97  * @author      Mike McCloskey
    98  * @author      Mark Reinhold
    99  * @author      JSR-51 Expert Group
   100  * @since       1.4
   101  * @spec        JSR-51
   102  */
   103 
   104 public final class Matcher implements MatchResult {
   105 
   106     /**
   107      * The Pattern object that created this Matcher.
   108      */
   109     Pattern parentPattern;
   110 
   111     /**
   112      * The storage used by groups. They may contain invalid values if
   113      * a group was skipped during the matching.
   114      */
   115     int[] groups;
   116 
   117     /**
   118      * The range within the sequence that is to be matched. Anchors
   119      * will match at these "hard" boundaries. Changing the region
   120      * changes these values.
   121      */
   122     int from, to;
   123 
   124     /**
   125      * Lookbehind uses this value to ensure that the subexpression
   126      * match ends at the point where the lookbehind was encountered.
   127      */
   128     int lookbehindTo;
   129 
   130     /**
   131      * The original string being matched.
   132      */
   133     CharSequence text;
   134 
   135     /**
   136      * Matcher state used by the last node. NOANCHOR is used when a
   137      * match does not have to consume all of the input. ENDANCHOR is
   138      * the mode used for matching all the input.
   139      */
   140     static final int ENDANCHOR = 1;
   141     static final int NOANCHOR = 0;
   142     int acceptMode = NOANCHOR;
   143 
   144     /**
   145      * The range of string that last matched the pattern. If the last
   146      * match failed then first is -1; last initially holds 0 then it
   147      * holds the index of the end of the last match (which is where the
   148      * next search starts).
   149      */
   150     int first = -1, last = 0;
   151 
   152     /**
   153      * The end index of what matched in the last match operation.
   154      */
   155     int oldLast = -1;
   156 
   157     /**
   158      * The index of the last position appended in a substitution.
   159      */
   160     int lastAppendPosition = 0;
   161 
   162     /**
   163      * Storage used by nodes to tell what repetition they are on in
   164      * a pattern, and where groups begin. The nodes themselves are stateless,
   165      * so they rely on this field to hold state during a match.
   166      */
   167     int[] locals;
   168 
   169     /**
   170      * Boolean indicating whether or not more input could change
   171      * the results of the last match.
   172      *
   173      * If hitEnd is true, and a match was found, then more input
   174      * might cause a different match to be found.
   175      * If hitEnd is true and a match was not found, then more
   176      * input could cause a match to be found.
   177      * If hitEnd is false and a match was found, then more input
   178      * will not change the match.
   179      * If hitEnd is false and a match was not found, then more
   180      * input will not cause a match to be found.
   181      */
   182     boolean hitEnd;
   183 
   184     /**
   185      * Boolean indicating whether or not more input could change
   186      * a positive match into a negative one.
   187      *
   188      * If requireEnd is true, and a match was found, then more
   189      * input could cause the match to be lost.
   190      * If requireEnd is false and a match was found, then more
   191      * input might change the match but the match won't be lost.
   192      * If a match was not found, then requireEnd has no meaning.
   193      */
   194     boolean requireEnd;
   195 
   196     /**
   197      * If transparentBounds is true then the boundaries of this
   198      * matcher's region are transparent to lookahead, lookbehind,
   199      * and boundary matching constructs that try to see beyond them.
   200      */
   201     boolean transparentBounds = false;
   202 
   203     /**
   204      * If anchoringBounds is true then the boundaries of this
   205      * matcher's region match anchors such as ^ and $.
   206      */
   207     boolean anchoringBounds = true;
   208 
   209     /**
   210      * No default constructor.
   211      */
   212     Matcher() {
   213     }
   214 
   215     /**
   216      * All matchers have the state used by Pattern during a match.
   217      */
   218     Matcher(Pattern parent, CharSequence text) {
   219         this.parentPattern = parent;
   220         this.text = text;
   221 
   222         // Allocate state storage
   223         int parentGroupCount = Math.max(parent.capturingGroupCount, 10);
   224         groups = new int[parentGroupCount * 2];
   225         locals = new int[parent.localCount];
   226 
   227         // Put fields into initial states
   228         reset();
   229     }
   230 
   231     /**
   232      * Returns the pattern that is interpreted by this matcher.
   233      *
   234      * @return  The pattern for which this matcher was created
   235      */
   236     public Pattern pattern() {
   237         return parentPattern;
   238     }
   239 
   240     /**
   241      * Returns the match state of this matcher as a {@link MatchResult}.
   242      * The result is unaffected by subsequent operations performed upon this
   243      * matcher.
   244      *
   245      * @return  a <code>MatchResult</code> with the state of this matcher
   246      * @since 1.5
   247      */
   248     public MatchResult toMatchResult() {
   249         Matcher result = new Matcher(this.parentPattern, text.toString());
   250         result.first = this.first;
   251         result.last = this.last;
   252         result.groups = this.groups.clone();
   253         return result;
   254     }
   255 
   256     /**
   257       * Changes the <tt>Pattern</tt> that this <tt>Matcher</tt> uses to
   258       * find matches with.
   259       *
   260       * <p> This method causes this matcher to lose information
   261       * about the groups of the last match that occurred. The
   262       * matcher's position in the input is maintained and its
   263       * last append position is unaffected.</p>
   264       *
   265       * @param  newPattern
   266       *         The new pattern used by this matcher
   267       * @return  This matcher
   268       * @throws  IllegalArgumentException
   269       *          If newPattern is <tt>null</tt>
   270       * @since 1.5
   271       */
   272     public Matcher usePattern(Pattern newPattern) {
   273         if (newPattern == null)
   274             throw new IllegalArgumentException("Pattern cannot be null");
   275         parentPattern = newPattern;
   276 
   277         // Reallocate state storage
   278         int parentGroupCount = Math.max(newPattern.capturingGroupCount, 10);
   279         groups = new int[parentGroupCount * 2];
   280         locals = new int[newPattern.localCount];
   281         for (int i = 0; i < groups.length; i++)
   282             groups[i] = -1;
   283         for (int i = 0; i < locals.length; i++)
   284             locals[i] = -1;
   285         return this;
   286     }
   287 
   288     /**
   289      * Resets this matcher.
   290      *
   291      * <p> Resetting a matcher discards all of its explicit state information
   292      * and sets its append position to zero. The matcher's region is set to the
   293      * default region, which is its entire character sequence. The anchoring
   294      * and transparency of this matcher's region boundaries are unaffected.
   295      *
   296      * @return  This matcher
   297      */
   298     public Matcher reset() {
   299         first = -1;
   300         last = 0;
   301         oldLast = -1;
   302         for(int i=0; i<groups.length; i++)
   303             groups[i] = -1;
   304         for(int i=0; i<locals.length; i++)
   305             locals[i] = -1;
   306         lastAppendPosition = 0;
   307         from = 0;
   308         to = getTextLength();
   309         return this;
   310     }
   311 
   312     /**
   313      * Resets this matcher with a new input sequence.
   314      *
   315      * <p> Resetting a matcher discards all of its explicit state information
   316      * and sets its append position to zero.  The matcher's region is set to
   317      * the default region, which is its entire character sequence.  The
   318      * anchoring and transparency of this matcher's region boundaries are
   319      * unaffected.
   320      *
   321      * @param  input
   322      *         The new input character sequence
   323      *
   324      * @return  This matcher
   325      */
   326     public Matcher reset(CharSequence input) {
   327         text = input;
   328         return reset();
   329     }
   330 
   331     /**
   332      * Returns the start index of the previous match.  </p>
   333      *
   334      * @return  The index of the first character matched
   335      *
   336      * @throws  IllegalStateException
   337      *          If no match has yet been attempted,
   338      *          or if the previous match operation failed
   339      */
   340     public int start() {
   341         if (first < 0)
   342             throw new IllegalStateException("No match available");
   343         return first;
   344     }
   345 
   346     /**
   347      * Returns the start index of the subsequence captured by the given group
   348      * during the previous match operation.
   349      *
   350      * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
   351      * to right, starting at one.  Group zero denotes the entire pattern, so
   352      * the expression <i>m.</i><tt>start(0)</tt> is equivalent to
   353      * <i>m.</i><tt>start()</tt>.  </p>
   354      *
   355      * @param  group
   356      *         The index of a capturing group in this matcher's pattern
   357      *
   358      * @return  The index of the first character captured by the group,
   359      *          or <tt>-1</tt> if the match was successful but the group
   360      *          itself did not match anything
   361      *
   362      * @throws  IllegalStateException
   363      *          If no match has yet been attempted,
   364      *          or if the previous match operation failed
   365      *
   366      * @throws  IndexOutOfBoundsException
   367      *          If there is no capturing group in the pattern
   368      *          with the given index
   369      */
   370     public int start(int group) {
   371         if (first < 0)
   372             throw new IllegalStateException("No match available");
   373         if (group > groupCount())
   374             throw new IndexOutOfBoundsException("No group " + group);
   375         return groups[group * 2];
   376     }
   377 
   378     /**
   379      * Returns the offset after the last character matched.  </p>
   380      *
   381      * @return  The offset after the last character matched
   382      *
   383      * @throws  IllegalStateException
   384      *          If no match has yet been attempted,
   385      *          or if the previous match operation failed
   386      */
   387     public int end() {
   388         if (first < 0)
   389             throw new IllegalStateException("No match available");
   390         return last;
   391     }
   392 
   393     /**
   394      * Returns the offset after the last character of the subsequence
   395      * captured by the given group during the previous match operation.
   396      *
   397      * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
   398      * to right, starting at one.  Group zero denotes the entire pattern, so
   399      * the expression <i>m.</i><tt>end(0)</tt> is equivalent to
   400      * <i>m.</i><tt>end()</tt>.  </p>
   401      *
   402      * @param  group
   403      *         The index of a capturing group in this matcher's pattern
   404      *
   405      * @return  The offset after the last character captured by the group,
   406      *          or <tt>-1</tt> if the match was successful
   407      *          but the group itself did not match anything
   408      *
   409      * @throws  IllegalStateException
   410      *          If no match has yet been attempted,
   411      *          or if the previous match operation failed
   412      *
   413      * @throws  IndexOutOfBoundsException
   414      *          If there is no capturing group in the pattern
   415      *          with the given index
   416      */
   417     public int end(int group) {
   418         if (first < 0)
   419             throw new IllegalStateException("No match available");
   420         if (group > groupCount())
   421             throw new IndexOutOfBoundsException("No group " + group);
   422         return groups[group * 2 + 1];
   423     }
   424 
   425     /**
   426      * Returns the input subsequence matched by the previous match.
   427      *
   428      * <p> For a matcher <i>m</i> with input sequence <i>s</i>,
   429      * the expressions <i>m.</i><tt>group()</tt> and
   430      * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(),</tt>&nbsp;<i>m.</i><tt>end())</tt>
   431      * are equivalent.  </p>
   432      *
   433      * <p> Note that some patterns, for example <tt>a*</tt>, match the empty
   434      * string.  This method will return the empty string when the pattern
   435      * successfully matches the empty string in the input.  </p>
   436      *
   437      * @return The (possibly empty) subsequence matched by the previous match,
   438      *         in string form
   439      *
   440      * @throws  IllegalStateException
   441      *          If no match has yet been attempted,
   442      *          or if the previous match operation failed
   443      */
   444     public String group() {
   445         return group(0);
   446     }
   447 
   448     /**
   449      * Returns the input subsequence captured by the given group during the
   450      * previous match operation.
   451      *
   452      * <p> For a matcher <i>m</i>, input sequence <i>s</i>, and group index
   453      * <i>g</i>, the expressions <i>m.</i><tt>group(</tt><i>g</i><tt>)</tt> and
   454      * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(</tt><i>g</i><tt>),</tt>&nbsp;<i>m.</i><tt>end(</tt><i>g</i><tt>))</tt>
   455      * are equivalent.  </p>
   456      *
   457      * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
   458      * to right, starting at one.  Group zero denotes the entire pattern, so
   459      * the expression <tt>m.group(0)</tt> is equivalent to <tt>m.group()</tt>.
   460      * </p>
   461      *
   462      * <p> If the match was successful but the group specified failed to match
   463      * any part of the input sequence, then <tt>null</tt> is returned. Note
   464      * that some groups, for example <tt>(a*)</tt>, match the empty string.
   465      * This method will return the empty string when such a group successfully
   466      * matches the empty string in the input.  </p>
   467      *
   468      * @param  group
   469      *         The index of a capturing group in this matcher's pattern
   470      *
   471      * @return  The (possibly empty) subsequence captured by the group
   472      *          during the previous match, or <tt>null</tt> if the group
   473      *          failed to match part of the input
   474      *
   475      * @throws  IllegalStateException
   476      *          If no match has yet been attempted,
   477      *          or if the previous match operation failed
   478      *
   479      * @throws  IndexOutOfBoundsException
   480      *          If there is no capturing group in the pattern
   481      *          with the given index
   482      */
   483     public String group(int group) {
   484         if (first < 0)
   485             throw new IllegalStateException("No match found");
   486         if (group < 0 || group > groupCount())
   487             throw new IndexOutOfBoundsException("No group " + group);
   488         if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
   489             return null;
   490         return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();
   491     }
   492 
   493     /**
   494      * Returns the input subsequence captured by the given
   495      * <a href="Pattern.html#groupname">named-capturing group</a> during the previous
   496      * match operation.
   497      *
   498      * <p> If the match was successful but the group specified failed to match
   499      * any part of the input sequence, then <tt>null</tt> is returned. Note
   500      * that some groups, for example <tt>(a*)</tt>, match the empty string.
   501      * This method will return the empty string when such a group successfully
   502      * matches the empty string in the input.  </p>
   503      *
   504      * @param  name
   505      *         The name of a named-capturing group in this matcher's pattern
   506      *
   507      * @return  The (possibly empty) subsequence captured by the named group
   508      *          during the previous match, or <tt>null</tt> if the group
   509      *          failed to match part of the input
   510      *
   511      * @throws  IllegalStateException
   512      *          If no match has yet been attempted,
   513      *          or if the previous match operation failed
   514      *
   515      * @throws  IllegalArgumentException
   516      *          If there is no capturing group in the pattern
   517      *          with the given name
   518      */
   519     public String group(String name) {
   520         if (name == null)
   521             throw new NullPointerException("Null group name");
   522         if (first < 0)
   523             throw new IllegalStateException("No match found");
   524         if (!parentPattern.namedGroups().containsKey(name))
   525             throw new IllegalArgumentException("No group with name <" + name + ">");
   526         int group = parentPattern.namedGroups().get(name);
   527         if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
   528             return null;
   529         return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();
   530     }
   531 
   532     /**
   533      * Returns the number of capturing groups in this matcher's pattern.
   534      *
   535      * <p> Group zero denotes the entire pattern by convention. It is not
   536      * included in this count.
   537      *
   538      * <p> Any non-negative integer smaller than or equal to the value
   539      * returned by this method is guaranteed to be a valid group index for
   540      * this matcher.  </p>
   541      *
   542      * @return The number of capturing groups in this matcher's pattern
   543      */
   544     public int groupCount() {
   545         return parentPattern.capturingGroupCount - 1;
   546     }
   547 
   548     /**
   549      * Attempts to match the entire region against the pattern.
   550      *
   551      * <p> If the match succeeds then more information can be obtained via the
   552      * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods.  </p>
   553      *
   554      * @return  <tt>true</tt> if, and only if, the entire region sequence
   555      *          matches this matcher's pattern
   556      */
   557     public boolean matches() {
   558         return match(from, ENDANCHOR);
   559     }
   560 
   561     /**
   562      * Attempts to find the next subsequence of the input sequence that matches
   563      * the pattern.
   564      *
   565      * <p> This method starts at the beginning of this matcher's region, or, if
   566      * a previous invocation of the method was successful and the matcher has
   567      * not since been reset, at the first character not matched by the previous
   568      * match.
   569      *
   570      * <p> If the match succeeds then more information can be obtained via the
   571      * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods.  </p>
   572      *
   573      * @return  <tt>true</tt> if, and only if, a subsequence of the input
   574      *          sequence matches this matcher's pattern
   575      */
   576     public boolean find() {
   577         int nextSearchIndex = last;
   578         if (nextSearchIndex == first)
   579             nextSearchIndex++;
   580 
   581         // If next search starts before region, start it at region
   582         if (nextSearchIndex < from)
   583             nextSearchIndex = from;
   584 
   585         // If next search starts beyond region then it fails
   586         if (nextSearchIndex > to) {
   587             for (int i = 0; i < groups.length; i++)
   588                 groups[i] = -1;
   589             return false;
   590         }
   591         return search(nextSearchIndex);
   592     }
   593 
   594     /**
   595      * Resets this matcher and then attempts to find the next subsequence of
   596      * the input sequence that matches the pattern, starting at the specified
   597      * index.
   598      *
   599      * <p> If the match succeeds then more information can be obtained via the
   600      * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods, and subsequent
   601      * invocations of the {@link #find()} method will start at the first
   602      * character not matched by this match.  </p>
   603      *
   604      * @throws  IndexOutOfBoundsException
   605      *          If start is less than zero or if start is greater than the
   606      *          length of the input sequence.
   607      *
   608      * @return  <tt>true</tt> if, and only if, a subsequence of the input
   609      *          sequence starting at the given index matches this matcher's
   610      *          pattern
   611      */
   612     public boolean find(int start) {
   613         int limit = getTextLength();
   614         if ((start < 0) || (start > limit))
   615             throw new IndexOutOfBoundsException("Illegal start index");
   616         reset();
   617         return search(start);
   618     }
   619 
   620     /**
   621      * Attempts to match the input sequence, starting at the beginning of the
   622      * region, against the pattern.
   623      *
   624      * <p> Like the {@link #matches matches} method, this method always starts
   625      * at the beginning of the region; unlike that method, it does not
   626      * require that the entire region be matched.
   627      *
   628      * <p> If the match succeeds then more information can be obtained via the
   629      * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods.  </p>
   630      *
   631      * @return  <tt>true</tt> if, and only if, a prefix of the input
   632      *          sequence matches this matcher's pattern
   633      */
   634     public boolean lookingAt() {
   635         return match(from, NOANCHOR);
   636     }
   637 
   638     /**
   639      * Returns a literal replacement <code>String</code> for the specified
   640      * <code>String</code>.
   641      *
   642      * This method produces a <code>String</code> that will work
   643      * as a literal replacement <code>s</code> in the
   644      * <code>appendReplacement</code> method of the {@link Matcher} class.
   645      * The <code>String</code> produced will match the sequence of characters
   646      * in <code>s</code> treated as a literal sequence. Slashes ('\') and
   647      * dollar signs ('$') will be given no special meaning.
   648      *
   649      * @param  s The string to be literalized
   650      * @return  A literal string replacement
   651      * @since 1.5
   652      */
   653     public static String quoteReplacement(String s) {
   654         if ((s.indexOf('\\') == -1) && (s.indexOf('$') == -1))
   655             return s;
   656         StringBuilder sb = new StringBuilder();
   657         for (int i=0; i<s.length(); i++) {
   658             char c = s.charAt(i);
   659             if (c == '\\' || c == '$') {
   660                 sb.append('\\');
   661             }
   662             sb.append(c);
   663         }
   664         return sb.toString();
   665     }
   666 
   667     /**
   668      * Implements a non-terminal append-and-replace step.
   669      *
   670      * <p> This method performs the following actions: </p>
   671      *
   672      * <ol>
   673      *
   674      *   <li><p> It reads characters from the input sequence, starting at the
   675      *   append position, and appends them to the given string buffer.  It
   676      *   stops after reading the last character preceding the previous match,
   677      *   that is, the character at index {@link
   678      *   #start()}&nbsp;<tt>-</tt>&nbsp;<tt>1</tt>.  </p></li>
   679      *
   680      *   <li><p> It appends the given replacement string to the string buffer.
   681      *   </p></li>
   682      *
   683      *   <li><p> It sets the append position of this matcher to the index of
   684      *   the last character matched, plus one, that is, to {@link #end()}.
   685      *   </p></li>
   686      *
   687      * </ol>
   688      *
   689      * <p> The replacement string may contain references to subsequences
   690      * captured during the previous match: Each occurrence of
   691      * <tt>${</tt><i>name</i><tt>}</tt> or <tt>$</tt><i>g</i>
   692      * will be replaced by the result of evaluating the corresponding
   693      * {@link #group(String) group(name)} or {@link #group(int) group(g)</tt>}
   694      * respectively. For  <tt>$</tt><i>g</i><tt></tt>,
   695      * the first number after the <tt>$</tt> is always treated as part of
   696      * the group reference. Subsequent numbers are incorporated into g if
   697      * they would form a legal group reference. Only the numerals '0'
   698      * through '9' are considered as potential components of the group
   699      * reference. If the second group matched the string <tt>"foo"</tt>, for
   700      * example, then passing the replacement string <tt>"$2bar"</tt> would
   701      * cause <tt>"foobar"</tt> to be appended to the string buffer. A dollar
   702      * sign (<tt>$</tt>) may be included as a literal in the replacement
   703      * string by preceding it with a backslash (<tt>\$</tt>).
   704      *
   705      * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
   706      * the replacement string may cause the results to be different than if it
   707      * were being treated as a literal replacement string. Dollar signs may be
   708      * treated as references to captured subsequences as described above, and
   709      * backslashes are used to escape literal characters in the replacement
   710      * string.
   711      *
   712      * <p> This method is intended to be used in a loop together with the
   713      * {@link #appendTail appendTail} and {@link #find find} methods.  The
   714      * following code, for example, writes <tt>one dog two dogs in the
   715      * yard</tt> to the standard-output stream: </p>
   716      *
   717      * <blockquote><pre>
   718      * Pattern p = Pattern.compile("cat");
   719      * Matcher m = p.matcher("one cat two cats in the yard");
   720      * StringBuffer sb = new StringBuffer();
   721      * while (m.find()) {
   722      *     m.appendReplacement(sb, "dog");
   723      * }
   724      * m.appendTail(sb);
   725      * System.out.println(sb.toString());</pre></blockquote>
   726      *
   727      * @param  sb
   728      *         The target string buffer
   729      *
   730      * @param  replacement
   731      *         The replacement string
   732      *
   733      * @return  This matcher
   734      *
   735      * @throws  IllegalStateException
   736      *          If no match has yet been attempted,
   737      *          or if the previous match operation failed
   738      *
   739      * @throws  IllegalArgumentException
   740      *          If the replacement string refers to a named-capturing
   741      *          group that does not exist in the pattern
   742      *
   743      * @throws  IndexOutOfBoundsException
   744      *          If the replacement string refers to a capturing group
   745      *          that does not exist in the pattern
   746      */
   747     public Matcher appendReplacement(StringBuffer sb, String replacement) {
   748 
   749         // If no match, return error
   750         if (first < 0)
   751             throw new IllegalStateException("No match available");
   752 
   753         // Process substitution string to replace group references with groups
   754         int cursor = 0;
   755         StringBuilder result = new StringBuilder();
   756 
   757         while (cursor < replacement.length()) {
   758             char nextChar = replacement.charAt(cursor);
   759             if (nextChar == '\\') {
   760                 cursor++;
   761                 nextChar = replacement.charAt(cursor);
   762                 result.append(nextChar);
   763                 cursor++;
   764             } else if (nextChar == '$') {
   765                 // Skip past $
   766                 cursor++;
   767                 // A StringIndexOutOfBoundsException is thrown if
   768                 // this "$" is the last character in replacement
   769                 // string in current implementation, a IAE might be
   770                 // more appropriate.
   771                 nextChar = replacement.charAt(cursor);
   772                 int refNum = -1;
   773                 if (nextChar == '{') {
   774                     cursor++;
   775                     StringBuilder gsb = new StringBuilder();
   776                     while (cursor < replacement.length()) {
   777                         nextChar = replacement.charAt(cursor);
   778                         if (ASCII.isLower(nextChar) ||
   779                             ASCII.isUpper(nextChar) ||
   780                             ASCII.isDigit(nextChar)) {
   781                             gsb.append(nextChar);
   782                             cursor++;
   783                         } else {
   784                             break;
   785                         }
   786                     }
   787                     if (gsb.length() == 0)
   788                         throw new IllegalArgumentException(
   789                             "named capturing group has 0 length name");
   790                     if (nextChar != '}')
   791                         throw new IllegalArgumentException(
   792                             "named capturing group is missing trailing '}'");
   793                     String gname = gsb.toString();
   794                     if (ASCII.isDigit(gname.charAt(0)))
   795                         throw new IllegalArgumentException(
   796                             "capturing group name {" + gname +
   797                             "} starts with digit character");
   798                     if (!parentPattern.namedGroups().containsKey(gname))
   799                         throw new IllegalArgumentException(
   800                             "No group with name {" + gname + "}");
   801                     refNum = parentPattern.namedGroups().get(gname);
   802                     cursor++;
   803                 } else {
   804                     // The first number is always a group
   805                     refNum = (int)nextChar - '0';
   806                     if ((refNum < 0)||(refNum > 9))
   807                         throw new IllegalArgumentException(
   808                             "Illegal group reference");
   809                     cursor++;
   810                     // Capture the largest legal group string
   811                     boolean done = false;
   812                     while (!done) {
   813                         if (cursor >= replacement.length()) {
   814                             break;
   815                         }
   816                         int nextDigit = replacement.charAt(cursor) - '0';
   817                         if ((nextDigit < 0)||(nextDigit > 9)) { // not a number
   818                             break;
   819                         }
   820                         int newRefNum = (refNum * 10) + nextDigit;
   821                         if (groupCount() < newRefNum) {
   822                             done = true;
   823                         } else {
   824                             refNum = newRefNum;
   825                             cursor++;
   826                         }
   827                     }
   828                 }
   829                 // Append group
   830                 if (start(refNum) != -1 && end(refNum) != -1)
   831                     result.append(text, start(refNum), end(refNum));
   832             } else {
   833                 result.append(nextChar);
   834                 cursor++;
   835             }
   836         }
   837         // Append the intervening text
   838         sb.append(text, lastAppendPosition, first);
   839         // Append the match substitution
   840         sb.append(result);
   841 
   842         lastAppendPosition = last;
   843         return this;
   844     }
   845 
   846     /**
   847      * Implements a terminal append-and-replace step.
   848      *
   849      * <p> This method reads characters from the input sequence, starting at
   850      * the append position, and appends them to the given string buffer.  It is
   851      * intended to be invoked after one or more invocations of the {@link
   852      * #appendReplacement appendReplacement} method in order to copy the
   853      * remainder of the input sequence.  </p>
   854      *
   855      * @param  sb
   856      *         The target string buffer
   857      *
   858      * @return  The target string buffer
   859      */
   860     public StringBuffer appendTail(StringBuffer sb) {
   861         sb.append(text, lastAppendPosition, getTextLength());
   862         return sb;
   863     }
   864 
   865     /**
   866      * Replaces every subsequence of the input sequence that matches the
   867      * pattern with the given replacement string.
   868      *
   869      * <p> This method first resets this matcher.  It then scans the input
   870      * sequence looking for matches of the pattern.  Characters that are not
   871      * part of any match are appended directly to the result string; each match
   872      * is replaced in the result by the replacement string.  The replacement
   873      * string may contain references to captured subsequences as in the {@link
   874      * #appendReplacement appendReplacement} method.
   875      *
   876      * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
   877      * the replacement string may cause the results to be different than if it
   878      * were being treated as a literal replacement string. Dollar signs may be
   879      * treated as references to captured subsequences as described above, and
   880      * backslashes are used to escape literal characters in the replacement
   881      * string.
   882      *
   883      * <p> Given the regular expression <tt>a*b</tt>, the input
   884      * <tt>"aabfooaabfooabfoob"</tt>, and the replacement string
   885      * <tt>"-"</tt>, an invocation of this method on a matcher for that
   886      * expression would yield the string <tt>"-foo-foo-foo-"</tt>.
   887      *
   888      * <p> Invoking this method changes this matcher's state.  If the matcher
   889      * is to be used in further matching operations then it should first be
   890      * reset.  </p>
   891      *
   892      * @param  replacement
   893      *         The replacement string
   894      *
   895      * @return  The string constructed by replacing each matching subsequence
   896      *          by the replacement string, substituting captured subsequences
   897      *          as needed
   898      */
   899     public String replaceAll(String replacement) {
   900         reset();
   901         boolean result = find();
   902         if (result) {
   903             StringBuffer sb = new StringBuffer();
   904             do {
   905                 appendReplacement(sb, replacement);
   906                 result = find();
   907             } while (result);
   908             appendTail(sb);
   909             return sb.toString();
   910         }
   911         return text.toString();
   912     }
   913 
   914     /**
   915      * Replaces the first subsequence of the input sequence that matches the
   916      * pattern with the given replacement string.
   917      *
   918      * <p> This method first resets this matcher.  It then scans the input
   919      * sequence looking for a match of the pattern.  Characters that are not
   920      * part of the match are appended directly to the result string; the match
   921      * is replaced in the result by the replacement string.  The replacement
   922      * string may contain references to captured subsequences as in the {@link
   923      * #appendReplacement appendReplacement} method.
   924      *
   925      * <p>Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
   926      * the replacement string may cause the results to be different than if it
   927      * were being treated as a literal replacement string. Dollar signs may be
   928      * treated as references to captured subsequences as described above, and
   929      * backslashes are used to escape literal characters in the replacement
   930      * string.
   931      *
   932      * <p> Given the regular expression <tt>dog</tt>, the input
   933      * <tt>"zzzdogzzzdogzzz"</tt>, and the replacement string
   934      * <tt>"cat"</tt>, an invocation of this method on a matcher for that
   935      * expression would yield the string <tt>"zzzcatzzzdogzzz"</tt>.  </p>
   936      *
   937      * <p> Invoking this method changes this matcher's state.  If the matcher
   938      * is to be used in further matching operations then it should first be
   939      * reset.  </p>
   940      *
   941      * @param  replacement
   942      *         The replacement string
   943      * @return  The string constructed by replacing the first matching
   944      *          subsequence by the replacement string, substituting captured
   945      *          subsequences as needed
   946      */
   947     public String replaceFirst(String replacement) {
   948         if (replacement == null)
   949             throw new NullPointerException("replacement");
   950         reset();
   951         if (!find())
   952             return text.toString();
   953         StringBuffer sb = new StringBuffer();
   954         appendReplacement(sb, replacement);
   955         appendTail(sb);
   956         return sb.toString();
   957     }
   958 
   959     /**
   960      * Sets the limits of this matcher's region. The region is the part of the
   961      * input sequence that will be searched to find a match. Invoking this
   962      * method resets the matcher, and then sets the region to start at the
   963      * index specified by the <code>start</code> parameter and end at the
   964      * index specified by the <code>end</code> parameter.
   965      *
   966      * <p>Depending on the transparency and anchoring being used (see
   967      * {@link #useTransparentBounds useTransparentBounds} and
   968      * {@link #useAnchoringBounds useAnchoringBounds}), certain constructs such
   969      * as anchors may behave differently at or around the boundaries of the
   970      * region.
   971      *
   972      * @param  start
   973      *         The index to start searching at (inclusive)
   974      * @param  end
   975      *         The index to end searching at (exclusive)
   976      * @throws  IndexOutOfBoundsException
   977      *          If start or end is less than zero, if
   978      *          start is greater than the length of the input sequence, if
   979      *          end is greater than the length of the input sequence, or if
   980      *          start is greater than end.
   981      * @return  this matcher
   982      * @since 1.5
   983      */
   984     public Matcher region(int start, int end) {
   985         if ((start < 0) || (start > getTextLength()))
   986             throw new IndexOutOfBoundsException("start");
   987         if ((end < 0) || (end > getTextLength()))
   988             throw new IndexOutOfBoundsException("end");
   989         if (start > end)
   990             throw new IndexOutOfBoundsException("start > end");
   991         reset();
   992         from = start;
   993         to = end;
   994         return this;
   995     }
   996 
   997     /**
   998      * Reports the start index of this matcher's region. The
   999      * searches this matcher conducts are limited to finding matches
  1000      * within {@link #regionStart regionStart} (inclusive) and
  1001      * {@link #regionEnd regionEnd} (exclusive).
  1002      *
  1003      * @return  The starting point of this matcher's region
  1004      * @since 1.5
  1005      */
  1006     public int regionStart() {
  1007         return from;
  1008     }
  1009 
  1010     /**
  1011      * Reports the end index (exclusive) of this matcher's region.
  1012      * The searches this matcher conducts are limited to finding matches
  1013      * within {@link #regionStart regionStart} (inclusive) and
  1014      * {@link #regionEnd regionEnd} (exclusive).
  1015      *
  1016      * @return  the ending point of this matcher's region
  1017      * @since 1.5
  1018      */
  1019     public int regionEnd() {
  1020         return to;
  1021     }
  1022 
  1023     /**
  1024      * Queries the transparency of region bounds for this matcher.
  1025      *
  1026      * <p> This method returns <tt>true</tt> if this matcher uses
  1027      * <i>transparent</i> bounds, <tt>false</tt> if it uses <i>opaque</i>
  1028      * bounds.
  1029      *
  1030      * <p> See {@link #useTransparentBounds useTransparentBounds} for a
  1031      * description of transparent and opaque bounds.
  1032      *
  1033      * <p> By default, a matcher uses opaque region boundaries.
  1034      *
  1035      * @return <tt>true</tt> iff this matcher is using transparent bounds,
  1036      *         <tt>false</tt> otherwise.
  1037      * @see java.util.regex.Matcher#useTransparentBounds(boolean)
  1038      * @since 1.5
  1039      */
  1040     public boolean hasTransparentBounds() {
  1041         return transparentBounds;
  1042     }
  1043 
  1044     /**
  1045      * Sets the transparency of region bounds for this matcher.
  1046      *
  1047      * <p> Invoking this method with an argument of <tt>true</tt> will set this
  1048      * matcher to use <i>transparent</i> bounds. If the boolean
  1049      * argument is <tt>false</tt>, then <i>opaque</i> bounds will be used.
  1050      *
  1051      * <p> Using transparent bounds, the boundaries of this
  1052      * matcher's region are transparent to lookahead, lookbehind,
  1053      * and boundary matching constructs. Those constructs can see beyond the
  1054      * boundaries of the region to see if a match is appropriate.
  1055      *
  1056      * <p> Using opaque bounds, the boundaries of this matcher's
  1057      * region are opaque to lookahead, lookbehind, and boundary matching
  1058      * constructs that may try to see beyond them. Those constructs cannot
  1059      * look past the boundaries so they will fail to match anything outside
  1060      * of the region.
  1061      *
  1062      * <p> By default, a matcher uses opaque bounds.
  1063      *
  1064      * @param  b a boolean indicating whether to use opaque or transparent
  1065      *         regions
  1066      * @return this matcher
  1067      * @see java.util.regex.Matcher#hasTransparentBounds
  1068      * @since 1.5
  1069      */
  1070     public Matcher useTransparentBounds(boolean b) {
  1071         transparentBounds = b;
  1072         return this;
  1073     }
  1074 
  1075     /**
  1076      * Queries the anchoring of region bounds for this matcher.
  1077      *
  1078      * <p> This method returns <tt>true</tt> if this matcher uses
  1079      * <i>anchoring</i> bounds, <tt>false</tt> otherwise.
  1080      *
  1081      * <p> See {@link #useAnchoringBounds useAnchoringBounds} for a
  1082      * description of anchoring bounds.
  1083      *
  1084      * <p> By default, a matcher uses anchoring region boundaries.
  1085      *
  1086      * @return <tt>true</tt> iff this matcher is using anchoring bounds,
  1087      *         <tt>false</tt> otherwise.
  1088      * @see java.util.regex.Matcher#useAnchoringBounds(boolean)
  1089      * @since 1.5
  1090      */
  1091     public boolean hasAnchoringBounds() {
  1092         return anchoringBounds;
  1093     }
  1094 
  1095     /**
  1096      * Sets the anchoring of region bounds for this matcher.
  1097      *
  1098      * <p> Invoking this method with an argument of <tt>true</tt> will set this
  1099      * matcher to use <i>anchoring</i> bounds. If the boolean
  1100      * argument is <tt>false</tt>, then <i>non-anchoring</i> bounds will be
  1101      * used.
  1102      *
  1103      * <p> Using anchoring bounds, the boundaries of this
  1104      * matcher's region match anchors such as ^ and $.
  1105      *
  1106      * <p> Without anchoring bounds, the boundaries of this
  1107      * matcher's region will not match anchors such as ^ and $.
  1108      *
  1109      * <p> By default, a matcher uses anchoring region boundaries.
  1110      *
  1111      * @param  b a boolean indicating whether or not to use anchoring bounds.
  1112      * @return this matcher
  1113      * @see java.util.regex.Matcher#hasAnchoringBounds
  1114      * @since 1.5
  1115      */
  1116     public Matcher useAnchoringBounds(boolean b) {
  1117         anchoringBounds = b;
  1118         return this;
  1119     }
  1120 
  1121     /**
  1122      * <p>Returns the string representation of this matcher. The
  1123      * string representation of a <code>Matcher</code> contains information
  1124      * that may be useful for debugging. The exact format is unspecified.
  1125      *
  1126      * @return  The string representation of this matcher
  1127      * @since 1.5
  1128      */
  1129     public String toString() {
  1130         StringBuilder sb = new StringBuilder();
  1131         sb.append("java.util.regex.Matcher");
  1132         sb.append("[pattern=" + pattern());
  1133         sb.append(" region=");
  1134         sb.append(regionStart() + "," + regionEnd());
  1135         sb.append(" lastmatch=");
  1136         if ((first >= 0) && (group() != null)) {
  1137             sb.append(group());
  1138         }
  1139         sb.append("]");
  1140         return sb.toString();
  1141     }
  1142 
  1143     /**
  1144      * <p>Returns true if the end of input was hit by the search engine in
  1145      * the last match operation performed by this matcher.
  1146      *
  1147      * <p>When this method returns true, then it is possible that more input
  1148      * would have changed the result of the last search.
  1149      *
  1150      * @return  true iff the end of input was hit in the last match; false
  1151      *          otherwise
  1152      * @since 1.5
  1153      */
  1154     public boolean hitEnd() {
  1155         return hitEnd;
  1156     }
  1157 
  1158     /**
  1159      * <p>Returns true if more input could change a positive match into a
  1160      * negative one.
  1161      *
  1162      * <p>If this method returns true, and a match was found, then more
  1163      * input could cause the match to be lost. If this method returns false
  1164      * and a match was found, then more input might change the match but the
  1165      * match won't be lost. If a match was not found, then requireEnd has no
  1166      * meaning.
  1167      *
  1168      * @return  true iff more input could change a positive match into a
  1169      *          negative one.
  1170      * @since 1.5
  1171      */
  1172     public boolean requireEnd() {
  1173         return requireEnd;
  1174     }
  1175 
  1176     /**
  1177      * Initiates a search to find a Pattern within the given bounds.
  1178      * The groups are filled with default values and the match of the root
  1179      * of the state machine is called. The state machine will hold the state
  1180      * of the match as it proceeds in this matcher.
  1181      *
  1182      * Matcher.from is not set here, because it is the "hard" boundary
  1183      * of the start of the search which anchors will set to. The from param
  1184      * is the "soft" boundary of the start of the search, meaning that the
  1185      * regex tries to match at that index but ^ won't match there. Subsequent
  1186      * calls to the search methods start at a new "soft" boundary which is
  1187      * the end of the previous match.
  1188      */
  1189     boolean search(int from) {
  1190         this.hitEnd = false;
  1191         this.requireEnd = false;
  1192         from        = from < 0 ? 0 : from;
  1193         this.first  = from;
  1194         this.oldLast = oldLast < 0 ? from : oldLast;
  1195         for (int i = 0; i < groups.length; i++)
  1196             groups[i] = -1;
  1197         acceptMode = NOANCHOR;
  1198         boolean result = parentPattern.root.match(this, from, text);
  1199         if (!result)
  1200             this.first = -1;
  1201         this.oldLast = this.last;
  1202         return result;
  1203     }
  1204 
  1205     /**
  1206      * Initiates a search for an anchored match to a Pattern within the given
  1207      * bounds. The groups are filled with default values and the match of the
  1208      * root of the state machine is called. The state machine will hold the
  1209      * state of the match as it proceeds in this matcher.
  1210      */
  1211     boolean match(int from, int anchor) {
  1212         this.hitEnd = false;
  1213         this.requireEnd = false;
  1214         from        = from < 0 ? 0 : from;
  1215         this.first  = from;
  1216         this.oldLast = oldLast < 0 ? from : oldLast;
  1217         for (int i = 0; i < groups.length; i++)
  1218             groups[i] = -1;
  1219         acceptMode = anchor;
  1220         boolean result = parentPattern.matchRoot.match(this, from, text);
  1221         if (!result)
  1222             this.first = -1;
  1223         this.oldLast = this.last;
  1224         return result;
  1225     }
  1226 
  1227     /**
  1228      * Returns the end index of the text.
  1229      *
  1230      * @return the index after the last character in the text
  1231      */
  1232     int getTextLength() {
  1233         return text.length();
  1234     }
  1235 
  1236     /**
  1237      * Generates a String from this Matcher's input in the specified range.
  1238      *
  1239      * @param  beginIndex   the beginning index, inclusive
  1240      * @param  endIndex     the ending index, exclusive
  1241      * @return A String generated from this Matcher's input
  1242      */
  1243     CharSequence getSubSequence(int beginIndex, int endIndex) {
  1244         return text.subSequence(beginIndex, endIndex);
  1245     }
  1246 
  1247     /**
  1248      * Returns this Matcher's input character at index i.
  1249      *
  1250      * @return A char from the specified index
  1251      */
  1252     char charAt(int i) {
  1253         return text.charAt(i);
  1254     }
  1255 
  1256 }