rt/emul/compact/src/main/java/java/util/StringTokenizer.java
author Jaroslav Tulach <jaroslav.tulach@apidesign.org>
Tue, 26 Feb 2013 16:54:16 +0100
changeset 772 d382dacfd73f
parent 597 emul/compact/src/main/java/java/util/StringTokenizer.java@ee8a922f4268
permissions -rw-r--r--
Moving modules around so the runtime is under one master pom and can be built without building other modules that are in the repository
     1 /*
     2  * Copyright (c) 1994, 2004, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Oracle in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    22  * or visit www.oracle.com if you need additional information or have any
    23  * questions.
    24  */
    25 
    26 package java.util;
    27 
    28 import java.lang.*;
    29 
    30 /**
    31  * The string tokenizer class allows an application to break a
    32  * string into tokens. The tokenization method is much simpler than
    33  * the one used by the <code>StreamTokenizer</code> class. The
    34  * <code>StringTokenizer</code> methods do not distinguish among
    35  * identifiers, numbers, and quoted strings, nor do they recognize
    36  * and skip comments.
    37  * <p>
    38  * The set of delimiters (the characters that separate tokens) may
    39  * be specified either at creation time or on a per-token basis.
    40  * <p>
    41  * An instance of <code>StringTokenizer</code> behaves in one of two
    42  * ways, depending on whether it was created with the
    43  * <code>returnDelims</code> flag having the value <code>true</code>
    44  * or <code>false</code>:
    45  * <ul>
    46  * <li>If the flag is <code>false</code>, delimiter characters serve to
    47  *     separate tokens. A token is a maximal sequence of consecutive
    48  *     characters that are not delimiters.
    49  * <li>If the flag is <code>true</code>, delimiter characters are themselves
    50  *     considered to be tokens. A token is thus either one delimiter
    51  *     character, or a maximal sequence of consecutive characters that are
    52  *     not delimiters.
    53  * </ul><p>
    54  * A <tt>StringTokenizer</tt> object internally maintains a current
    55  * position within the string to be tokenized. Some operations advance this
    56  * current position past the characters processed.<p>
    57  * A token is returned by taking a substring of the string that was used to
    58  * create the <tt>StringTokenizer</tt> object.
    59  * <p>
    60  * The following is one example of the use of the tokenizer. The code:
    61  * <blockquote><pre>
    62  *     StringTokenizer st = new StringTokenizer("this is a test");
    63  *     while (st.hasMoreTokens()) {
    64  *         System.out.println(st.nextToken());
    65  *     }
    66  * </pre></blockquote>
    67  * <p>
    68  * prints the following output:
    69  * <blockquote><pre>
    70  *     this
    71  *     is
    72  *     a
    73  *     test
    74  * </pre></blockquote>
    75  *
    76  * <p>
    77  * <tt>StringTokenizer</tt> is a legacy class that is retained for
    78  * compatibility reasons although its use is discouraged in new code. It is
    79  * recommended that anyone seeking this functionality use the <tt>split</tt>
    80  * method of <tt>String</tt> or the java.util.regex package instead.
    81  * <p>
    82  * The following example illustrates how the <tt>String.split</tt>
    83  * method can be used to break up a string into its basic tokens:
    84  * <blockquote><pre>
    85  *     String[] result = "this is a test".split("\\s");
    86  *     for (int x=0; x&lt;result.length; x++)
    87  *         System.out.println(result[x]);
    88  * </pre></blockquote>
    89  * <p>
    90  * prints the following output:
    91  * <blockquote><pre>
    92  *     this
    93  *     is
    94  *     a
    95  *     test
    96  * </pre></blockquote>
    97  *
    98  * @author  unascribed
    99  * @see     java.io.StreamTokenizer
   100  * @since   JDK1.0
   101  */
   102 public
   103 class StringTokenizer implements Enumeration<Object> {
   104     private int currentPosition;
   105     private int newPosition;
   106     private int maxPosition;
   107     private String str;
   108     private String delimiters;
   109     private boolean retDelims;
   110     private boolean delimsChanged;
   111 
   112     /**
   113      * maxDelimCodePoint stores the value of the delimiter character with the
   114      * highest value. It is used to optimize the detection of delimiter
   115      * characters.
   116      *
   117      * It is unlikely to provide any optimization benefit in the
   118      * hasSurrogates case because most string characters will be
   119      * smaller than the limit, but we keep it so that the two code
   120      * paths remain similar.
   121      */
   122     private int maxDelimCodePoint;
   123 
   124     /**
   125      * If delimiters include any surrogates (including surrogate
   126      * pairs), hasSurrogates is true and the tokenizer uses the
   127      * different code path. This is because String.indexOf(int)
   128      * doesn't handle unpaired surrogates as a single character.
   129      */
   130     private boolean hasSurrogates = false;
   131 
   132     /**
   133      * When hasSurrogates is true, delimiters are converted to code
   134      * points and isDelimiter(int) is used to determine if the given
   135      * codepoint is a delimiter.
   136      */
   137     private int[] delimiterCodePoints;
   138 
   139     /**
   140      * Set maxDelimCodePoint to the highest char in the delimiter set.
   141      */
   142     private void setMaxDelimCodePoint() {
   143         if (delimiters == null) {
   144             maxDelimCodePoint = 0;
   145             return;
   146         }
   147 
   148         int m = 0;
   149         int c;
   150         int count = 0;
   151         for (int i = 0; i < delimiters.length(); i += Character.charCount(c)) {
   152             c = delimiters.charAt(i);
   153             if (c >= Character.MIN_HIGH_SURROGATE && c <= Character.MAX_LOW_SURROGATE) {
   154                 c = delimiters.codePointAt(i);
   155                 hasSurrogates = true;
   156             }
   157             if (m < c)
   158                 m = c;
   159             count++;
   160         }
   161         maxDelimCodePoint = m;
   162 
   163         if (hasSurrogates) {
   164             delimiterCodePoints = new int[count];
   165             for (int i = 0, j = 0; i < count; i++, j += Character.charCount(c)) {
   166                 c = delimiters.codePointAt(j);
   167                 delimiterCodePoints[i] = c;
   168             }
   169         }
   170     }
   171 
   172     /**
   173      * Constructs a string tokenizer for the specified string. All
   174      * characters in the <code>delim</code> argument are the delimiters
   175      * for separating tokens.
   176      * <p>
   177      * If the <code>returnDelims</code> flag is <code>true</code>, then
   178      * the delimiter characters are also returned as tokens. Each
   179      * delimiter is returned as a string of length one. If the flag is
   180      * <code>false</code>, the delimiter characters are skipped and only
   181      * serve as separators between tokens.
   182      * <p>
   183      * Note that if <tt>delim</tt> is <tt>null</tt>, this constructor does
   184      * not throw an exception. However, trying to invoke other methods on the
   185      * resulting <tt>StringTokenizer</tt> may result in a
   186      * <tt>NullPointerException</tt>.
   187      *
   188      * @param   str            a string to be parsed.
   189      * @param   delim          the delimiters.
   190      * @param   returnDelims   flag indicating whether to return the delimiters
   191      *                         as tokens.
   192      * @exception NullPointerException if str is <CODE>null</CODE>
   193      */
   194     public StringTokenizer(String str, String delim, boolean returnDelims) {
   195         currentPosition = 0;
   196         newPosition = -1;
   197         delimsChanged = false;
   198         this.str = str;
   199         maxPosition = str.length();
   200         delimiters = delim;
   201         retDelims = returnDelims;
   202         setMaxDelimCodePoint();
   203     }
   204 
   205     /**
   206      * Constructs a string tokenizer for the specified string. The
   207      * characters in the <code>delim</code> argument are the delimiters
   208      * for separating tokens. Delimiter characters themselves will not
   209      * be treated as tokens.
   210      * <p>
   211      * Note that if <tt>delim</tt> is <tt>null</tt>, this constructor does
   212      * not throw an exception. However, trying to invoke other methods on the
   213      * resulting <tt>StringTokenizer</tt> may result in a
   214      * <tt>NullPointerException</tt>.
   215      *
   216      * @param   str     a string to be parsed.
   217      * @param   delim   the delimiters.
   218      * @exception NullPointerException if str is <CODE>null</CODE>
   219      */
   220     public StringTokenizer(String str, String delim) {
   221         this(str, delim, false);
   222     }
   223 
   224     /**
   225      * Constructs a string tokenizer for the specified string. The
   226      * tokenizer uses the default delimiter set, which is
   227      * <code>"&nbsp;&#92;t&#92;n&#92;r&#92;f"</code>: the space character,
   228      * the tab character, the newline character, the carriage-return character,
   229      * and the form-feed character. Delimiter characters themselves will
   230      * not be treated as tokens.
   231      *
   232      * @param   str   a string to be parsed.
   233      * @exception NullPointerException if str is <CODE>null</CODE>
   234      */
   235     public StringTokenizer(String str) {
   236         this(str, " \t\n\r\f", false);
   237     }
   238 
   239     /**
   240      * Skips delimiters starting from the specified position. If retDelims
   241      * is false, returns the index of the first non-delimiter character at or
   242      * after startPos. If retDelims is true, startPos is returned.
   243      */
   244     private int skipDelimiters(int startPos) {
   245         if (delimiters == null)
   246             throw new NullPointerException();
   247 
   248         int position = startPos;
   249         while (!retDelims && position < maxPosition) {
   250             if (!hasSurrogates) {
   251                 char c = str.charAt(position);
   252                 if ((c > maxDelimCodePoint) || (delimiters.indexOf(c) < 0))
   253                     break;
   254                 position++;
   255             } else {
   256                 int c = str.codePointAt(position);
   257                 if ((c > maxDelimCodePoint) || !isDelimiter(c)) {
   258                     break;
   259                 }
   260                 position += Character.charCount(c);
   261             }
   262         }
   263         return position;
   264     }
   265 
   266     /**
   267      * Skips ahead from startPos and returns the index of the next delimiter
   268      * character encountered, or maxPosition if no such delimiter is found.
   269      */
   270     private int scanToken(int startPos) {
   271         int position = startPos;
   272         while (position < maxPosition) {
   273             if (!hasSurrogates) {
   274                 char c = str.charAt(position);
   275                 if ((c <= maxDelimCodePoint) && (delimiters.indexOf(c) >= 0))
   276                     break;
   277                 position++;
   278             } else {
   279                 int c = str.codePointAt(position);
   280                 if ((c <= maxDelimCodePoint) && isDelimiter(c))
   281                     break;
   282                 position += Character.charCount(c);
   283             }
   284         }
   285         if (retDelims && (startPos == position)) {
   286             if (!hasSurrogates) {
   287                 char c = str.charAt(position);
   288                 if ((c <= maxDelimCodePoint) && (delimiters.indexOf(c) >= 0))
   289                     position++;
   290             } else {
   291                 int c = str.codePointAt(position);
   292                 if ((c <= maxDelimCodePoint) && isDelimiter(c))
   293                     position += Character.charCount(c);
   294             }
   295         }
   296         return position;
   297     }
   298 
   299     private boolean isDelimiter(int codePoint) {
   300         for (int i = 0; i < delimiterCodePoints.length; i++) {
   301             if (delimiterCodePoints[i] == codePoint) {
   302                 return true;
   303             }
   304         }
   305         return false;
   306     }
   307 
   308     /**
   309      * Tests if there are more tokens available from this tokenizer's string.
   310      * If this method returns <tt>true</tt>, then a subsequent call to
   311      * <tt>nextToken</tt> with no argument will successfully return a token.
   312      *
   313      * @return  <code>true</code> if and only if there is at least one token
   314      *          in the string after the current position; <code>false</code>
   315      *          otherwise.
   316      */
   317     public boolean hasMoreTokens() {
   318         /*
   319          * Temporarily store this position and use it in the following
   320          * nextToken() method only if the delimiters haven't been changed in
   321          * that nextToken() invocation.
   322          */
   323         newPosition = skipDelimiters(currentPosition);
   324         return (newPosition < maxPosition);
   325     }
   326 
   327     /**
   328      * Returns the next token from this string tokenizer.
   329      *
   330      * @return     the next token from this string tokenizer.
   331      * @exception  NoSuchElementException  if there are no more tokens in this
   332      *               tokenizer's string.
   333      */
   334     public String nextToken() {
   335         /*
   336          * If next position already computed in hasMoreElements() and
   337          * delimiters have changed between the computation and this invocation,
   338          * then use the computed value.
   339          */
   340 
   341         currentPosition = (newPosition >= 0 && !delimsChanged) ?
   342             newPosition : skipDelimiters(currentPosition);
   343 
   344         /* Reset these anyway */
   345         delimsChanged = false;
   346         newPosition = -1;
   347 
   348         if (currentPosition >= maxPosition)
   349             throw new NoSuchElementException();
   350         int start = currentPosition;
   351         currentPosition = scanToken(currentPosition);
   352         return str.substring(start, currentPosition);
   353     }
   354 
   355     /**
   356      * Returns the next token in this string tokenizer's string. First,
   357      * the set of characters considered to be delimiters by this
   358      * <tt>StringTokenizer</tt> object is changed to be the characters in
   359      * the string <tt>delim</tt>. Then the next token in the string
   360      * after the current position is returned. The current position is
   361      * advanced beyond the recognized token.  The new delimiter set
   362      * remains the default after this call.
   363      *
   364      * @param      delim   the new delimiters.
   365      * @return     the next token, after switching to the new delimiter set.
   366      * @exception  NoSuchElementException  if there are no more tokens in this
   367      *               tokenizer's string.
   368      * @exception NullPointerException if delim is <CODE>null</CODE>
   369      */
   370     public String nextToken(String delim) {
   371         delimiters = delim;
   372 
   373         /* delimiter string specified, so set the appropriate flag. */
   374         delimsChanged = true;
   375 
   376         setMaxDelimCodePoint();
   377         return nextToken();
   378     }
   379 
   380     /**
   381      * Returns the same value as the <code>hasMoreTokens</code>
   382      * method. It exists so that this class can implement the
   383      * <code>Enumeration</code> interface.
   384      *
   385      * @return  <code>true</code> if there are more tokens;
   386      *          <code>false</code> otherwise.
   387      * @see     java.util.Enumeration
   388      * @see     java.util.StringTokenizer#hasMoreTokens()
   389      */
   390     public boolean hasMoreElements() {
   391         return hasMoreTokens();
   392     }
   393 
   394     /**
   395      * Returns the same value as the <code>nextToken</code> method,
   396      * except that its declared return value is <code>Object</code> rather than
   397      * <code>String</code>. It exists so that this class can implement the
   398      * <code>Enumeration</code> interface.
   399      *
   400      * @return     the next token in the string.
   401      * @exception  NoSuchElementException  if there are no more tokens in this
   402      *               tokenizer's string.
   403      * @see        java.util.Enumeration
   404      * @see        java.util.StringTokenizer#nextToken()
   405      */
   406     public Object nextElement() {
   407         return nextToken();
   408     }
   409 
   410     /**
   411      * Calculates the number of times that this tokenizer's
   412      * <code>nextToken</code> method can be called before it generates an
   413      * exception. The current position is not advanced.
   414      *
   415      * @return  the number of tokens remaining in the string using the current
   416      *          delimiter set.
   417      * @see     java.util.StringTokenizer#nextToken()
   418      */
   419     public int countTokens() {
   420         int count = 0;
   421         int currpos = currentPosition;
   422         while (currpos < maxPosition) {
   423             currpos = skipDelimiters(currpos);
   424             if (currpos >= maxPosition)
   425                 break;
   426             currpos = scanToken(currpos);
   427             count++;
   428         }
   429         return count;
   430     }
   431 }