hg/bck2brwsr: rt/emul/compact/src/main/java/java/util/StringTokenizer.java@d382dacfd73f

     1 /*

     2  * Copyright (c) 1994, 2004, Oracle and/or its affiliates. All rights reserved.

     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

     4  *

     5  * This code is free software; you can redistribute it and/or modify it

     6  * under the terms of the GNU General Public License version 2 only, as

     7  * published by the Free Software Foundation.  Oracle designates this

     8  * particular file as subject to the "Classpath" exception as provided

     9  * by Oracle in the LICENSE file that accompanied this code.

    10  *

    11  * This code is distributed in the hope that it will be useful, but WITHOUT

    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

    14  * version 2 for more details (a copy is included in the LICENSE file that

    15  * accompanied this code).

    16  *

    17  * You should have received a copy of the GNU General Public License version

    18  * 2 along with this work; if not, write to the Free Software Foundation,

    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

    20  *

    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA

    22  * or visit www.oracle.com if you need additional information or have any

    23  * questions.

    24  */

    26 package java.util;

    28 import java.lang.*;

    30 /**

    31  * The string tokenizer class allows an application to break a

    32  * string into tokens. The tokenization method is much simpler than

    33  * the one used by the <code>StreamTokenizer</code> class. The

    34  * <code>StringTokenizer</code> methods do not distinguish among

    35  * identifiers, numbers, and quoted strings, nor do they recognize

    36  * and skip comments.

    37  * <p>

    38  * The set of delimiters (the characters that separate tokens) may

    39  * be specified either at creation time or on a per-token basis.

    40  * <p>

    41  * An instance of <code>StringTokenizer</code> behaves in one of two

    42  * ways, depending on whether it was created with the

    43  * <code>returnDelims</code> flag having the value <code>true</code>

    44  * or <code>false</code>:

    45  * <ul>

    46  * <li>If the flag is <code>false</code>, delimiter characters serve to

    47  *     separate tokens. A token is a maximal sequence of consecutive

    48  *     characters that are not delimiters.

    49  * <li>If the flag is <code>true</code>, delimiter characters are themselves

    50  *     considered to be tokens. A token is thus either one delimiter

    51  *     character, or a maximal sequence of consecutive characters that are

    52  *     not delimiters.

    53  * </ul><p>

    54  * A <tt>StringTokenizer</tt> object internally maintains a current

    55  * position within the string to be tokenized. Some operations advance this

    56  * current position past the characters processed.<p>

    57  * A token is returned by taking a substring of the string that was used to

    58  * create the <tt>StringTokenizer</tt> object.

    59  * <p>

    60  * The following is one example of the use of the tokenizer. The code:

    61  * <blockquote><pre>

    62  *     StringTokenizer st = new StringTokenizer("this is a test");

    63  *     while (st.hasMoreTokens()) {

    64  *         System.out.println(st.nextToken());

    65  *     }

    66  * </pre></blockquote>

    67  * <p>

    68  * prints the following output:

    69  * <blockquote><pre>

    70  *     this

    71  *     is

    72  *     a

    73  *     test

    74  * </pre></blockquote>

    75  *

    76  * <p>

    77  * <tt>StringTokenizer</tt> is a legacy class that is retained for

    78  * compatibility reasons although its use is discouraged in new code. It is

    79  * recommended that anyone seeking this functionality use the <tt>split</tt>

    80  * method of <tt>String</tt> or the java.util.regex package instead.

    81  * <p>

    82  * The following example illustrates how the <tt>String.split</tt>

    83  * method can be used to break up a string into its basic tokens:

    84  * <blockquote><pre>

    85  *     String[] result = "this is a test".split("\\s");

    86  *     for (int x=0; x&lt;result.length; x++)

    87  *         System.out.println(result[x]);

    88  * </pre></blockquote>

    89  * <p>

    90  * prints the following output:

    91  * <blockquote><pre>

    92  *     this

    93  *     is

    94  *     a

    95  *     test

    96  * </pre></blockquote>

    97  *

    98  * @author  unascribed

    99  * @see     java.io.StreamTokenizer

   100  * @since   JDK1.0

   101  */

   102 public

   103 class StringTokenizer implements Enumeration<Object> {

   104     private int currentPosition;

   105     private int newPosition;

   106     private int maxPosition;

   107     private String str;

   108     private String delimiters;

   109     private boolean retDelims;

   110     private boolean delimsChanged;

   112     /**

   113      * maxDelimCodePoint stores the value of the delimiter character with the

   114      * highest value. It is used to optimize the detection of delimiter

   115      * characters.

   116      *

   117      * It is unlikely to provide any optimization benefit in the

   118      * hasSurrogates case because most string characters will be

   119      * smaller than the limit, but we keep it so that the two code

   120      * paths remain similar.

   121      */

   122     private int maxDelimCodePoint;

   124     /**

   125      * If delimiters include any surrogates (including surrogate

   126      * pairs), hasSurrogates is true and the tokenizer uses the

   127      * different code path. This is because String.indexOf(int)

   128      * doesn't handle unpaired surrogates as a single character.

   129      */

   130     private boolean hasSurrogates = false;

   132     /**

   133      * When hasSurrogates is true, delimiters are converted to code

   134      * points and isDelimiter(int) is used to determine if the given

   135      * codepoint is a delimiter.

   136      */

   137     private int[] delimiterCodePoints;

   139     /**

   140      * Set maxDelimCodePoint to the highest char in the delimiter set.

   141      */

   142     private void setMaxDelimCodePoint() {

   143         if (delimiters == null) {

   144             maxDelimCodePoint = 0;

   145             return;

   146         }

   148         int m = 0;

   149         int c;

   150         int count = 0;

   151         for (int i = 0; i < delimiters.length(); i += Character.charCount(c)) {

   152             c = delimiters.charAt(i);

   153             if (c >= Character.MIN_HIGH_SURROGATE && c <= Character.MAX_LOW_SURROGATE) {

   154                 c = delimiters.codePointAt(i);

   155                 hasSurrogates = true;

   156             }

   157             if (m < c)

   158                 m = c;

   159             count++;

   160         }

   161         maxDelimCodePoint = m;

   163         if (hasSurrogates) {

   164             delimiterCodePoints = new int[count];

   165             for (int i = 0, j = 0; i < count; i++, j += Character.charCount(c)) {

   166                 c = delimiters.codePointAt(j);

   167                 delimiterCodePoints[i] = c;

   168             }

   169         }

   170     }

   172     /**

   173      * Constructs a string tokenizer for the specified string. All

   174      * characters in the <code>delim</code> argument are the delimiters

   175      * for separating tokens.

   176      * <p>

   177      * If the <code>returnDelims</code> flag is <code>true</code>, then

   178      * the delimiter characters are also returned as tokens. Each

   179      * delimiter is returned as a string of length one. If the flag is

   180      * <code>false</code>, the delimiter characters are skipped and only

   181      * serve as separators between tokens.

   182      * <p>

   183      * Note that if <tt>delim</tt> is <tt>null</tt>, this constructor does

   184      * not throw an exception. However, trying to invoke other methods on the

   185      * resulting <tt>StringTokenizer</tt> may result in a

   186      * <tt>NullPointerException</tt>.

   187      *

   188      * @param   str            a string to be parsed.

   189      * @param   delim          the delimiters.

   190      * @param   returnDelims   flag indicating whether to return the delimiters

   191      *                         as tokens.

   192      * @exception NullPointerException if str is <CODE>null</CODE>

   193      */

   194     public StringTokenizer(String str, String delim, boolean returnDelims) {

   195         currentPosition = 0;

   196         newPosition = -1;

   197         delimsChanged = false;

   198         this.str = str;

   199         maxPosition = str.length();

   200         delimiters = delim;

   201         retDelims = returnDelims;

   202         setMaxDelimCodePoint();

   203     }

   205     /**

   206      * Constructs a string tokenizer for the specified string. The

   207      * characters in the <code>delim</code> argument are the delimiters

   208      * for separating tokens. Delimiter characters themselves will not

   209      * be treated as tokens.

   210      * <p>

   211      * Note that if <tt>delim</tt> is <tt>null</tt>, this constructor does

   212      * not throw an exception. However, trying to invoke other methods on the

   213      * resulting <tt>StringTokenizer</tt> may result in a

   214      * <tt>NullPointerException</tt>.

   215      *

   216      * @param   str     a string to be parsed.

   217      * @param   delim   the delimiters.

   218      * @exception NullPointerException if str is <CODE>null</CODE>

   219      */

   220     public StringTokenizer(String str, String delim) {

   221         this(str, delim, false);

   222     }

   224     /**

   225      * Constructs a string tokenizer for the specified string. The

   226      * tokenizer uses the default delimiter set, which is

   227      * <code>"&nbsp;&#92;t&#92;n&#92;r&#92;f"</code>: the space character,

   228      * the tab character, the newline character, the carriage-return character,

   229      * and the form-feed character. Delimiter characters themselves will

   230      * not be treated as tokens.

   231      *

   232      * @param   str   a string to be parsed.

   233      * @exception NullPointerException if str is <CODE>null</CODE>

   234      */

   235     public StringTokenizer(String str) {

   236         this(str, " \t\n\r\f", false);

   237     }

   239     /**

   240      * Skips delimiters starting from the specified position. If retDelims

   241      * is false, returns the index of the first non-delimiter character at or

   242      * after startPos. If retDelims is true, startPos is returned.

   243      */

   244     private int skipDelimiters(int startPos) {

   245         if (delimiters == null)

   246             throw new NullPointerException();

   248         int position = startPos;

   249         while (!retDelims && position < maxPosition) {

   250             if (!hasSurrogates) {

   251                 char c = str.charAt(position);

   252                 if ((c > maxDelimCodePoint) || (delimiters.indexOf(c) < 0))

   253                     break;

   254                 position++;

   255             } else {

   256                 int c = str.codePointAt(position);

   257                 if ((c > maxDelimCodePoint) || !isDelimiter(c)) {

   258                     break;

   259                 }

   260                 position += Character.charCount(c);

   261             }

   262         }

   263         return position;

   264     }

   266     /**

   267      * Skips ahead from startPos and returns the index of the next delimiter

   268      * character encountered, or maxPosition if no such delimiter is found.

   269      */

   270     private int scanToken(int startPos) {

   271         int position = startPos;

   272         while (position < maxPosition) {

   273             if (!hasSurrogates) {

   274                 char c = str.charAt(position);

   275                 if ((c <= maxDelimCodePoint) && (delimiters.indexOf(c) >= 0))

   276                     break;

   277                 position++;

   278             } else {

   279                 int c = str.codePointAt(position);

   280                 if ((c <= maxDelimCodePoint) && isDelimiter(c))

   281                     break;

   282                 position += Character.charCount(c);

   283             }

   284         }

   285         if (retDelims && (startPos == position)) {

   286             if (!hasSurrogates) {

   287                 char c = str.charAt(position);

   288                 if ((c <= maxDelimCodePoint) && (delimiters.indexOf(c) >= 0))

   289                     position++;

   290             } else {

   291                 int c = str.codePointAt(position);

   292                 if ((c <= maxDelimCodePoint) && isDelimiter(c))

   293                     position += Character.charCount(c);

   294             }

   295         }

   296         return position;

   297     }

   299     private boolean isDelimiter(int codePoint) {

   300         for (int i = 0; i < delimiterCodePoints.length; i++) {

   301             if (delimiterCodePoints[i] == codePoint) {

   302                 return true;

   303             }

   304         }

   305         return false;

   306     }

   308     /**

   309      * Tests if there are more tokens available from this tokenizer's string.

   310      * If this method returns <tt>true</tt>, then a subsequent call to

   311      * <tt>nextToken</tt> with no argument will successfully return a token.

   312      *

   313      * @return  <code>true</code> if and only if there is at least one token

   314      *          in the string after the current position; <code>false</code>

   315      *          otherwise.

   316      */

   317     public boolean hasMoreTokens() {

   318         /*

   319          * Temporarily store this position and use it in the following

   320          * nextToken() method only if the delimiters haven't been changed in

   321          * that nextToken() invocation.

   322          */

   323         newPosition = skipDelimiters(currentPosition);

   324         return (newPosition < maxPosition);

   325     }

   327     /**

   328      * Returns the next token from this string tokenizer.

   329      *

   330      * @return     the next token from this string tokenizer.

   331      * @exception  NoSuchElementException  if there are no more tokens in this

   332      *               tokenizer's string.

   333      */

   334     public String nextToken() {

   335         /*

   336          * If next position already computed in hasMoreElements() and

   337          * delimiters have changed between the computation and this invocation,

   338          * then use the computed value.

   339          */

   341         currentPosition = (newPosition >= 0 && !delimsChanged) ?

   342             newPosition : skipDelimiters(currentPosition);

   344         /* Reset these anyway */

   345         delimsChanged = false;

   346         newPosition = -1;

   348         if (currentPosition >= maxPosition)

   349             throw new NoSuchElementException();

   350         int start = currentPosition;

   351         currentPosition = scanToken(currentPosition);

   352         return str.substring(start, currentPosition);

   353     }

   355     /**

   356      * Returns the next token in this string tokenizer's string. First,

   357      * the set of characters considered to be delimiters by this

   358      * <tt>StringTokenizer</tt> object is changed to be the characters in

   359      * the string <tt>delim</tt>. Then the next token in the string

   360      * after the current position is returned. The current position is

   361      * advanced beyond the recognized token.  The new delimiter set

   362      * remains the default after this call.

   363      *

   364      * @param      delim   the new delimiters.

   365      * @return     the next token, after switching to the new delimiter set.

   366      * @exception  NoSuchElementException  if there are no more tokens in this

   367      *               tokenizer's string.

   368      * @exception NullPointerException if delim is <CODE>null</CODE>

   369      */

   370     public String nextToken(String delim) {

   371         delimiters = delim;

   373         /* delimiter string specified, so set the appropriate flag. */

   374         delimsChanged = true;

   376         setMaxDelimCodePoint();

   377         return nextToken();

   378     }

   380     /**

   381      * Returns the same value as the <code>hasMoreTokens</code>

   382      * method. It exists so that this class can implement the

   383      * <code>Enumeration</code> interface.

   384      *

   385      * @return  <code>true</code> if there are more tokens;

   386      *          <code>false</code> otherwise.

   387      * @see     java.util.Enumeration

   388      * @see     java.util.StringTokenizer#hasMoreTokens()

   389      */

   390     public boolean hasMoreElements() {

   391         return hasMoreTokens();

   392     }

   394     /**

   395      * Returns the same value as the <code>nextToken</code> method,

   396      * except that its declared return value is <code>Object</code> rather than

   397      * <code>String</code>. It exists so that this class can implement the

   398      * <code>Enumeration</code> interface.

   399      *

   400      * @return     the next token in the string.

   401      * @exception  NoSuchElementException  if there are no more tokens in this

   402      *               tokenizer's string.

   403      * @see        java.util.Enumeration

   404      * @see        java.util.StringTokenizer#nextToken()

   405      */

   406     public Object nextElement() {

   407         return nextToken();

   408     }

   410     /**

   411      * Calculates the number of times that this tokenizer's

   412      * <code>nextToken</code> method can be called before it generates an

   413      * exception. The current position is not advanced.

   414      *

   415      * @return  the number of tokens remaining in the string using the current

   416      *          delimiter set.

   417      * @see     java.util.StringTokenizer#nextToken()

   418      */

   419     public int countTokens() {

   420         int count = 0;

   421         int currpos = currentPosition;

   422         while (currpos < maxPosition) {

   423             currpos = skipDelimiters(currpos);

   424             if (currpos >= maxPosition)

   425                 break;

   426             currpos = scanToken(currpos);

   427             count++;

   428         }

   429         return count;

   430     }

   431 }

author	Jaroslav Tulach <jaroslav.tulach@apidesign.org>
	Tue, 26 Feb 2013 16:54:16 +0100
changeset 772	d382dacfd73f
parent 597	emul/compact/src/main/java/java/util/StringTokenizer.java@ee8a922f4268
permissions	-rw-r--r--