diff -r ee8a922f4268 -r d382dacfd73f rt/emul/compact/src/main/java/java/util/StringTokenizer.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rt/emul/compact/src/main/java/java/util/StringTokenizer.java Tue Feb 26 16:54:16 2013 +0100 @@ -0,0 +1,431 @@ +/* + * Copyright (c) 1994, 2004, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package java.util; + +import java.lang.*; + +/** + * The string tokenizer class allows an application to break a + * string into tokens. The tokenization method is much simpler than + * the one used by the StreamTokenizer class. The + * StringTokenizer methods do not distinguish among + * identifiers, numbers, and quoted strings, nor do they recognize + * and skip comments. + *

+ * The set of delimiters (the characters that separate tokens) may + * be specified either at creation time or on a per-token basis. + *

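+ * An instance of StringTokenizer behaves in one of two + * ways, depending on whether it was created with the + * returnDelims flag having the value true + * or false: + * - If the flag is false, delimiter characters serve only to + * separate tokens. A token is a maximal sequence of consecutive + * characters that are not delimiters. + * - If the flag is true, delimiter characters are themselves + * returned as tokens. A token is thus either a single delimiter + * character, or a maximal sequence of consecutive characters that are + * not delimiters. + *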

+ * A StringTokenizer object internally maintains a current + * position within the string to be tokenized. Some operations advance this + * current position past the characters processed.

+ * A token is returned by taking a substring of the string that was used to + * create the StringTokenizer object. + *

+ * The following is one example of the use of the tokenizer. The code: + *

+ *     StringTokenizer st = new StringTokenizer("this is a test");
+ *     while (st.hasMoreTokens()) {
+ *         System.out.println(st.nextToken());
+ *     }
+ * 
+ *

+ * prints the following output: + *

+ *     this
+ *     is
+ *     a
+ *     test
+ * 
+ * + *

+ * StringTokenizer is a legacy class that is retained for + * compatibility reasons although its use is discouraged in new code. It is + * recommended that anyone seeking this functionality use the split + * method of String or the java.util.regex package instead. + *

+ * The following example illustrates how the String.split + * method can be used to break up a string into its basic tokens: + *

+ *     String[] result = "this is a test".split("\\s");
+ *     for (int x=0; x<result.length; x++)
+ *         System.out.println(result[x]);
+ * 
+ *

+ * prints the following output: + *

+ *     this
+ *     is
+ *     a
+ *     test
+ * 
/**
 * Splits a string into tokens separated by a configurable set of delimiter
 * characters. Tokens are substrings of the string passed to the constructor;
 * the tokenizer only keeps a cursor into that string and advances it as
 * tokens are consumed.
 *
 * <p>Instances are mutable and perform no synchronization.
 *
 * @author  unascribed
 * @see     java.io.StreamTokenizer
 * @since   JDK1.0
 */
public class StringTokenizer implements Enumeration<Object> {
    /** Index in {@link #str} at which the next scan starts. */
    private int currentPosition;

    /**
     * Position computed by the last {@link #hasMoreTokens()} call, or -1 when
     * no such cached value is available.
     */
    private int newPosition;

    /** Length of {@link #str}; scanning never goes past this index. */
    private final int maxPosition;

    /** The string being tokenized. */
    private final String str;

    /** Current delimiter set; may be {@code null} (see constructors). */
    private String delimiters;

    /** Whether delimiters are themselves returned as tokens. */
    private final boolean retDelims;

    /** Set by {@link #nextToken(String)} to invalidate {@link #newPosition}. */
    private boolean delimsChanged;

    /**
     * Value of the delimiter character with the highest value. It is used to
     * reject most non-delimiter characters with a single comparison.
     *
     * <p>It is unlikely to help much when {@link #hasSurrogates} is true,
     * because most string characters are then below the limit, but it is kept
     * so that both code paths stay similar.
     */
    private int maxDelimCodePoint;

    /**
     * True when the current delimiter set contains any surrogate (paired or
     * not). In that case the tokenizer walks the string by code point instead
     * of by char, because {@code String.indexOf(int)} does not treat an
     * unpaired surrogate as a single character.
     */
    private boolean hasSurrogates = false;

    /**
     * The delimiters as code points; only populated, and only consulted via
     * {@link #isDelimiter(int)}, while {@link #hasSurrogates} is true.
     */
    private int[] delimiterCodePoints;

    /**
     * Recomputes {@link #maxDelimCodePoint}, {@link #hasSurrogates} and
     * {@link #delimiterCodePoints} from the current {@link #delimiters}.
     */
    private void setMaxDelimCodePoint() {
        // Start from a clean slate so that state derived from a previous
        // delimiter set (see nextToken(String)) cannot leak into this one.
        hasSurrogates = false;
        delimiterCodePoints = null;

        if (delimiters == null) {
            maxDelimCodePoint = 0;
            return;
        }

        int m = 0;
        int c;
        int count = 0;
        for (int i = 0; i < delimiters.length(); i += Character.charCount(c)) {
            c = delimiters.charAt(i);
            if (c >= Character.MIN_HIGH_SURROGATE && c <= Character.MAX_LOW_SURROGATE) {
                // Read the full code point; an unpaired surrogate yields itself.
                c = delimiters.codePointAt(i);
                hasSurrogates = true;
            }
            if (m < c) {
                m = c;
            }
            count++;
        }
        maxDelimCodePoint = m;

        if (hasSurrogates) {
            delimiterCodePoints = new int[count];
            for (int i = 0, j = 0; i < count; i++, j += Character.charCount(c)) {
                c = delimiters.codePointAt(j);
                delimiterCodePoints[i] = c;
            }
        }
    }

    /**
     * Constructs a string tokenizer for the specified string. All characters
     * in the {@code delim} argument are the delimiters for separating tokens.
     *
     * <p>If the {@code returnDelims} flag is {@code true}, the delimiter
     * characters are also returned as tokens, each as its own token. If the
     * flag is {@code false}, the delimiter characters are skipped and only
     * serve as separators between tokens.
     *
     * <p>If {@code delim} is {@code null}, this constructor does not throw,
     * but {@link #hasMoreTokens()}, {@link #nextToken()} and
     * {@link #countTokens()} throw {@code NullPointerException}.
     *
     * @param   str            a string to be parsed.
     * @param   delim          the delimiters.
     * @param   returnDelims   flag indicating whether to return the delimiters
     *                         as tokens.
     * @exception NullPointerException if str is {@code null}
     */
    public StringTokenizer(String str, String delim, boolean returnDelims) {
        currentPosition = 0;
        newPosition = -1;
        delimsChanged = false;
        this.str = str;
        maxPosition = str.length();
        delimiters = delim;
        retDelims = returnDelims;
        setMaxDelimCodePoint();
    }

    /**
     * Constructs a string tokenizer for the specified string. The characters
     * in the {@code delim} argument are the delimiters for separating tokens.
     * Delimiter characters themselves are not treated as tokens.
     *
     * <p>If {@code delim} is {@code null}, this constructor does not throw,
     * but {@link #hasMoreTokens()}, {@link #nextToken()} and
     * {@link #countTokens()} throw {@code NullPointerException}.
     *
     * @param   str     a string to be parsed.
     * @param   delim   the delimiters.
     * @exception NullPointerException if str is {@code null}
     */
    public StringTokenizer(String str, String delim) {
        this(str, delim, false);
    }

    /**
     * Constructs a string tokenizer for the specified string using the
     * default delimiter set {@code " \t\n\r\f"}: space, tab, newline,
     * carriage return and form feed. Delimiter characters themselves are not
     * treated as tokens.
     *
     * @param   str   a string to be parsed.
     * @exception NullPointerException if str is {@code null}
     */
    public StringTokenizer(String str) {
        this(str, " \t\n\r\f", false);
    }

    /**
     * Skips delimiters starting from the specified position. If retDelims is
     * false, returns the index of the first non-delimiter character at or
     * after startPos. If retDelims is true, startPos is returned.
     *
     * @exception NullPointerException if the delimiter set is {@code null}
     */
    private int skipDelimiters(int startPos) {
        if (delimiters == null) {
            throw new NullPointerException();
        }

        int position = startPos;
        while (!retDelims && position < maxPosition) {
            if (!hasSurrogates) {
                char c = str.charAt(position);
                if ((c > maxDelimCodePoint) || (delimiters.indexOf(c) < 0)) {
                    break;
                }
                position++;
            } else {
                int c = str.codePointAt(position);
                if ((c > maxDelimCodePoint) || !isDelimiter(c)) {
                    break;
                }
                position += Character.charCount(c);
            }
        }
        return position;
    }

    /**
     * Skips ahead from startPos and returns the index of the next delimiter
     * character encountered, or maxPosition if no such delimiter is found.
     * When retDelims is true and startPos itself holds a delimiter, the index
     * just past that single delimiter is returned instead.
     *
     * Callers must pass a startPos below maxPosition.
     */
    private int scanToken(int startPos) {
        int position = startPos;
        while (position < maxPosition) {
            if (!hasSurrogates) {
                char c = str.charAt(position);
                if ((c <= maxDelimCodePoint) && (delimiters.indexOf(c) >= 0)) {
                    break;
                }
                position++;
            } else {
                int c = str.codePointAt(position);
                if ((c <= maxDelimCodePoint) && isDelimiter(c)) {
                    break;
                }
                position += Character.charCount(c);
            }
        }
        // No progress means startPos is a delimiter; in retDelims mode that
        // delimiter is the token, so step over exactly one of them.
        if (retDelims && (startPos == position)) {
            if (!hasSurrogates) {
                char c = str.charAt(position);
                if ((c <= maxDelimCodePoint) && (delimiters.indexOf(c) >= 0)) {
                    position++;
                }
            } else {
                int c = str.codePointAt(position);
                if ((c <= maxDelimCodePoint) && isDelimiter(c)) {
                    position += Character.charCount(c);
                }
            }
        }
        return position;
    }

    /**
     * Returns whether the given code point is in {@link #delimiterCodePoints}.
     * Only used while {@link #hasSurrogates} is true.
     */
    private boolean isDelimiter(int codePoint) {
        for (int i = 0; i < delimiterCodePoints.length; i++) {
            if (delimiterCodePoints[i] == codePoint) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tests if there are more tokens available from this tokenizer's string.
     * If this method returns {@code true}, then a subsequent call to
     * {@link #nextToken()} with no argument will successfully return a token.
     *
     * @return  {@code true} if and only if there is at least one token
     *          in the string after the current position; {@code false}
     *          otherwise.
     */
    public boolean hasMoreTokens() {
        /*
         * Temporarily store this position and use it in the following
         * nextToken() method only if the delimiters haven't been changed in
         * that nextToken() invocation.
         */
        newPosition = skipDelimiters(currentPosition);
        return (newPosition < maxPosition);
    }

    /**
     * Returns the next token from this string tokenizer.
     *
     * @return     the next token from this string tokenizer.
     * @exception  NoSuchElementException  if there are no more tokens in this
     *               tokenizer's string.
     */
    public String nextToken() {
        /*
         * If the next position was already computed in hasMoreTokens() and
         * the delimiters have NOT changed between that computation and this
         * invocation, reuse the computed value; otherwise skip again.
         */
        currentPosition = (newPosition >= 0 && !delimsChanged)
                ? newPosition
                : skipDelimiters(currentPosition);

        /* Reset these anyway */
        delimsChanged = false;
        newPosition = -1;

        if (currentPosition >= maxPosition) {
            throw new NoSuchElementException();
        }
        int start = currentPosition;
        currentPosition = scanToken(currentPosition);
        return str.substring(start, currentPosition);
    }

    /**
     * Returns the next token in this string tokenizer's string. First, the
     * set of characters considered to be delimiters is changed to the
     * characters in {@code delim}. Then the next token after the current
     * position is returned and the current position is advanced beyond it.
     * The new delimiter set remains in effect after this call.
     *
     * @param      delim   the new delimiters.
     * @return     the next token, after switching to the new delimiter set.
     * @exception  NoSuchElementException  if there are no more tokens in this
     *               tokenizer's string.
     * @exception  NullPointerException if delim is {@code null}
     */
    public String nextToken(String delim) {
        delimiters = delim;

        /* delimiter string specified, so set the appropriate flag. */
        delimsChanged = true;

        setMaxDelimCodePoint();
        return nextToken();
    }

    /**
     * Returns the same value as {@link #hasMoreTokens()}. It exists so that
     * this class can implement the {@code Enumeration} interface.
     *
     * @return  {@code true} if there are more tokens;
     *          {@code false} otherwise.
     * @see     java.util.Enumeration
     * @see     java.util.StringTokenizer#hasMoreTokens()
     */
    @Override
    public boolean hasMoreElements() {
        return hasMoreTokens();
    }

    /**
     * Returns the same value as {@link #nextToken()}, except that its
     * declared return type is {@code Object} rather than {@code String}. It
     * exists so that this class can implement the {@code Enumeration}
     * interface.
     *
     * @return     the next token in the string.
     * @exception  NoSuchElementException  if there are no more tokens in this
     *               tokenizer's string.
     * @see        java.util.Enumeration
     * @see        java.util.StringTokenizer#nextToken()
     */
    @Override
    public Object nextElement() {
        return nextToken();
    }

    /**
     * Calculates the number of times that this tokenizer's
     * {@link #nextToken()} method can be called before it generates an
     * exception. The current position is not advanced.
     *
     * @return  the number of tokens remaining in the string using the current
     *          delimiter set.
     * @see     java.util.StringTokenizer#nextToken()
     */
    public int countTokens() {
        int count = 0;
        int currpos = currentPosition;
        while (currpos < maxPosition) {
            currpos = skipDelimiters(currpos);
            if (currpos >= maxPosition) {
                break;
            }
            currpos = scanToken(currpos);
            count++;
        }
        return count;
    }
}