/*
 * Copyright (c) 1994, 2004, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package java.util;

import java.lang.*;

/**
 * The string tokenizer class allows an application to break a
 * string into tokens. The tokenization method is much simpler than
 * the one used by the <code>StreamTokenizer</code> class.
 * The <code>StringTokenizer</code> methods do not distinguish among
 * identifiers, numbers, and quoted strings, nor do they recognize
 * and skip comments.
 * <p>
 * The set of delimiters (the characters that separate tokens) may
 * be specified either at creation time or on a per-token basis.
 * <p>
 * An instance of <code>StringTokenizer</code> behaves in one of two
 * ways, depending on whether it was created with the
 * <code>returnDelims</code> flag having the value <code>true</code>
 * or <code>false</code>:
 * <ul>
 * <li>If the flag is <code>false</code>, delimiter characters are skipped
 *     and only serve as separators between tokens.
 * <li>If the flag is <code>true</code>, delimiter characters are also
 *     returned as tokens, each delimiter as a token of its own.
 * </ul><p>
 * A <code>StringTokenizer</code> object internally maintains a current
 * position within the string to be tokenized. Some operations advance this
 * current position past the characters processed.<p>
 * A token is returned by taking a substring of the string that was used to
 * create the <code>StringTokenizer</code> object.
 * <p>
 * The following is one example of the use of the tokenizer. The code:
 * <blockquote><pre>
 *     StringTokenizer st = new StringTokenizer("this is a test");
 *     while (st.hasMoreTokens()) {
 *         System.out.println(st.nextToken());
 *     }
 * </pre></blockquote>
 * <p>
 * prints the following output:
 * <blockquote><pre>
 *     this
 *     is
 *     a
 *     test
 * </pre></blockquote>
 *
 * <p>
 * <code>StringTokenizer</code> is a legacy class that is retained for
 * compatibility reasons although its use is discouraged in new code. It is
 * recommended that anyone seeking this functionality use the <code>split</code>
 * method of <code>String</code> or the java.util.regex package instead.
 * <p>
 * The following example illustrates how the <code>String.split</code>
 * method can be used to break up a string into its basic tokens:
 * <blockquote><pre>
 *     String[] result = "this is a test".split("\\s");
 *     for (int x=0; x&lt;result.length; x++)
 *         System.out.println(result[x]);
 * </pre></blockquote>
 * <p>
 * prints the following output:
 * <blockquote><pre>
 *     this
 *     is
 *     a
 *     test
 * </pre></blockquote>
*
 * @author  unascribed
 * @see     java.io.StreamTokenizer
 * @since   JDK1.0
 */
public class StringTokenizer implements Enumeration<Object> {
    /** Index in <code>str</code> from which the next token search starts. */
    private int currentPosition;

    /**
     * Start of the next token as pre-computed by <code>hasMoreTokens()</code>,
     * or <code>-1</code> when no pre-computed value is available.
     */
    private int newPosition;

    /** Length of <code>str</code>; scanning never goes past this index. */
    private final int maxPosition;

    /** The string being tokenized. */
    private final String str;

    /** The current delimiter set; may be <code>null</code> only via a constructor. */
    private String delimiters;

    /** Whether delimiters are themselves returned as tokens. */
    private final boolean retDelims;

    /**
     * Set by <code>nextToken(String)</code> so that the following
     * <code>nextToken()</code> ignores a position cached under the old
     * delimiter set.
     */
    private boolean delimsChanged;

    /**
     * maxDelimCodePoint stores the value of the delimiter character with the
     * highest value. It is used to optimize the detection of delimiter
     * characters.
     *
     * It is unlikely to provide any optimization benefit in the
     * hasSurrogates case because most string characters will be
     * smaller than the limit, but we keep it so that the two code
     * paths remain similar.
     */
    private int maxDelimCodePoint;

    /**
     * If delimiters include any surrogates (including surrogate
     * pairs), hasSurrogates is true and the tokenizer uses the
     * different code path. This is because String.indexOf(int)
     * doesn't handle unpaired surrogates as a single character.
     */
    private boolean hasSurrogates = false;

    /**
     * When hasSurrogates is true, delimiters are converted to code
     * points and isDelimiter(int) is used to determine if the given
     * codepoint is a delimiter.
     */
    private int[] delimiterCodePoints;

    /**
     * Set maxDelimCodePoint to the highest char in the delimiter set, and
     * recompute <code>hasSurrogates</code> and <code>delimiterCodePoints</code>
     * for the current value of <code>delimiters</code>.
     */
    private void setMaxDelimCodePoint() {
        // Start from a clean slate so that a previous delimiter set containing
        // surrogates does not keep forcing the code-point path for a new set
        // that has none.
        hasSurrogates = false;
        delimiterCodePoints = null;

        if (delimiters == null) {
            maxDelimCodePoint = 0;
            return;
        }

        int m = 0;
        int c;
        int count = 0;
        for (int i = 0; i < delimiters.length(); i += Character.charCount(c)) {
            c = delimiters.charAt(i);
            if (c >= Character.MIN_HIGH_SURROGATE && c <= Character.MAX_LOW_SURROGATE) {
                // Surrogate: look at the whole code point (a pair counts once).
                c = delimiters.codePointAt(i);
                hasSurrogates = true;
            }
            if (m < c) {
                m = c;
            }
            count++;
        }
        maxDelimCodePoint = m;

        if (hasSurrogates) {
            delimiterCodePoints = new int[count];
            for (int i = 0, j = 0; i < count; i++, j += Character.charCount(c)) {
                c = delimiters.codePointAt(j);
                delimiterCodePoints[i] = c;
            }
        }
    }

    /**
     * Constructs a string tokenizer for the specified string. All
     * characters in the <code>delim</code> argument are the delimiters
     * for separating tokens.
     * <p>
     * If the <code>returnDelims</code> flag is <code>true</code>, then
     * the delimiter characters are also returned as tokens. Each
     * delimiter is returned as a string of length one. If the flag is
     * <code>false</code>, the delimiter characters are skipped and only
     * serve as separators between tokens.
     * <p>
     * Note that if <tt>delim</tt> is <tt>null</tt>, this constructor does
     * not throw an exception. However, trying to invoke other methods on the
     * resulting <tt>StringTokenizer</tt> may result in a
     * <tt>NullPointerException</tt>.
     *
     * @param   str            a string to be parsed.
     * @param   delim          the delimiters.
     * @param   returnDelims   flag indicating whether to return the delimiters
     *                         as tokens.
     * @exception NullPointerException if str is <CODE>null</CODE>
     */
    public StringTokenizer(String str, String delim, boolean returnDelims) {
        currentPosition = 0;
        newPosition = -1;
        delimsChanged = false;
        this.str = str;
        maxPosition = str.length();
        delimiters = delim;
        retDelims = returnDelims;
        setMaxDelimCodePoint();
    }

    /**
     * Constructs a string tokenizer for the specified string. The
     * characters in the <code>delim</code> argument are the delimiters
     * for separating tokens. Delimiter characters themselves will not
     * be treated as tokens.
     * <p>
     * Note that if <tt>delim</tt> is <tt>null</tt>, this constructor does
     * not throw an exception. However, trying to invoke other methods on the
     * resulting <tt>StringTokenizer</tt> may result in a
     * <tt>NullPointerException</tt>.
     *
     * @param   str     a string to be parsed.
     * @param   delim   the delimiters.
     * @exception NullPointerException if str is <CODE>null</CODE>
     */
    public StringTokenizer(String str, String delim) {
        this(str, delim, false);
    }

    /**
     * Constructs a string tokenizer for the specified string. The
     * tokenizer uses the default delimiter set, which is
     * <code>"&nbsp;&#92;t&#92;n&#92;r&#92;f"</code>: the space character,
     * the tab character, the newline character, the carriage-return character,
     * and the form-feed character. Delimiter characters themselves will
     * not be treated as tokens.
     *
     * @param   str   a string to be parsed.
     * @exception NullPointerException if str is <CODE>null</CODE>
     */
    public StringTokenizer(String str) {
        this(str, " \t\n\r\f", false);
    }

    /**
     * Skips delimiters starting from the specified position. If retDelims
     * is false, returns the index of the first non-delimiter character at or
     * after startPos. If retDelims is true, startPos is returned.
     *
     * @exception NullPointerException if the delimiter set is null
     */
    private int skipDelimiters(int startPos) {
        if (delimiters == null) {
            throw new NullPointerException();
        }

        int position = startPos;
        while (!retDelims && position < maxPosition) {
            if (!hasSurrogates) {
                char c = str.charAt(position);
                if ((c > maxDelimCodePoint) || (delimiters.indexOf(c) < 0)) {
                    break;
                }
                position++;
            } else {
                int c = str.codePointAt(position);
                if ((c > maxDelimCodePoint) || !isDelimiter(c)) {
                    break;
                }
                position += Character.charCount(c);
            }
        }
        return position;
    }

    /**
     * Skips ahead from startPos and returns the index of the next delimiter
     * character encountered, or maxPosition if no such delimiter is found.
     * When retDelims is true and startPos itself is a delimiter, the index
     * just past that delimiter is returned so the delimiter forms a token.
     * Callers must pass a startPos smaller than maxPosition.
     */
    private int scanToken(int startPos) {
        int position = startPos;
        while (position < maxPosition) {
            if (!hasSurrogates) {
                char c = str.charAt(position);
                if ((c <= maxDelimCodePoint) && (delimiters.indexOf(c) >= 0)) {
                    break;
                }
                position++;
            } else {
                int c = str.codePointAt(position);
                if ((c <= maxDelimCodePoint) && isDelimiter(c)) {
                    break;
                }
                position += Character.charCount(c);
            }
        }
        if (retDelims && (startPos == position)) {
            // Nothing was consumed: the token is the delimiter at startPos.
            if (!hasSurrogates) {
                char c = str.charAt(position);
                if ((c <= maxDelimCodePoint) && (delimiters.indexOf(c) >= 0)) {
                    position++;
                }
            } else {
                int c = str.codePointAt(position);
                if ((c <= maxDelimCodePoint) && isDelimiter(c)) {
                    position += Character.charCount(c);
                }
            }
        }
        return position;
    }

    /**
     * Returns true if the given code point is one of delimiterCodePoints.
     * Only used on the hasSurrogates code path.
     */
    private boolean isDelimiter(int codePoint) {
        for (int i = 0; i < delimiterCodePoints.length; i++) {
            if (delimiterCodePoints[i] == codePoint) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tests if there are more tokens available from this tokenizer's string.
     * If this method returns <tt>true</tt>, then a subsequent call to
     * <tt>nextToken</tt> with no argument will successfully return a token.
     *
     * @return  <code>true</code> if and only if there is at least one token
     *          in the string after the current position; <code>false</code>
     *          otherwise.
     */
    public boolean hasMoreTokens() {
        /*
         * Temporarily store this position and use it in the following
         * nextToken() method only if the delimiters haven't been changed in
         * that nextToken() invocation.
         */
        newPosition = skipDelimiters(currentPosition);
        return (newPosition < maxPosition);
    }

    /**
     * Returns the next token from this string tokenizer.
     *
     * @return     the next token from this string tokenizer.
     * @exception  NoSuchElementException  if there are no more tokens in this
     *               tokenizer's string.
     */
    public String nextToken() {
        /*
         * If the next position was already computed in hasMoreTokens() and
         * the delimiters have NOT changed between that computation and this
         * invocation, reuse the computed value; otherwise skip delimiters
         * again using the current delimiter set.
         */
        currentPosition = (newPosition >= 0 && !delimsChanged) ?
            newPosition : skipDelimiters(currentPosition);

        /* Reset these anyway */
        delimsChanged = false;
        newPosition = -1;

        if (currentPosition >= maxPosition) {
            throw new NoSuchElementException();
        }
        int start = currentPosition;
        currentPosition = scanToken(currentPosition);
        return str.substring(start, currentPosition);
    }

    /**
     * Returns the next token in this string tokenizer's string. First,
     * the set of characters considered to be delimiters by this
     * <tt>StringTokenizer</tt> object is changed to be the characters in
     * the string <tt>delim</tt>. Then the next token in the string
     * after the current position is returned. The current position is
     * advanced beyond the recognized token.  The new delimiter set
     * remains the default after this call.
     * <p>
     * If <tt>delim</tt> is <tt>null</tt> the exception is thrown before any
     * state is modified, so the tokenizer keeps its previous delimiter set
     * and remains usable.
     *
     * @param      delim   the new delimiters.
     * @return     the next token, after switching to the new delimiter set.
     * @exception  NoSuchElementException  if there are no more tokens in this
     *               tokenizer's string.
     * @exception NullPointerException if delim is <CODE>null</CODE>
     */
    public String nextToken(String delim) {
        // Validate before mutating so a failed call does not corrupt the
        // tokenizer.
        if (delim == null) {
            throw new NullPointerException("delim");
        }
        delimiters = delim;

        /* delimiter string specified, so set the appropriate flag. */
        delimsChanged = true;

        setMaxDelimCodePoint();
        return nextToken();
    }

    /**
     * Returns the same value as the <code>hasMoreTokens</code>
     * method. It exists so that this class can implement the
     * <code>Enumeration</code> interface.
     *
     * @return  <code>true</code> if there are more tokens;
     *          <code>false</code> otherwise.
     * @see     java.util.Enumeration
     * @see     java.util.StringTokenizer#hasMoreTokens()
     */
    public boolean hasMoreElements() {
        return hasMoreTokens();
    }

    /**
     * Returns the same value as the <code>nextToken</code> method,
     * except that its declared return value is <code>Object</code> rather than
     * <code>String</code>. It exists so that this class can implement the
     * <code>Enumeration</code> interface.
     *
     * @return     the next token in the string.
     * @exception  NoSuchElementException  if there are no more tokens in this
     *               tokenizer's string.
     * @see        java.util.Enumeration
     * @see        java.util.StringTokenizer#nextToken()
     */
    public Object nextElement() {
        return nextToken();
    }

    /**
     * Calculates the number of times that this tokenizer's
     * <code>nextToken</code> method can be called before it generates an
     * exception. The current position is not advanced.
     *
     * @return  the number of tokens remaining in the string using the current
     *          delimiter set.
     * @see     java.util.StringTokenizer#nextToken()
     */
    public int countTokens() {
        int count = 0;
        int currpos = currentPosition;
        while (currpos < maxPosition) {
            currpos = skipDelimiters(currpos);
            if (currpos >= maxPosition) {
                break;
            }
            currpos = scanToken(currpos);
            count++;
        }
        return count;
    }
}